sanzang 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MANUAL.rdoc +3 -8
- data/README.rdoc +4 -11
- data/lib/sanzang.rb +3 -2
- data/lib/sanzang/batch_translator.rb +5 -14
- data/lib/sanzang/command/batch.rb +8 -25
- data/lib/sanzang/command/reflow.rb +14 -41
- data/lib/sanzang/command/sanzang_cmd.rb +24 -18
- data/lib/sanzang/command/translate.rb +10 -40
- data/lib/sanzang/platform.rb +128 -0
- data/lib/sanzang/text_formatter.rb +1 -1
- data/lib/sanzang/translation_table.rb +1 -1
- data/lib/sanzang/translator.rb +17 -5
- data/lib/sanzang/version.rb +2 -2
- data/test/tc_simple_translation.rb +9 -11
- metadata +14 -9
- checksums.yaml +0 -7
data/MANUAL.rdoc
CHANGED
@@ -293,14 +293,9 @@ messages will still be displayed in the console's native IBM-437 encoding.
|
|
293
293
|
|
294
294
|
$ sanzang t -E UTF-16LE -i in.txt -o out.txt TABLE.txt
|
295
295
|
|
296
|
-
If the "-E" option is not specified, then \Sanzang will use the default
|
297
|
-
encoding
|
298
|
-
\
|
299
|
-
written to in the UTF-8 encoding. The one *exception* to this is for
|
300
|
-
environments using the IBM-437 encoding (typically an old Windows command
|
301
|
-
shell). In this case, \Sanzang will take pity on you and automatically switch
|
302
|
-
to UTF-8 by default, as if you had specified the option "-E" with value
|
303
|
-
"UTF-8".
|
296
|
+
If the "-E" option is not specified, then \Sanzang will use the default data
|
297
|
+
encoding for that environment. The data encoding can be seen by running
|
298
|
+
\sanzang with the "--version" or "--platform" options.
|
304
299
|
|
305
300
|
== Responsible Use
|
306
301
|
|
data/README.rdoc
CHANGED
@@ -34,19 +34,12 @@ automatically download and install \Sanzang onto your computer.
|
|
34
34
|
# gem install sanzang
|
35
35
|
|
36
36
|
After this, you should be able to run the _sanzang_ command. Run the following
|
37
|
-
command to verify your installation and print
|
37
|
+
command to verify your installation and print version information.
|
38
38
|
|
39
|
-
# sanzang -
|
39
|
+
# sanzang -V
|
40
40
|
|
41
|
-
This command should show a summary of your
|
41
|
+
This command should show a summary of your \Sanzang version and environment.
|
42
42
|
|
43
|
-
|
44
|
-
Ruby version: 2.0.0
|
45
|
-
External encoding: UTF-8
|
46
|
-
Internal encoding: none
|
47
|
-
Fork implemented: true
|
48
|
-
Parallel version: 0.6.4
|
49
|
-
Processors found: 4
|
50
|
-
Sanzang version: 1.0.0
|
43
|
+
sanzang 1.0.4 [ruby_1.9.3] [x86_64-linux] [UTF-8]
|
51
44
|
|
52
45
|
You now have \Sanzang installed on your computer.
|
data/lib/sanzang.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby -w
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -23,10 +23,11 @@
|
|
23
23
|
module Sanzang
|
24
24
|
end
|
25
25
|
|
26
|
+
require_relative File.join("sanzang", "batch_translator")
|
27
|
+
require_relative File.join("sanzang", "platform")
|
26
28
|
require_relative File.join("sanzang", "text_formatter")
|
27
29
|
require_relative File.join("sanzang", "translation_table")
|
28
30
|
require_relative File.join("sanzang", "translator")
|
29
|
-
require_relative File.join("sanzang", "batch_translator")
|
30
31
|
require_relative File.join("sanzang", "version")
|
31
32
|
|
32
33
|
# The Sanzang::Command module contains Unix style commands utilizing the
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "parallel"
|
20
20
|
|
21
|
+
require_relative "platform"
|
21
22
|
require_relative "translator"
|
22
23
|
|
23
24
|
module Sanzang
|
@@ -28,18 +29,6 @@ module Sanzang
|
|
28
29
|
#
|
29
30
|
class BatchTranslator < Translator
|
30
31
|
|
31
|
-
# Evaluates to true if this Ruby can execute the fork(2) system call.
|
32
|
-
#
|
33
|
-
def forking?
|
34
|
-
Process.respond_to?(:fork)
|
35
|
-
end
|
36
|
-
|
37
|
-
# The number of logical processors detected on the current system.
|
38
|
-
#
|
39
|
-
def processor_count
|
40
|
-
Parallel.processor_count
|
41
|
-
end
|
42
|
-
|
43
32
|
# Translate a batch of files. The main parameter is an array, each element
|
44
33
|
# of which should be a two-dimensional array with the first element being
|
45
34
|
# the input file path, and the second element being the output file path.
|
@@ -47,8 +36,10 @@ module Sanzang
|
|
47
36
|
# return value is an array containing all the output file paths.
|
48
37
|
#
|
49
38
|
def translate_batch(fpath_pairs, verbose = true, jobs = nil)
|
50
|
-
if not
|
39
|
+
if not Sanzang::Platform.unix_processes?
|
51
40
|
jobs = 0
|
41
|
+
elsif not jobs
|
42
|
+
jobs = Sanzang::Platform.processor_count
|
52
43
|
end
|
53
44
|
Parallel.map(fpath_pairs, :in_processes => jobs) do |f1,f2|
|
54
45
|
translate_io(f1, f2)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "optparse"
|
20
20
|
|
21
|
+
require_relative File.join("..", "platform")
|
21
22
|
require_relative File.join("..", "translation_table")
|
22
23
|
require_relative File.join("..", "batch_translator")
|
23
24
|
require_relative File.join("..", "version")
|
@@ -35,7 +36,7 @@ module Sanzang::Command
|
|
35
36
|
#
|
36
37
|
def initialize
|
37
38
|
@name = "sanzang batch"
|
38
|
-
@encoding = nil
|
39
|
+
@encoding = Sanzang::Platform.data_encoding
|
39
40
|
@outdir = nil
|
40
41
|
@jobs = nil
|
41
42
|
@verbose = false
|
@@ -56,8 +57,6 @@ module Sanzang::Command
|
|
56
57
|
return 1
|
57
58
|
end
|
58
59
|
|
59
|
-
set_data_encoding
|
60
|
-
|
61
60
|
translator = nil
|
62
61
|
File.open(args[0], "rb", encoding: @encoding) do |table_file|
|
63
62
|
table = Sanzang::TranslationTable.new(table_file.read)
|
@@ -79,20 +78,11 @@ module Sanzang::Command
|
|
79
78
|
return 1
|
80
79
|
end
|
81
80
|
|
82
|
-
|
81
|
+
# Name of the command
|
82
|
+
#
|
83
|
+
attr_reader :name
|
83
84
|
|
84
|
-
|
85
|
-
#
|
86
|
-
def set_data_encoding
|
87
|
-
if @encoding == nil
|
88
|
-
if Encoding.default_external.to_s =~ /ASCII|IBM/
|
89
|
-
$stderr.puts "Encoding: UTF-8"
|
90
|
-
@encoding = Encoding::UTF_8
|
91
|
-
else
|
92
|
-
@encoding = Encoding.default_external
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
85
|
+
private
|
96
86
|
|
97
87
|
# Return an OptionParser object for this command
|
98
88
|
#
|
@@ -116,10 +106,7 @@ module Sanzang::Command
|
|
116
106
|
@encoding = Encoding.find(v)
|
117
107
|
end
|
118
108
|
op.on("-L", "--list-encodings", "list possible encodings") do |v|
|
119
|
-
|
120
|
-
x.to_s.upcase <=> y.to_s.upcase
|
121
|
-
end
|
122
|
-
puts encodings
|
109
|
+
Sanzang::Platform.valid_encodings.each {|e| puts e.to_s }
|
123
110
|
exit 0
|
124
111
|
end
|
125
112
|
op.on("-j", "--jobs=N", "allow N concurrent processes") do |v|
|
@@ -131,9 +118,5 @@ module Sanzang::Command
|
|
131
118
|
end
|
132
119
|
end
|
133
120
|
|
134
|
-
# Name of the command
|
135
|
-
#
|
136
|
-
attr_reader :name
|
137
|
-
|
138
121
|
end
|
139
122
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "optparse"
|
20
20
|
|
21
|
+
require_relative File.join("..", "platform")
|
21
22
|
require_relative File.join("..", "text_formatter")
|
22
23
|
require_relative File.join("..", "version")
|
23
24
|
|
@@ -36,27 +37,12 @@ module Sanzang::Command
|
|
36
37
|
#
|
37
38
|
def initialize
|
38
39
|
@name = "sanzang reflow"
|
39
|
-
@encoding =
|
40
|
+
@encoding = Sanzang::Platform.data_encoding
|
40
41
|
@infile = nil
|
41
42
|
@outfile = nil
|
42
43
|
@verbose = false
|
43
44
|
end
|
44
45
|
|
45
|
-
# Get a list of all acceptable text encodings.
|
46
|
-
#
|
47
|
-
def valid_encodings
|
48
|
-
all_enc = Encoding.list.collect {|e| e.to_s }.sort do |x,y|
|
49
|
-
x.upcase <=> y.upcase
|
50
|
-
end
|
51
|
-
all_enc.find_all do |e|
|
52
|
-
begin
|
53
|
-
Encoding::Converter.search_convpath(e, Encoding::UTF_8)
|
54
|
-
rescue Encoding::ConverterNotFoundError
|
55
|
-
e == "UTF-8" ? true : false
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
46
|
# Run the reflow command with the given arguments. The parameter _args_
|
61
47
|
# would typically be an array of command options and parameters. Calling
|
62
48
|
# this with the "-h" or "--help" option will print full usage information
|
@@ -71,8 +57,6 @@ module Sanzang::Command
|
|
71
57
|
return 1
|
72
58
|
end
|
73
59
|
|
74
|
-
set_data_encoding
|
75
|
-
|
76
60
|
begin
|
77
61
|
fin = @infile ? File.open(@infile, "r") : $stdin
|
78
62
|
fin.binmode.set_encoding(@encoding)
|
@@ -101,20 +85,11 @@ module Sanzang::Command
|
|
101
85
|
return 1
|
102
86
|
end
|
103
87
|
|
104
|
-
|
105
|
-
|
106
|
-
# Initialize the encoding for text data if it is not already set
|
88
|
+
# The name of the command
|
107
89
|
#
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
$stderr.puts "Encoding: UTF-8"
|
112
|
-
@encoding = Encoding::UTF_8
|
113
|
-
else
|
114
|
-
@encoding = Encoding.default_external
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
90
|
+
attr_reader :name
|
91
|
+
|
92
|
+
private
|
118
93
|
|
119
94
|
# An OptionParser for the command
|
120
95
|
#
|
@@ -122,10 +97,12 @@ module Sanzang::Command
|
|
122
97
|
OptionParser.new do |op|
|
123
98
|
op.banner = "Usage: #{@name} [options]\n"
|
124
99
|
|
125
|
-
op.banner << "\nReformat text
|
126
|
-
op.banner << "
|
127
|
-
op.banner << "
|
128
|
-
op.banner << "
|
100
|
+
op.banner << "\nReformat text into lines based on spacing, "
|
101
|
+
op.banner << "punctuation, etc. This should work\nfor the CJK "
|
102
|
+
op.banner << "languages (Chinese, Japanese, and Korean). By default, "
|
103
|
+
op.banner << "text is read\nfrom STDIN and written to STDOUT."
|
104
|
+
op.banner << "\n"
|
105
|
+
|
129
106
|
op.banner << "\nOptions:\n"
|
130
107
|
|
131
108
|
op.on("-h", "--help", "show this help message and exit") do |v|
|
@@ -136,7 +113,7 @@ module Sanzang::Command
|
|
136
113
|
@encoding = Encoding.find(v)
|
137
114
|
end
|
138
115
|
op.on("-L", "--list-encodings", "list possible encodings") do |v|
|
139
|
-
puts
|
116
|
+
Sanzang::Platform.valid_encodings.each {|e| puts e.to_s }
|
140
117
|
exit 0
|
141
118
|
end
|
142
119
|
op.on("-i", "--infile=FILE", "read input text from FILE") do |v|
|
@@ -151,9 +128,5 @@ module Sanzang::Command
|
|
151
128
|
end
|
152
129
|
end
|
153
130
|
|
154
|
-
# The name of the command
|
155
|
-
#
|
156
|
-
attr_reader :name
|
157
|
-
|
158
131
|
end
|
159
132
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -23,6 +23,7 @@ require_relative "reflow"
|
|
23
23
|
require_relative "translate"
|
24
24
|
require_relative "batch"
|
25
25
|
|
26
|
+
require_relative File.join("..", "platform")
|
26
27
|
require_relative File.join("..", "version")
|
27
28
|
|
28
29
|
module Sanzang::Command
|
@@ -77,22 +78,30 @@ module Sanzang::Command
|
|
77
78
|
# A string giving a listing of platform information
|
78
79
|
#
|
79
80
|
def platform_info
|
80
|
-
info = "
|
81
|
-
info << "
|
82
|
-
info << "
|
83
|
-
info << "
|
84
|
-
info << "
|
85
|
-
info << "
|
86
|
-
info << "
|
87
|
-
info << "
|
81
|
+
info = "host_arch = #{Sanzang::Platform.machine_arch}\n"
|
82
|
+
info << "host_os = #{Sanzang::Platform.os_name}\n"
|
83
|
+
info << "host_processors = #{Sanzang::Platform.processor_count}\n"
|
84
|
+
info << "ruby_encoding_ext = #{Encoding.default_external}\n"
|
85
|
+
info << "ruby_encoding_int = #{Encoding.default_internal or 'none'}\n"
|
86
|
+
info << "ruby_multiproc = #{Sanzang::Platform.unix_processes?}\n"
|
87
|
+
info << "ruby_platform = #{RUBY_PLATFORM}\n"
|
88
|
+
info << "ruby_version = #{RUBY_VERSION}\n"
|
89
|
+
info << "sanzang_encoding = #{Sanzang::Platform.data_encoding}\n"
|
90
|
+
info << "sanzang_parallel = #{Parallel::VERSION}\n"
|
91
|
+
info << "sanzang_version = #{Sanzang::VERSION}\n"
|
88
92
|
end
|
89
93
|
|
90
94
|
# This is a string giving a brief one-line summary of version information
|
91
95
|
#
|
92
96
|
def version_info
|
93
|
-
"sanzang #{Sanzang::VERSION} [ruby_#{RUBY_VERSION}] [#{RUBY_PLATFORM}]"
|
97
|
+
"sanzang #{Sanzang::VERSION} [ruby_#{RUBY_VERSION}] [#{RUBY_PLATFORM}]" \
|
98
|
+
+ " [#{Sanzang::Platform.data_encoding}]"
|
94
99
|
end
|
95
100
|
|
101
|
+
# Name of the command
|
102
|
+
#
|
103
|
+
attr_reader :name
|
104
|
+
|
96
105
|
private
|
97
106
|
|
98
107
|
# An OptionParser object for parsing command options and parameters
|
@@ -100,16 +109,17 @@ module Sanzang::Command
|
|
100
109
|
def option_parser
|
101
110
|
OptionParser.new do |op|
|
102
111
|
op.banner = "Usage: #{@name} [options]\n"
|
103
|
-
op.banner << "Usage: #{@name} <command> [options] [args]\n
|
112
|
+
op.banner << "Usage: #{@name} <command> [options] [args]\n"
|
104
113
|
|
105
|
-
op.banner << "
|
114
|
+
op.banner << "\nUse \"-h\" or \"--help\" with sanzang commands for "
|
115
|
+
op.banner << "usage information.\n"
|
106
116
|
|
107
117
|
op.banner << "\nSanzang commands:\n"
|
108
|
-
op.banner << " batch translate many files in parallel\n"
|
118
|
+
op.banner << " batch translate many files in parallel\n"
|
109
119
|
op.banner << " reflow format CJK text for translation\n"
|
110
120
|
op.banner << " translate standard single text translation\n"
|
111
|
-
op.banner << "\nOptions:\n"
|
112
121
|
|
122
|
+
op.banner << "\nOptions:\n"
|
113
123
|
op.on("-h", "--help", "show this help message and exit") do |v|
|
114
124
|
puts op
|
115
125
|
exit 0
|
@@ -125,9 +135,5 @@ module Sanzang::Command
|
|
125
135
|
end
|
126
136
|
end
|
127
137
|
|
128
|
-
# Name of the command
|
129
|
-
#
|
130
|
-
attr_reader :name
|
131
|
-
|
132
138
|
end
|
133
139
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "optparse"
|
20
20
|
|
21
|
+
require_relative File.join("..", "platform")
|
21
22
|
require_relative File.join("..", "translation_table")
|
22
23
|
require_relative File.join("..", "translator")
|
23
24
|
require_relative File.join("..", "version")
|
@@ -34,27 +35,12 @@ module Sanzang::Command
|
|
34
35
|
#
|
35
36
|
def initialize
|
36
37
|
@name = "sanzang translate"
|
37
|
-
@encoding = nil
|
38
|
+
@encoding = Sanzang::Platform.data_encoding
|
38
39
|
@infile = nil
|
39
40
|
@outfile = nil
|
40
41
|
@verbose = false
|
41
42
|
end
|
42
43
|
|
43
|
-
# Get a list of all acceptable text encodings.
|
44
|
-
#
|
45
|
-
def valid_encodings
|
46
|
-
all_enc = Encoding.list.collect {|e| e.to_s }.sort do |x,y|
|
47
|
-
x.upcase <=> y.upcase
|
48
|
-
end
|
49
|
-
all_enc.find_all do |e|
|
50
|
-
begin
|
51
|
-
Encoding::Converter.search_convpath(e, Encoding::UTF_8)
|
52
|
-
rescue Encoding::ConverterNotFoundError
|
53
|
-
e == "UTF-8" ? true : false
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
44
|
# Run the translate command with the given arguments. The parameter _args_
|
59
45
|
# would typically be an array of command options and parameters. Calling
|
60
46
|
# this with the "-h" or "--help" option will print full usage information
|
@@ -69,8 +55,6 @@ module Sanzang::Command
|
|
69
55
|
return 1
|
70
56
|
end
|
71
57
|
|
72
|
-
set_data_encoding
|
73
|
-
|
74
58
|
translator = nil
|
75
59
|
File.open(args[0], "rb", encoding: @encoding) do |table_file|
|
76
60
|
table = Sanzang::TranslationTable.new(table_file.read)
|
@@ -105,20 +89,11 @@ module Sanzang::Command
|
|
105
89
|
return 1
|
106
90
|
end
|
107
91
|
|
108
|
-
|
92
|
+
# Name of the command
|
93
|
+
#
|
94
|
+
attr_reader :name
|
109
95
|
|
110
|
-
|
111
|
-
#
|
112
|
-
def set_data_encoding
|
113
|
-
if @encoding == nil
|
114
|
-
if Encoding.default_external.to_s =~ /ASCII|IBM/
|
115
|
-
$stderr.puts "Encoding: UTF-8"
|
116
|
-
@encoding = Encoding::UTF_8
|
117
|
-
else
|
118
|
-
@encoding = Encoding.default_external
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
96
|
+
private
|
122
97
|
|
123
98
|
# An OptionParser for the command
|
124
99
|
#
|
@@ -128,10 +103,9 @@ module Sanzang::Command
|
|
128
103
|
|
129
104
|
op.banner << "\nTranslate text using simple table rules. Input text "
|
130
105
|
op.banner << "is read from STDIN by\ndefault, and the output is "
|
131
|
-
op.banner << "written to STDOUT by default
|
106
|
+
op.banner << "written to STDOUT by default. The translation table "
|
107
|
+
op.banner << "\nfile is specified as a parameter.\n"
|
132
108
|
|
133
|
-
op.banner << "\nExample:\n"
|
134
|
-
op.banner << " #{@name} -i text.txt -o text.sz.txt table.txt\n"
|
135
109
|
op.banner << "\nOptions:\n"
|
136
110
|
|
137
111
|
op.on("-h", "--help", "show this help message and exit") do |v|
|
@@ -142,7 +116,7 @@ module Sanzang::Command
|
|
142
116
|
@encoding = Encoding.find(v)
|
143
117
|
end
|
144
118
|
op.on("-L", "--list-encodings", "list possible encodings") do |v|
|
145
|
-
puts
|
119
|
+
Sanzang::Platform.valid_encodings.each {|e| puts e.to_s }
|
146
120
|
exit 0
|
147
121
|
end
|
148
122
|
op.on("-i", "--infile=FILE", "read input text from FILE") do |v|
|
@@ -157,9 +131,5 @@ module Sanzang::Command
|
|
157
131
|
end
|
158
132
|
end
|
159
133
|
|
160
|
-
# Name of the command
|
161
|
-
#
|
162
|
-
attr_reader :name
|
163
|
-
|
164
134
|
end
|
165
135
|
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- encoding: UTF-8 -*-
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
|
+
#
|
6
|
+
# This program is free software: you can redistribute it and/or modify it under
|
7
|
+
# the terms of the GNU General Public License as published by the Free Software
|
8
|
+
# Foundation, either version 3 of the License, or (at your option) any later
|
9
|
+
# version.
|
10
|
+
#
|
11
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
12
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
13
|
+
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
14
|
+
# details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License along with
|
17
|
+
# this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'rbconfig'
|
20
|
+
|
21
|
+
# The Sanzang::Platform module includes information about the underlying system
|
22
|
+
# that is needed by the \Sanzang system. This includes information about the
|
23
|
+
# machine architecture and OS, the number of processors available, encodings
|
24
|
+
# that are supported, and encodings that are optimal.
|
25
|
+
#
|
26
|
+
module Sanzang::Platform
|
27
|
+
class << self
|
28
|
+
|
29
|
+
# CPU architecture of the underlying machine
|
30
|
+
#
|
31
|
+
def machine_arch
|
32
|
+
RbConfig::CONFIG["target_cpu"]
|
33
|
+
end
|
34
|
+
|
35
|
+
# Operating system, which may be different from RUBY_PLATFORM
|
36
|
+
#
|
37
|
+
def os_name
|
38
|
+
RbConfig::CONFIG["target_os"]
|
39
|
+
end
|
40
|
+
|
41
|
+
# Does this Ruby VM support Unix-style process handling?
|
42
|
+
#
|
43
|
+
def unix_processes?
|
44
|
+
[:fork, :wait, :kill].each do |f|
|
45
|
+
if not Process.respond_to?(f)
|
46
|
+
return false
|
47
|
+
end
|
48
|
+
end
|
49
|
+
true
|
50
|
+
end
|
51
|
+
|
52
|
+
# Find the number of logical processors seen by the system. This may be
|
53
|
+
# different from the number of physical processors or CPU cores. If the
|
54
|
+
# number of processors cannot be detected, nil is returned. For Windows,
|
55
|
+
# this is detected through an OLE lookup, and for Unix systems, a heuristic
|
56
|
+
# approach is taken. Supported Unix types include:
|
57
|
+
#
|
58
|
+
# * AIX: pmcycles (AIX 5+), lsdev
|
59
|
+
# * BSD: /sbin/sysctl
|
60
|
+
# * Cygwin: /proc/cpuinfo
|
61
|
+
# * Darwin: hwprefs, /usr/sbin/sysctl
|
62
|
+
# * HP-UX: ioscan
|
63
|
+
# * IRIX: sysconf
|
64
|
+
# * Linux: /proc/cpuinfo
|
65
|
+
# * Minix 3+: /proc/cpuinfo
|
66
|
+
# * Solaris: psrinfo
|
67
|
+
# * Tru64 UNIX: psrinfo
|
68
|
+
# * UnixWare: psrinfo
|
69
|
+
#
|
70
|
+
def processor_count
|
71
|
+
if os_name =~ /mingw|mswin/
|
72
|
+
require 'win32ole'
|
73
|
+
result = WIN32OLE.connect("winmgmts://").ExecQuery(
|
74
|
+
"select NumberOfLogicalProcessors from Win32_Processor")
|
75
|
+
result.to_enum.first.NumberOfLogicalProcessors
|
76
|
+
elsif File.readable?("/proc/cpuinfo")
|
77
|
+
IO.read("/proc/cpuinfo").scan(/^processor/).size
|
78
|
+
elsif File.executable?("/usr/bin/hwprefs")
|
79
|
+
IO.popen(%w[/usr/bin/hwprefs thread_count]).read.to_i
|
80
|
+
elsif File.executable?("/usr/sbin/psrinfo")
|
81
|
+
IO.popen("/usr/sbin/psrinfo").read.scan(/^.*on-*line/).size
|
82
|
+
elsif File.executable?("/usr/sbin/ioscan")
|
83
|
+
IO.popen(%w[/usr/sbin/ioscan -kC processor]) do |out|
|
84
|
+
out.read.scan(/^.*processor/).size
|
85
|
+
end
|
86
|
+
elsif File.executable?("/usr/sbin/pmcycles")
|
87
|
+
IO.popen(%w[/usr/sbin/pmcycles -m]).read.count("\n")
|
88
|
+
elsif File.executable?("/usr/sbin/lsdev")
|
89
|
+
IO.popen(%w[/usr/sbin/lsdev -Cc processor -S 1]).read.count("\n")
|
90
|
+
elsif File.executable?("/usr/sbin/sysconf") and os_name =~ /IRIX/i
|
91
|
+
IO.popen(%w[/usr/sbin/sysconf NPROC_ONLN]).read.to_i
|
92
|
+
elsif File.executable?("/usr/sbin/sysctl")
|
93
|
+
IO.popen(%w[/usr/sbin/sysctl -n hw.ncpu]).read.to_i
|
94
|
+
elsif File.executable?("/sbin/sysctl")
|
95
|
+
IO.popen(%w[/sbin/sysctl -n hw.ncpu]).read.to_i
|
96
|
+
else
|
97
|
+
nil
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Text encodings that can be converted to UTF-8. MRI still lacks some
|
102
|
+
# converter implementations for obscure encodings.
|
103
|
+
#
|
104
|
+
def valid_encodings
|
105
|
+
Encoding.list.find_all do |e|
|
106
|
+
begin
|
107
|
+
Encoding::Converter.search_convpath(e, Encoding::UTF_8)
|
108
|
+
rescue Encoding::ConverterNotFoundError
|
109
|
+
e == Encoding::UTF_8 ? true : false
|
110
|
+
end
|
111
|
+
end.sort_by! {|e| e.to_s.upcase }
|
112
|
+
end
|
113
|
+
|
114
|
+
# Default text data encoding on this platform. This is usually the default
|
115
|
+
# external encoding of the Ruby interpreter; however, if the encoding is
|
116
|
+
# an ASCII variant or an old IBM DOS encoding, then it should default to
|
117
|
+
# UTF-8 since these are effectively obsolete, or they are subsets of UTF-8.
|
118
|
+
#
|
119
|
+
def data_encoding
|
120
|
+
if Encoding.default_external.to_s =~ /ASCII|IBM/
|
121
|
+
Encoding::UTF_8
|
122
|
+
else
|
123
|
+
Encoding.default_external
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
data/lib/sanzang/translator.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -69,7 +69,7 @@ module Sanzang
|
|
69
69
|
# Translator#translate is collated and numbered for reference purposes.
|
70
70
|
# This is the normal text listing output of the Sanzang Translator.
|
71
71
|
#
|
72
|
-
def gen_listing(source_text)
|
72
|
+
def gen_listing(source_text, pos = 1)
|
73
73
|
source_encoding = source_text.encoding
|
74
74
|
source_text.encode!(Encoding::UTF_8)
|
75
75
|
|
@@ -79,7 +79,7 @@ module Sanzang
|
|
79
79
|
listing = ""
|
80
80
|
texts[0].length.times do |line_i|
|
81
81
|
@table.width.times do |col_i|
|
82
|
-
listing << "[#{
|
82
|
+
listing << "[#{pos + line_i}.#{col_i + 1}] #{texts[col_i][line_i]}" \
|
83
83
|
<< newline
|
84
84
|
end
|
85
85
|
listing << newline
|
@@ -90,7 +90,8 @@ module Sanzang
|
|
90
90
|
# Read a text from _input_ and write its translation listing to _output_.
|
91
91
|
# If a parameter is a string, it is interpreted as the path to a file, and
|
92
92
|
# the relevant file is opened and used. Otherwise, the parameter is treated
|
93
|
-
# as an open IO object.
|
93
|
+
# as an open IO object. I/O is buffered for better performance and to avoid
|
94
|
+
# reading entire texts into memory.
|
94
95
|
#
|
95
96
|
def translate_io(input, output)
|
96
97
|
if input.kind_of?(String)
|
@@ -103,7 +104,18 @@ module Sanzang
|
|
103
104
|
else
|
104
105
|
io_out = output
|
105
106
|
end
|
106
|
-
|
107
|
+
|
108
|
+
buf_size = 96
|
109
|
+
buffer = ""
|
110
|
+
io_in.each do |line|
|
111
|
+
buffer << line
|
112
|
+
if io_in.lineno % buf_size == 0
|
113
|
+
io_out.write(gen_listing(buffer, io_in.lineno - buf_size + 1))
|
114
|
+
buffer = ""
|
115
|
+
end
|
116
|
+
end
|
117
|
+
io_out.write(
|
118
|
+
gen_listing(buffer, io_in.lineno - buffer.rstrip.count("\n")))
|
107
119
|
ensure
|
108
120
|
io_in.close if input.kind_of?(String) and not io_in.closed?
|
109
121
|
io_out.close if output.kind_of?(String) and not io_out.closed?
|
data/lib/sanzang/version.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -20,6 +20,6 @@ module Sanzang
|
|
20
20
|
|
21
21
|
# Current version number of Sanzang
|
22
22
|
#
|
23
|
-
VERSION = "1.0.3"
|
23
|
+
VERSION = "1.0.4"
|
24
24
|
|
25
25
|
end
|
@@ -60,26 +60,24 @@ class TestSanzang < Test::Unit::TestCase
|
|
60
60
|
assert_equal(stage_2(), text)
|
61
61
|
end
|
62
62
|
|
63
|
-
def test_translate_string
|
64
|
-
table = Sanzang::TranslationTable.new(table_string())
|
65
|
-
text = Sanzang::Translator.new(table).gen_listing(stage_2())
|
66
|
-
assert_equal(stage_3(), text)
|
67
|
-
end
|
68
|
-
|
69
63
|
def test_translate_file
|
70
64
|
table_path = File.join(File.dirname(__FILE__), "utf-8", "table.txt")
|
71
65
|
s2_path = File.join(File.dirname(__FILE__), "utf-8", "stage_2.txt")
|
72
66
|
s3_path = File.join(File.dirname(__FILE__), "utf-8", "stage_3.txt")
|
73
67
|
tab = Sanzang::TranslationTable.new(IO.read(table_path, encoding: "UTF-8"))
|
74
68
|
translator = Sanzang::Translator.new(tab)
|
75
|
-
translator.translate_io(s2_path, s3_path)
|
69
|
+
translator.translate_io(s2_path, s3_path)
|
76
70
|
end
|
77
71
|
|
78
|
-
def
|
72
|
+
def test_translate_string
|
79
73
|
table = Sanzang::TranslationTable.new(table_string())
|
80
|
-
|
81
|
-
|
82
|
-
|
74
|
+
text = Sanzang::Translator.new(table).gen_listing(stage_2())
|
75
|
+
assert_equal(stage_3(), text)
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_translator_parallel
|
79
|
+
procs = Sanzang::Platform.processor_count
|
80
|
+
assert(procs > 0, "Processor count less than zero")
|
83
81
|
end
|
84
82
|
|
85
83
|
def test_translate_batch
|
metadata
CHANGED
@@ -1,27 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanzang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Lapis Lazuli Texts
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2013-
|
12
|
+
date: 2013-07-25 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: parallel
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
16
18
|
requirements:
|
17
|
-
- - '>='
|
19
|
+
- - ! '>='
|
18
20
|
- !ruby/object:Gem::Version
|
19
21
|
version: 0.5.19
|
20
22
|
type: :runtime
|
21
23
|
prerelease: false
|
22
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
23
26
|
requirements:
|
24
|
-
- - '>='
|
27
|
+
- - ! '>='
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: 0.5.19
|
27
30
|
description: Sanzang is a program built for machine translation of natural languages.
|
@@ -58,6 +61,7 @@ files:
|
|
58
61
|
- lib/sanzang/translation_table.rb
|
59
62
|
- lib/sanzang/batch_translator.rb
|
60
63
|
- lib/sanzang/version.rb
|
64
|
+
- lib/sanzang/platform.rb
|
61
65
|
- lib/sanzang/command/reflow.rb
|
62
66
|
- lib/sanzang/command/sanzang_cmd.rb
|
63
67
|
- lib/sanzang/command/translate.rb
|
@@ -70,26 +74,27 @@ files:
|
|
70
74
|
homepage: http://www.lapislazulitexts.com/sanzang/
|
71
75
|
licenses:
|
72
76
|
- GPL-3
|
73
|
-
metadata: {}
|
74
77
|
post_install_message:
|
75
78
|
rdoc_options: []
|
76
79
|
require_paths:
|
77
80
|
- lib
|
78
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
79
83
|
requirements:
|
80
|
-
- - '>='
|
84
|
+
- - ! '>='
|
81
85
|
- !ruby/object:Gem::Version
|
82
86
|
version: 1.9.0
|
83
87
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
84
89
|
requirements:
|
85
|
-
- - '>='
|
90
|
+
- - ! '>='
|
86
91
|
- !ruby/object:Gem::Version
|
87
92
|
version: '0'
|
88
93
|
requirements: []
|
89
94
|
rubyforge_project:
|
90
|
-
rubygems_version:
|
95
|
+
rubygems_version: 1.8.23
|
91
96
|
signing_key:
|
92
|
-
specification_version:
|
97
|
+
specification_version: 3
|
93
98
|
summary: Simple rule-based machine translation system.
|
94
99
|
test_files:
|
95
100
|
- test/tc_reflow_encodings.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 8b8f836d96d322d790415d013b67a6313007b29c
|
4
|
-
data.tar.gz: c929928a0b63f3e16fe7d4b5dd9c14936b67f6c0
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 69eea67e41a7e29330ab5733be22e3f6299d59dc498c7348bcd7f0f6bbff6b75923bc231c6405d43943430a224e6a9dbe52a53d9ae9ef24d6853863944be5306
|
7
|
-
data.tar.gz: ead0983545667b9d315647f0d862b5b2b2aef960ba961dd9f95a224107084e015b71e240c5c1ed87116ce3963bcc25038c953c936fff4e78fbcbb727712f3367
|