sanzang 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/MANUAL.rdoc +3 -8
- data/README.rdoc +4 -11
- data/lib/sanzang.rb +3 -2
- data/lib/sanzang/batch_translator.rb +5 -14
- data/lib/sanzang/command/batch.rb +8 -25
- data/lib/sanzang/command/reflow.rb +14 -41
- data/lib/sanzang/command/sanzang_cmd.rb +24 -18
- data/lib/sanzang/command/translate.rb +10 -40
- data/lib/sanzang/platform.rb +128 -0
- data/lib/sanzang/text_formatter.rb +1 -1
- data/lib/sanzang/translation_table.rb +1 -1
- data/lib/sanzang/translator.rb +17 -5
- data/lib/sanzang/version.rb +2 -2
- data/test/tc_simple_translation.rb +9 -11
- metadata +14 -9
- checksums.yaml +0 -7
data/MANUAL.rdoc
CHANGED
@@ -293,14 +293,9 @@ messages will still be displayed in the console's native IBM-437 encoding.
|
|
293
293
|
|
294
294
|
$ sanzang t -E UTF-16LE -i in.txt -o out.txt TABLE.txt
|
295
295
|
|
296
|
-
If the "-E" option is not specified, then \Sanzang will use the default
|
297
|
-
encoding
|
298
|
-
\
|
299
|
-
written to in the UTF-8 encoding. The one *exception* to this is for
|
300
|
-
environments using the IBM-437 encoding (typically an old Windows command
|
301
|
-
shell). In this case, \Sanzang will take pity on you and automatically switch
|
302
|
-
to UTF-8 by default, as if you had specified the option "-E" with value
|
303
|
-
"UTF-8".
|
296
|
+
If the "-E" option is not specified, then \Sanzang will use the default data
|
297
|
+
encoding for that environment. The data encoding can be seen by running
|
298
|
+
\sanzang with the "--version" or "--platform" options.
|
304
299
|
|
305
300
|
== Responsible Use
|
306
301
|
|
data/README.rdoc
CHANGED
@@ -34,19 +34,12 @@ automatically download and install \Sanzang onto your computer.
|
|
34
34
|
# gem install sanzang
|
35
35
|
|
36
36
|
After this, you should be able to run the _sanzang_ command. Run the following
|
37
|
-
command to verify your installation and print
|
37
|
+
command to verify your installation and print version information.
|
38
38
|
|
39
|
-
# sanzang -
|
39
|
+
# sanzang -V
|
40
40
|
|
41
|
-
This command should show a summary of your
|
41
|
+
This command should show a summary of your \Sanzang version and environment.
|
42
42
|
|
43
|
-
|
44
|
-
Ruby version: 2.0.0
|
45
|
-
External encoding: UTF-8
|
46
|
-
Internal encoding: none
|
47
|
-
Fork implemented: true
|
48
|
-
Parallel version: 0.6.4
|
49
|
-
Processors found: 4
|
50
|
-
Sanzang version: 1.0.0
|
43
|
+
sanzang 1.0.4 [ruby_1.9.3] [x86_64-linux] [UTF-8]
|
51
44
|
|
52
45
|
You now have \Sanzang installed on your computer.
|
data/lib/sanzang.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby -w
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -23,10 +23,11 @@
|
|
23
23
|
module Sanzang
|
24
24
|
end
|
25
25
|
|
26
|
+
require_relative File.join("sanzang", "batch_translator")
|
27
|
+
require_relative File.join("sanzang", "platform")
|
26
28
|
require_relative File.join("sanzang", "text_formatter")
|
27
29
|
require_relative File.join("sanzang", "translation_table")
|
28
30
|
require_relative File.join("sanzang", "translator")
|
29
|
-
require_relative File.join("sanzang", "batch_translator")
|
30
31
|
require_relative File.join("sanzang", "version")
|
31
32
|
|
32
33
|
# The Sanzang::Command module contains Unix style commands utilizing the
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "parallel"
|
20
20
|
|
21
|
+
require_relative "platform"
|
21
22
|
require_relative "translator"
|
22
23
|
|
23
24
|
module Sanzang
|
@@ -28,18 +29,6 @@ module Sanzang
|
|
28
29
|
#
|
29
30
|
class BatchTranslator < Translator
|
30
31
|
|
31
|
-
# Evaluates to true if this Ruby can execute the fork(2) system call.
|
32
|
-
#
|
33
|
-
def forking?
|
34
|
-
Process.respond_to?(:fork)
|
35
|
-
end
|
36
|
-
|
37
|
-
# The number of logical processors detected on the current system.
|
38
|
-
#
|
39
|
-
def processor_count
|
40
|
-
Parallel.processor_count
|
41
|
-
end
|
42
|
-
|
43
32
|
# Translate a batch of files. The main parameter is an array, each element
|
44
33
|
# of which should be a two-dimensional array with the first element being
|
45
34
|
# the input file path, and the second element being the output file path.
|
@@ -47,8 +36,10 @@ module Sanzang
|
|
47
36
|
# return value is an array containing all the output file paths.
|
48
37
|
#
|
49
38
|
def translate_batch(fpath_pairs, verbose = true, jobs = nil)
|
50
|
-
if not
|
39
|
+
if not Sanzang::Platform.unix_processes?
|
51
40
|
jobs = 0
|
41
|
+
elsif not jobs
|
42
|
+
jobs = Sanzang::Platform.processor_count
|
52
43
|
end
|
53
44
|
Parallel.map(fpath_pairs, :in_processes => jobs) do |f1,f2|
|
54
45
|
translate_io(f1, f2)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "optparse"
|
20
20
|
|
21
|
+
require_relative File.join("..", "platform")
|
21
22
|
require_relative File.join("..", "translation_table")
|
22
23
|
require_relative File.join("..", "batch_translator")
|
23
24
|
require_relative File.join("..", "version")
|
@@ -35,7 +36,7 @@ module Sanzang::Command
|
|
35
36
|
#
|
36
37
|
def initialize
|
37
38
|
@name = "sanzang batch"
|
38
|
-
@encoding =
|
39
|
+
@encoding = Sanzang::Platform.data_encoding
|
39
40
|
@outdir = nil
|
40
41
|
@jobs = nil
|
41
42
|
@verbose = false
|
@@ -56,8 +57,6 @@ module Sanzang::Command
|
|
56
57
|
return 1
|
57
58
|
end
|
58
59
|
|
59
|
-
set_data_encoding
|
60
|
-
|
61
60
|
translator = nil
|
62
61
|
File.open(args[0], "rb", encoding: @encoding) do |table_file|
|
63
62
|
table = Sanzang::TranslationTable.new(table_file.read)
|
@@ -79,20 +78,11 @@ module Sanzang::Command
|
|
79
78
|
return 1
|
80
79
|
end
|
81
80
|
|
82
|
-
|
81
|
+
# Name of the command
|
82
|
+
#
|
83
|
+
attr_reader :name
|
83
84
|
|
84
|
-
|
85
|
-
#
|
86
|
-
def set_data_encoding
|
87
|
-
if @encoding == nil
|
88
|
-
if Encoding.default_external.to_s =~ /ASCII|IBM/
|
89
|
-
$stderr.puts "Encoding: UTF-8"
|
90
|
-
@encoding = Encoding::UTF_8
|
91
|
-
else
|
92
|
-
@encoding = Encoding.default_external
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
85
|
+
private
|
96
86
|
|
97
87
|
# Return an OptionParser object for this command
|
98
88
|
#
|
@@ -116,10 +106,7 @@ module Sanzang::Command
|
|
116
106
|
@encoding = Encoding.find(v)
|
117
107
|
end
|
118
108
|
op.on("-L", "--list-encodings", "list possible encodings") do |v|
|
119
|
-
|
120
|
-
x.to_s.upcase <=> y.to_s.upcase
|
121
|
-
end
|
122
|
-
puts encodings
|
109
|
+
Sanzang::Platform.valid_encodings.each {|e| puts e.to_s }
|
123
110
|
exit 0
|
124
111
|
end
|
125
112
|
op.on("-j", "--jobs=N", "allow N concurrent processes") do |v|
|
@@ -131,9 +118,5 @@ module Sanzang::Command
|
|
131
118
|
end
|
132
119
|
end
|
133
120
|
|
134
|
-
# Name of the command
|
135
|
-
#
|
136
|
-
attr_reader :name
|
137
|
-
|
138
121
|
end
|
139
122
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "optparse"
|
20
20
|
|
21
|
+
require_relative File.join("..", "platform")
|
21
22
|
require_relative File.join("..", "text_formatter")
|
22
23
|
require_relative File.join("..", "version")
|
23
24
|
|
@@ -36,27 +37,12 @@ module Sanzang::Command
|
|
36
37
|
#
|
37
38
|
def initialize
|
38
39
|
@name = "sanzang reflow"
|
39
|
-
@encoding =
|
40
|
+
@encoding = Sanzang::Platform.data_encoding
|
40
41
|
@infile = nil
|
41
42
|
@outfile = nil
|
42
43
|
@verbose = false
|
43
44
|
end
|
44
45
|
|
45
|
-
# Get a list of all acceptable text encodings.
|
46
|
-
#
|
47
|
-
def valid_encodings
|
48
|
-
all_enc = Encoding.list.collect {|e| e.to_s }.sort do |x,y|
|
49
|
-
x.upcase <=> y.upcase
|
50
|
-
end
|
51
|
-
all_enc.find_all do |e|
|
52
|
-
begin
|
53
|
-
Encoding::Converter.search_convpath(e, Encoding::UTF_8)
|
54
|
-
rescue Encoding::ConverterNotFoundError
|
55
|
-
e == "UTF-8" ? true : false
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
46
|
# Run the reflow command with the given arguments. The parameter _args_
|
61
47
|
# would typically be an array of command options and parameters. Calling
|
62
48
|
# this with the "-h" or "--help" option will print full usage information
|
@@ -71,8 +57,6 @@ module Sanzang::Command
|
|
71
57
|
return 1
|
72
58
|
end
|
73
59
|
|
74
|
-
set_data_encoding
|
75
|
-
|
76
60
|
begin
|
77
61
|
fin = @infile ? File.open(@infile, "r") : $stdin
|
78
62
|
fin.binmode.set_encoding(@encoding)
|
@@ -101,20 +85,11 @@ module Sanzang::Command
|
|
101
85
|
return 1
|
102
86
|
end
|
103
87
|
|
104
|
-
|
105
|
-
|
106
|
-
# Initialize the encoding for text data if it is not already set
|
88
|
+
# The name of the command
|
107
89
|
#
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
$stderr.puts "Encoding: UTF-8"
|
112
|
-
@encoding = Encoding::UTF_8
|
113
|
-
else
|
114
|
-
@encoding = Encoding.default_external
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end
|
90
|
+
attr_reader :name
|
91
|
+
|
92
|
+
private
|
118
93
|
|
119
94
|
# An OptionParser for the command
|
120
95
|
#
|
@@ -122,10 +97,12 @@ module Sanzang::Command
|
|
122
97
|
OptionParser.new do |op|
|
123
98
|
op.banner = "Usage: #{@name} [options]\n"
|
124
99
|
|
125
|
-
op.banner << "\nReformat text
|
126
|
-
op.banner << "
|
127
|
-
op.banner << "
|
128
|
-
op.banner << "
|
100
|
+
op.banner << "\nReformat text into lines based on spacing, "
|
101
|
+
op.banner << "punctuation, etc. This should work\nfor the CJK "
|
102
|
+
op.banner << "languages (Chinese, Japanese, and Korean). By default, "
|
103
|
+
op.banner << "text is read\nfrom STDIN and written to STDOUT."
|
104
|
+
op.banner << "\n"
|
105
|
+
|
129
106
|
op.banner << "\nOptions:\n"
|
130
107
|
|
131
108
|
op.on("-h", "--help", "show this help message and exit") do |v|
|
@@ -136,7 +113,7 @@ module Sanzang::Command
|
|
136
113
|
@encoding = Encoding.find(v)
|
137
114
|
end
|
138
115
|
op.on("-L", "--list-encodings", "list possible encodings") do |v|
|
139
|
-
puts
|
116
|
+
Sanzang::Platform.valid_encodings.each {|e| puts e.to_s }
|
140
117
|
exit 0
|
141
118
|
end
|
142
119
|
op.on("-i", "--infile=FILE", "read input text from FILE") do |v|
|
@@ -151,9 +128,5 @@ module Sanzang::Command
|
|
151
128
|
end
|
152
129
|
end
|
153
130
|
|
154
|
-
# The name of the command
|
155
|
-
#
|
156
|
-
attr_reader :name
|
157
|
-
|
158
131
|
end
|
159
132
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -23,6 +23,7 @@ require_relative "reflow"
|
|
23
23
|
require_relative "translate"
|
24
24
|
require_relative "batch"
|
25
25
|
|
26
|
+
require_relative File.join("..", "platform")
|
26
27
|
require_relative File.join("..", "version")
|
27
28
|
|
28
29
|
module Sanzang::Command
|
@@ -77,22 +78,30 @@ module Sanzang::Command
|
|
77
78
|
# A string giving a listing of platform information
|
78
79
|
#
|
79
80
|
def platform_info
|
80
|
-
info = "
|
81
|
-
info << "
|
82
|
-
info << "
|
83
|
-
info << "
|
84
|
-
info << "
|
85
|
-
info << "
|
86
|
-
info << "
|
87
|
-
info << "
|
81
|
+
info = "host_arch = #{Sanzang::Platform.machine_arch}\n"
|
82
|
+
info << "host_os = #{Sanzang::Platform.os_name}\n"
|
83
|
+
info << "host_processors = #{Sanzang::Platform.processor_count}\n"
|
84
|
+
info << "ruby_encoding_ext = #{Encoding.default_external}\n"
|
85
|
+
info << "ruby_encoding_int = #{Encoding.default_internal or 'none'}\n"
|
86
|
+
info << "ruby_multiproc = #{Sanzang::Platform.unix_processes?}\n"
|
87
|
+
info << "ruby_platform = #{RUBY_PLATFORM}\n"
|
88
|
+
info << "ruby_version = #{RUBY_VERSION}\n"
|
89
|
+
info << "sanzang_encoding = #{Sanzang::Platform.data_encoding}\n"
|
90
|
+
info << "sanzang_parallel = #{Parallel::VERSION}\n"
|
91
|
+
info << "sanzang_version = #{Sanzang::VERSION}\n"
|
88
92
|
end
|
89
93
|
|
90
94
|
# This is a string giving a brief one-line summary of version information
|
91
95
|
#
|
92
96
|
def version_info
|
93
|
-
"sanzang #{Sanzang::VERSION} [ruby_#{RUBY_VERSION}] [#{RUBY_PLATFORM}]"
|
97
|
+
"sanzang #{Sanzang::VERSION} [ruby_#{RUBY_VERSION}] [#{RUBY_PLATFORM}]" \
|
98
|
+
+ " [#{Sanzang::Platform.data_encoding}]"
|
94
99
|
end
|
95
100
|
|
101
|
+
# Name of the command
|
102
|
+
#
|
103
|
+
attr_reader :name
|
104
|
+
|
96
105
|
private
|
97
106
|
|
98
107
|
# An OptionParser object for parsing command options and parameters
|
@@ -100,16 +109,17 @@ module Sanzang::Command
|
|
100
109
|
def option_parser
|
101
110
|
OptionParser.new do |op|
|
102
111
|
op.banner = "Usage: #{@name} [options]\n"
|
103
|
-
op.banner << "Usage: #{@name} <command> [options] [args]\n
|
112
|
+
op.banner << "Usage: #{@name} <command> [options] [args]\n"
|
104
113
|
|
105
|
-
op.banner << "
|
114
|
+
op.banner << "\nUse \"-h\" or \"--help\" with sanzang commands for "
|
115
|
+
op.banner << "usage information.\n"
|
106
116
|
|
107
117
|
op.banner << "\nSanzang commands:\n"
|
108
|
-
op.banner << " batch translate many files in parallel\n"
|
118
|
+
op.banner << " batch translate many files in parallel\n"
|
109
119
|
op.banner << " reflow format CJK text for translation\n"
|
110
120
|
op.banner << " translate standard single text translation\n"
|
111
|
-
op.banner << "\nOptions:\n"
|
112
121
|
|
122
|
+
op.banner << "\nOptions:\n"
|
113
123
|
op.on("-h", "--help", "show this help message and exit") do |v|
|
114
124
|
puts op
|
115
125
|
exit 0
|
@@ -125,9 +135,5 @@ module Sanzang::Command
|
|
125
135
|
end
|
126
136
|
end
|
127
137
|
|
128
|
-
# Name of the command
|
129
|
-
#
|
130
|
-
attr_reader :name
|
131
|
-
|
132
138
|
end
|
133
139
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -18,6 +18,7 @@
|
|
18
18
|
|
19
19
|
require "optparse"
|
20
20
|
|
21
|
+
require_relative File.join("..", "platform")
|
21
22
|
require_relative File.join("..", "translation_table")
|
22
23
|
require_relative File.join("..", "translator")
|
23
24
|
require_relative File.join("..", "version")
|
@@ -34,27 +35,12 @@ module Sanzang::Command
|
|
34
35
|
#
|
35
36
|
def initialize
|
36
37
|
@name = "sanzang translate"
|
37
|
-
@encoding =
|
38
|
+
@encoding = Sanzang::Platform.data_encoding
|
38
39
|
@infile = nil
|
39
40
|
@outfile = nil
|
40
41
|
@verbose = false
|
41
42
|
end
|
42
43
|
|
43
|
-
# Get a list of all acceptable text encodings.
|
44
|
-
#
|
45
|
-
def valid_encodings
|
46
|
-
all_enc = Encoding.list.collect {|e| e.to_s }.sort do |x,y|
|
47
|
-
x.upcase <=> y.upcase
|
48
|
-
end
|
49
|
-
all_enc.find_all do |e|
|
50
|
-
begin
|
51
|
-
Encoding::Converter.search_convpath(e, Encoding::UTF_8)
|
52
|
-
rescue Encoding::ConverterNotFoundError
|
53
|
-
e == "UTF-8" ? true : false
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
44
|
# Run the translate command with the given arguments. The parameter _args_
|
59
45
|
# would typically be an array of command options and parameters. Calling
|
60
46
|
# this with the "-h" or "--help" option will print full usage information
|
@@ -69,8 +55,6 @@ module Sanzang::Command
|
|
69
55
|
return 1
|
70
56
|
end
|
71
57
|
|
72
|
-
set_data_encoding
|
73
|
-
|
74
58
|
translator = nil
|
75
59
|
File.open(args[0], "rb", encoding: @encoding) do |table_file|
|
76
60
|
table = Sanzang::TranslationTable.new(table_file.read)
|
@@ -105,20 +89,11 @@ module Sanzang::Command
|
|
105
89
|
return 1
|
106
90
|
end
|
107
91
|
|
108
|
-
|
92
|
+
# Name of the command
|
93
|
+
#
|
94
|
+
attr_reader :name
|
109
95
|
|
110
|
-
|
111
|
-
#
|
112
|
-
def set_data_encoding
|
113
|
-
if @encoding == nil
|
114
|
-
if Encoding.default_external.to_s =~ /ASCII|IBM/
|
115
|
-
$stderr.puts "Encoding: UTF-8"
|
116
|
-
@encoding = Encoding::UTF_8
|
117
|
-
else
|
118
|
-
@encoding = Encoding.default_external
|
119
|
-
end
|
120
|
-
end
|
121
|
-
end
|
96
|
+
private
|
122
97
|
|
123
98
|
# An OptionParser for the command
|
124
99
|
#
|
@@ -128,10 +103,9 @@ module Sanzang::Command
|
|
128
103
|
|
129
104
|
op.banner << "\nTranslate text using simple table rules. Input text "
|
130
105
|
op.banner << "is read from STDIN by\ndefault, and the output is "
|
131
|
-
op.banner << "written to STDOUT by default
|
106
|
+
op.banner << "written to STDOUT by default. The translation table "
|
107
|
+
op.banner << "\nfile is specified as a parameter.\n"
|
132
108
|
|
133
|
-
op.banner << "\nExample:\n"
|
134
|
-
op.banner << " #{@name} -i text.txt -o text.sz.txt table.txt\n"
|
135
109
|
op.banner << "\nOptions:\n"
|
136
110
|
|
137
111
|
op.on("-h", "--help", "show this help message and exit") do |v|
|
@@ -142,7 +116,7 @@ module Sanzang::Command
|
|
142
116
|
@encoding = Encoding.find(v)
|
143
117
|
end
|
144
118
|
op.on("-L", "--list-encodings", "list possible encodings") do |v|
|
145
|
-
puts
|
119
|
+
Sanzang::Platform.valid_encodings.each {|e| puts e.to_s }
|
146
120
|
exit 0
|
147
121
|
end
|
148
122
|
op.on("-i", "--infile=FILE", "read input text from FILE") do |v|
|
@@ -157,9 +131,5 @@ module Sanzang::Command
|
|
157
131
|
end
|
158
132
|
end
|
159
133
|
|
160
|
-
# Name of the command
|
161
|
-
#
|
162
|
-
attr_reader :name
|
163
|
-
|
164
134
|
end
|
165
135
|
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- encoding: UTF-8 -*-
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
|
+
#
|
6
|
+
# This program is free software: you can redistribute it and/or modify it under
|
7
|
+
# the terms of the GNU General Public License as published by the Free Software
|
8
|
+
# Foundation, either version 3 of the License, or (at your option) any later
|
9
|
+
# version.
|
10
|
+
#
|
11
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
12
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
13
|
+
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
14
|
+
# details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License along with
|
17
|
+
# this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
require 'rbconfig'
|
20
|
+
|
21
|
+
# The Sanzang::Platform module includes information about the underlying system
|
22
|
+
# that is needed by the \Sanzang system. This includes information about the
|
23
|
+
# machine architecture and OS, the number of processors available, encodings
|
24
|
+
# that are supported, and encodings that are optimal.
|
25
|
+
#
|
26
|
+
module Sanzang::Platform
|
27
|
+
class << self
|
28
|
+
|
29
|
+
# CPU architecture of the underlying machine
|
30
|
+
#
|
31
|
+
def machine_arch
|
32
|
+
RbConfig::CONFIG["target_cpu"]
|
33
|
+
end
|
34
|
+
|
35
|
+
# Operating system, which may be different from RUBY_PLATFORM
|
36
|
+
#
|
37
|
+
def os_name
|
38
|
+
RbConfig::CONFIG["target_os"]
|
39
|
+
end
|
40
|
+
|
41
|
+
# Does this Ruby VM support Unix-style process handling?
|
42
|
+
#
|
43
|
+
def unix_processes?
|
44
|
+
[:fork, :wait, :kill].each do |f|
|
45
|
+
if not Process.respond_to?(f)
|
46
|
+
return false
|
47
|
+
end
|
48
|
+
end
|
49
|
+
true
|
50
|
+
end
|
51
|
+
|
52
|
+
# Find the number of logical processors seen by the system. This may be
|
53
|
+
# different from the number of physical processors or CPU cores. If the
|
54
|
+
# number of processors cannot be detected, nil is returned. For Windows,
|
55
|
+
# this is detected through an OLE lookup, and for Unix systems, a heuristic
|
56
|
+
# approach is taken. Supported Unix types include:
|
57
|
+
#
|
58
|
+
# * AIX: pmcycles (AIX 5+), lsdev
|
59
|
+
# * BSD: /sbin/sysctl
|
60
|
+
# * Cygwin: /proc/cpuinfo
|
61
|
+
# * Darwin: hwprefs, /usr/sbin/sysctl
|
62
|
+
# * HP-UX: ioscan
|
63
|
+
# * IRIX: sysconf
|
64
|
+
# * Linux: /proc/cpuinfo
|
65
|
+
# * Minix 3+: /proc/cpuinfo
|
66
|
+
# * Solaris: psrinfo
|
67
|
+
# * Tru64 UNIX: psrinfo
|
68
|
+
# * UnixWare: psrinfo
|
69
|
+
#
|
70
|
+
def processor_count
|
71
|
+
if os_name =~ /mingw|mswin/
|
72
|
+
require 'win32ole'
|
73
|
+
result = WIN32OLE.connect("winmgmts://").ExecQuery(
|
74
|
+
"select NumberOfLogicalProcessors from Win32_Processor")
|
75
|
+
result.to_enum.first.NumberOfLogicalProcessors
|
76
|
+
elsif File.readable?("/proc/cpuinfo")
|
77
|
+
IO.read("/proc/cpuinfo").scan(/^processor/).size
|
78
|
+
elsif File.executable?("/usr/bin/hwprefs")
|
79
|
+
IO.popen(%w[/usr/bin/hwprefs thread_count]).read.to_i
|
80
|
+
elsif File.executable?("/usr/sbin/psrinfo")
|
81
|
+
IO.popen("/usr/sbin/psrinfo").read.scan(/^.*on-*line/).size
|
82
|
+
elsif File.executable?("/usr/sbin/ioscan")
|
83
|
+
IO.popen(%w[/usr/sbin/ioscan -kC processor]) do |out|
|
84
|
+
out.read.scan(/^.*processor/).size
|
85
|
+
end
|
86
|
+
elsif File.executable?("/usr/sbin/pmcycles")
|
87
|
+
IO.popen(%w[/usr/sbin/pmcycles -m]).read.count("\n")
|
88
|
+
elsif File.executable?("/usr/sbin/lsdev")
|
89
|
+
IO.popen(%w[/usr/sbin/lsdev -Cc processor -S 1]).read.count("\n")
|
90
|
+
elsif File.executable?("/usr/sbin/sysconf") and os_name =~ /IRIX/i
|
91
|
+
IO.popen(%w[/usr/sbin/sysconf NPROC_ONLN]).read.to_i
|
92
|
+
elsif File.executable?("/usr/sbin/sysctl")
|
93
|
+
IO.popen(%w[/usr/sbin/sysctl -n hw.ncpu]).read.to_i
|
94
|
+
elsif File.executable?("/sbin/sysctl")
|
95
|
+
IO.popen(%w[/sbin/sysctl -n hw.ncpu]).read.to_i
|
96
|
+
else
|
97
|
+
nil
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Text encodings that can be converted to UTF-8. MRI still lacks some
|
102
|
+
# converter implementations for obscure encodings.
|
103
|
+
#
|
104
|
+
def valid_encodings
|
105
|
+
Encoding.list.find_all do |e|
|
106
|
+
begin
|
107
|
+
Encoding::Converter.search_convpath(e, Encoding::UTF_8)
|
108
|
+
rescue Encoding::ConverterNotFoundError
|
109
|
+
e == Encoding::UTF_8 ? true : false
|
110
|
+
end
|
111
|
+
end.sort_by! {|e| e.to_s.upcase }
|
112
|
+
end
|
113
|
+
|
114
|
+
# Default text data encoding on this platform. This is usually the default
|
115
|
+
# external encoding of the Ruby interpreter; however, if the encoding is
|
116
|
+
# an ASCII variant or an old IBM DOS encoding, then it should default to
|
117
|
+
# UTF-8 since these are effectively obsolete, or they are subsets of UTF-8.
|
118
|
+
#
|
119
|
+
def data_encoding
|
120
|
+
if Encoding.default_external.to_s =~ /ASCII|IBM/
|
121
|
+
Encoding::UTF_8
|
122
|
+
else
|
123
|
+
Encoding.default_external
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
data/lib/sanzang/translator.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -69,7 +69,7 @@ module Sanzang
|
|
69
69
|
# Translator#translate is collated and numbered for reference purposes.
|
70
70
|
# This is the normal text listing output of the Sanzang Translator.
|
71
71
|
#
|
72
|
-
def gen_listing(source_text)
|
72
|
+
def gen_listing(source_text, pos = 1)
|
73
73
|
source_encoding = source_text.encoding
|
74
74
|
source_text.encode!(Encoding::UTF_8)
|
75
75
|
|
@@ -79,7 +79,7 @@ module Sanzang
|
|
79
79
|
listing = ""
|
80
80
|
texts[0].length.times do |line_i|
|
81
81
|
@table.width.times do |col_i|
|
82
|
-
listing << "[#{
|
82
|
+
listing << "[#{pos + line_i}.#{col_i + 1}] #{texts[col_i][line_i]}" \
|
83
83
|
<< newline
|
84
84
|
end
|
85
85
|
listing << newline
|
@@ -90,7 +90,8 @@ module Sanzang
|
|
90
90
|
# Read a text from _input_ and write its translation listing to _output_.
|
91
91
|
# If a parameter is a string, it is interpreted as the path to a file, and
|
92
92
|
# the relevant file is opened and used. Otherwise, the parameter is treated
|
93
|
-
# as an open IO object.
|
93
|
+
# as an open IO object. I/O is buffered for better performance and to avoid
|
94
|
+
# reading entire texts into memory.
|
94
95
|
#
|
95
96
|
def translate_io(input, output)
|
96
97
|
if input.kind_of?(String)
|
@@ -103,7 +104,18 @@ module Sanzang
|
|
103
104
|
else
|
104
105
|
io_out = output
|
105
106
|
end
|
106
|
-
|
107
|
+
|
108
|
+
buf_size = 96
|
109
|
+
buffer = ""
|
110
|
+
io_in.each do |line|
|
111
|
+
buffer << line
|
112
|
+
if io_in.lineno % buf_size == 0
|
113
|
+
io_out.write(gen_listing(buffer, io_in.lineno - buf_size + 1))
|
114
|
+
buffer = ""
|
115
|
+
end
|
116
|
+
end
|
117
|
+
io_out.write(
|
118
|
+
gen_listing(buffer, io_in.lineno - buffer.rstrip.count("\n")))
|
107
119
|
ensure
|
108
120
|
io_in.close if input.kind_of?(String) and not io_in.closed?
|
109
121
|
io_out.close if output.kind_of?(String) and not io_out.closed?
|
data/lib/sanzang/version.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# -*- encoding: UTF-8 -*-
|
3
3
|
#--
|
4
|
-
# Copyright (C) 2012 Lapis Lazuli Texts
|
4
|
+
# Copyright (C) 2012-2013 Lapis Lazuli Texts
|
5
5
|
#
|
6
6
|
# This program is free software: you can redistribute it and/or modify it under
|
7
7
|
# the terms of the GNU General Public License as published by the Free Software
|
@@ -20,6 +20,6 @@ module Sanzang
|
|
20
20
|
|
21
21
|
# Current version number of Sanzang
|
22
22
|
#
|
23
|
-
VERSION = "1.0.
|
23
|
+
VERSION = "1.0.4"
|
24
24
|
|
25
25
|
end
|
@@ -60,26 +60,24 @@ class TestSanzang < Test::Unit::TestCase
|
|
60
60
|
assert_equal(stage_2(), text)
|
61
61
|
end
|
62
62
|
|
63
|
-
def test_translate_string
|
64
|
-
table = Sanzang::TranslationTable.new(table_string())
|
65
|
-
text = Sanzang::Translator.new(table).gen_listing(stage_2())
|
66
|
-
assert_equal(stage_3(), text)
|
67
|
-
end
|
68
|
-
|
69
63
|
def test_translate_file
|
70
64
|
table_path = File.join(File.dirname(__FILE__), "utf-8", "table.txt")
|
71
65
|
s2_path = File.join(File.dirname(__FILE__), "utf-8", "stage_2.txt")
|
72
66
|
s3_path = File.join(File.dirname(__FILE__), "utf-8", "stage_3.txt")
|
73
67
|
tab = Sanzang::TranslationTable.new(IO.read(table_path, encoding: "UTF-8"))
|
74
68
|
translator = Sanzang::Translator.new(tab)
|
75
|
-
translator.translate_io(s2_path, s3_path)
|
69
|
+
translator.translate_io(s2_path, s3_path)
|
76
70
|
end
|
77
71
|
|
78
|
-
def
|
72
|
+
def test_translate_string
|
79
73
|
table = Sanzang::TranslationTable.new(table_string())
|
80
|
-
|
81
|
-
|
82
|
-
|
74
|
+
text = Sanzang::Translator.new(table).gen_listing(stage_2())
|
75
|
+
assert_equal(stage_3(), text)
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_translator_parallel
|
79
|
+
procs = Sanzang::Platform.processor_count
|
80
|
+
assert(procs > 0, "Processor count less than zero")
|
83
81
|
end
|
84
82
|
|
85
83
|
def test_translate_batch
|
metadata
CHANGED
@@ -1,27 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanzang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.4
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Lapis Lazuli Texts
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2013-
|
12
|
+
date: 2013-07-25 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: parallel
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
16
18
|
requirements:
|
17
|
-
- - '>='
|
19
|
+
- - ! '>='
|
18
20
|
- !ruby/object:Gem::Version
|
19
21
|
version: 0.5.19
|
20
22
|
type: :runtime
|
21
23
|
prerelease: false
|
22
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
23
26
|
requirements:
|
24
|
-
- - '>='
|
27
|
+
- - ! '>='
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: 0.5.19
|
27
30
|
description: Sanzang is a program built for machine translation of natural languages.
|
@@ -58,6 +61,7 @@ files:
|
|
58
61
|
- lib/sanzang/translation_table.rb
|
59
62
|
- lib/sanzang/batch_translator.rb
|
60
63
|
- lib/sanzang/version.rb
|
64
|
+
- lib/sanzang/platform.rb
|
61
65
|
- lib/sanzang/command/reflow.rb
|
62
66
|
- lib/sanzang/command/sanzang_cmd.rb
|
63
67
|
- lib/sanzang/command/translate.rb
|
@@ -70,26 +74,27 @@ files:
|
|
70
74
|
homepage: http://www.lapislazulitexts.com/sanzang/
|
71
75
|
licenses:
|
72
76
|
- GPL-3
|
73
|
-
metadata: {}
|
74
77
|
post_install_message:
|
75
78
|
rdoc_options: []
|
76
79
|
require_paths:
|
77
80
|
- lib
|
78
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
79
83
|
requirements:
|
80
|
-
- - '>='
|
84
|
+
- - ! '>='
|
81
85
|
- !ruby/object:Gem::Version
|
82
86
|
version: 1.9.0
|
83
87
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
84
89
|
requirements:
|
85
|
-
- - '>='
|
90
|
+
- - ! '>='
|
86
91
|
- !ruby/object:Gem::Version
|
87
92
|
version: '0'
|
88
93
|
requirements: []
|
89
94
|
rubyforge_project:
|
90
|
-
rubygems_version:
|
95
|
+
rubygems_version: 1.8.23
|
91
96
|
signing_key:
|
92
|
-
specification_version:
|
97
|
+
specification_version: 3
|
93
98
|
summary: Simple rule-based machine translation system.
|
94
99
|
test_files:
|
95
100
|
- test/tc_reflow_encodings.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 8b8f836d96d322d790415d013b67a6313007b29c
|
4
|
-
data.tar.gz: c929928a0b63f3e16fe7d4b5dd9c14936b67f6c0
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 69eea67e41a7e29330ab5733be22e3f6299d59dc498c7348bcd7f0f6bbff6b75923bc231c6405d43943430a224e6a9dbe52a53d9ae9ef24d6853863944be5306
|
7
|
-
data.tar.gz: ead0983545667b9d315647f0d862b5b2b2aef960ba961dd9f95a224107084e015b71e240c5c1ed87116ce3963bcc25038c953c936fff4e78fbcbb727712f3367
|