natsukantou 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a75166acdb08099edebb39e02f78ca3bf04af192e3233f71067f1178950e964b
4
+ data.tar.gz: f5af8a9a4df6a4c82e61b7bef67ec6beb3bcac6b49db2dbe7ae46ce8385084ef
5
+ SHA512:
6
+ metadata.gz: 3079ead2000c24c0c6e06156e6e775e56e233c11fe06a851e655d2b2197bb133b855ae901d8749520f9661ff37e89d759e318c2ad1e9516097fd7c14c0a6d125
7
+ data.tar.gz: 6612f175de419c560af5e221bbf3f9e074692a5fee2f3907dd800bddc7404db932201ffe69e191fadc87169ae8a6ce7df93700c48510dcbbfc96fc2bdbc28b1f
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,86 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/HashSyntax:
9
+ Enabled: false
10
+
11
+ Style/EmptyElse:
12
+ Enabled: false
13
+
14
+ Style/BlockDelimiters:
15
+ Enabled: false
16
+
17
+ Style/GuardClause:
18
+ Enabled: false
19
+
20
+ Style/SymbolArray:
21
+ Enabled: false
22
+
23
+ Style/TrailingCommaInArguments:
24
+ Enabled: false
25
+
26
+ Style/TrailingCommaInHashLiteral:
27
+ Enabled: false
28
+
29
+ Style/NegatedIf:
30
+ Enabled: false
31
+
32
+ Style/UnlessLogicalOperators:
33
+ Enabled: true
34
+ EnforcedStyle: forbid_logical_operators
35
+
36
+ Style/RescueStandardError:
37
+ Enabled: false
38
+
39
+ Style/StringLiteralsInInterpolation:
40
+ Enabled: true
41
+ EnforcedStyle: double_quotes
42
+
43
+ Style/PercentLiteralDelimiters:
44
+ Enabled: false
45
+
46
+ Style/WordArray:
47
+ Enabled: false
48
+
49
+ Style/AccessorGrouping:
50
+ Enabled: false
51
+
52
+ Style/IfUnlessModifier:
53
+ Enabled: false
54
+
55
+ Style/Documentation:
56
+ Enabled: false
57
+
58
+ Layout/ArgumentAlignment:
59
+ EnforcedStyle: with_fixed_indentation
60
+
61
+ Layout/LineLength:
62
+ Max: 120
63
+
64
+ Layout/MultilineMethodCallIndentation:
65
+ EnforcedStyle: indented
66
+
67
+ Lint/UnusedBlockArgument:
68
+ Enabled: false
69
+
70
+ Metrics/AbcSize:
71
+ Enabled: false
72
+
73
+ Metrics/BlockLength:
74
+ Enabled: false
75
+
76
+ Metrics/MethodLength:
77
+ Enabled: false
78
+
79
+ Metrics/ParameterLists:
80
+ Enabled: false
81
+
82
+ Metrics/PerceivedComplexity:
83
+ Enabled: false
84
+
85
+ Metrics/CyclomaticComplexity:
86
+ Enabled: false
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2022-07-05
4
+
5
+ - Initial release
data/Gemfile ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in natsukantou.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
9
+
10
+ gem "rspec", "~> 3.0"
11
+
12
+ gem "rubocop", "~> 1.21"
13
+
14
+ gem 'vcr', '~> 6.1'
15
+ gem 'webmock', '~> 3.17'
16
+
17
+ gem 'rib'
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2022 lulalala
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # Natsukantou
2
+
3
+ Natsukantou is a human language translation library for XML documents.
4
+
5
+ Its strength is to allow users to mix & match different middleware filters and translation services (e.g. DeepL).
6
+
7
+ [Natsukantou](https://commons.wikimedia.org/wiki/File:NatsuKanTou_Oimatsu.jpg) (夏柑糖) is a Japanese sweet. It is made by taking out the pulp of an orange, "translating" it into a jelly, and then reinserting it back to the hollow peel.
8
+
9
+ ## Installation
10
+
11
+ Install the gem and add to the application's Gemfile by executing:
12
+
13
+ $ bundle add natsukantou
14
+
15
+ If bundler is not being used to manage dependencies, install the gem by executing:
16
+
17
+ $ gem install natsukantou
18
+
19
+ ## Usage
20
+
21
+ If you just want to use it as an executable, run `natsukantou [XML_FILE]`.
22
+
23
+ It's a wizard that guides you through setting up a translator configuration.
24
+
25
+ Then it will translate the XML document.
26
+
27
+ If you choose to save the config (`translator_config.rb`), next time you can reuse it by calling `natsukantou -c [CONFIG_FILE] [XML_FILE]`.
28
+
29
+ ## Feature
30
+
31
+ ### Translators
32
+
33
+ * [DeepL API](https://www.deepl.com/pro-api)
34
+ * [みんなの自動翻訳@TexTra](https://mt-auto-minhon-mlt.ucri.jgn-x.jp/)
35
+
36
+ ### Middlewares
37
+
38
+ #### SubstitudeGlossary
39
+
40
+ An alternative for when glossaries aren't supported by the translator. This middleware substitutes glossary terms and wraps the replaced terms in a `<skip>` tag to mark them as already translated (which is supported by DeepL). It supports glossary files in TSV format.
41
+
42
+ #### HandleRubyMarkup
43
+
44
+ [Ruby markup](https://www.w3.org/International/articles/ruby/markup.en.html) is not about Ruby the programming language, but the HTML feature: annotations in Japanese and Chinese content that are rendered alongside base text.
45
+
46
+ Phrases in ruby markup are often segmented character by character, making them less translatable. This middleware flattens ruby markup to just the base text to avoid that issue.
47
+
48
+ ## Development
49
+
50
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
51
+
52
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
53
+
54
+ ### Middleware
55
+
56
+ Natsukantou aims to be flexible by utilizing the [middleware pattern](https://github.com/mitchellh/middleware). Essentially it allows the user to cherry-pick components (translator or middleware) when needed, to improve translation.
57
+
58
+ To develop your own component, create a class which responds to `call` with an `env` variable.
59
+
60
+ * `call` needs to trigger `@app.call(env)` to continue to the next component.
61
+ * `env` is a `Natsukantou::Env`, which is a special Hash. It provides convenience methods `dom`, `lang_from` and `lang_to`. You are also free to add key/values into it.
62
+ * `lang_from` and `lang_to` are `Natsukantou::LanguageCode` objects, representing ISO 639-1 language codes. The `is?` method allows comparisons such as the following:
63
+ `Natsukantou::LanguageCode.new("en-gb").is?("en") #=> true`
64
+ * The wizard can be made aware of a new component by using `autoload_and_register`:
65
+   * To infer the config prompt, `initialize` needs to accept keyword arguments for all parameters except the first (`app`), and must be documented with YARD.
66
+
67
+ ## Contributing
68
+
69
+ Bug reports and pull requests are welcome on GitLab at https://gitlab.com/lulalala/natsukantou.
70
+
71
+ ## License
72
+
73
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
data/exe/natsukantou ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "natsukantou"
6
+ require 'tty-prompt'
7
+ require 'optparse'
8
+ require 'natsukantou/setup/config_load_or_prompt'
9
+ require 'natsukantou/setup/write_prompt'
10
+
11
+ options = {}
12
+
13
+ optparse = OptionParser.new do |opts|
14
+ opts.banner = "Usage: natsukantou [options] XML_FILE"
15
+
16
+ opts.on("-c", "--config CONFIG_FILE",
17
+ "Config file generated after the first time natsukantou is run") do |v|
18
+ options[:config_path] = v
19
+ end
20
+ end
21
+ optparse.parse!
22
+
23
+ if ARGV.empty?
24
+ puts optparse
25
+ exit(-1)
26
+ end
27
+
28
+ begin
29
+ xml_path = ARGV[0]
30
+ dom = Oga.parse_xml(File.read(xml_path))
31
+ rescue Errno::ENOENT
32
+ warn 'XML not found'
33
+ return
34
+ end
35
+
36
+ prompt = TTY::Prompt.new
37
+
38
+ Natsukantou::Setup::ConfigLoadOrPrompt.new.execute(
39
+ prompt: prompt,
40
+ config_path: options[:config_path],
41
+ )
42
+
43
+ lang_from = prompt.ask("Language code to translate from?")
44
+ lang_to = prompt.ask("Language code to translate to?")
45
+
46
+ env = Natsukantou::Env.new(
47
+ dom: dom, lang_from: lang_from, lang_to: lang_to
48
+ )
49
+
50
+ NatsukantouTranslator.call(env)
51
+
52
+ Natsukantou::Setup::WritePrompt.new.execute(
53
+ prompt: prompt,
54
+ content: env[:dom].to_xml,
55
+ origin_path: xml_path,
56
+ lang_to: env.lang_to.code,
57
+ )
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deepl'
4
+
5
+ # Machine translator
6
+ #
7
+ # DeepL
8
+ # https://www.deepl.com
9
+ module Natsukantou
10
+ class DeepL
11
+ include UtilityBase
12
+
13
+ # @param app [Hash]
14
+ # @param auth_key [String] authentication key
15
+ # @param host [String] url of API endpoint
16
+ # @param version [String] API version, see https://www.deepl.com/docs-api/accessing-the-api/api-versions/
17
+ # @param request_params [Hash] other optional request parameters, see https://www.deepl.com/docs-api/translating-text/request/
18
+ def initialize(
19
+ app,
20
+ auth_key:,
21
+ host: "https://api-free.deepl.com",
22
+ version: "v2",
23
+ request_params: {}
24
+ )
25
+ @app = app
26
+ @request_params = {
27
+ tag_handling: 'xml',
28
+ ignore_tags: 'skip',
29
+ }.merge(request_params)
30
+
31
+ ::DeepL.configure do |config|
32
+ config.auth_key = auth_key
33
+ config.host = host
34
+ config.version = version
35
+ end
36
+ end
37
+
38
+ attr_reader :request_params
39
+
40
+ def call(env)
41
+ result = ::DeepL.translate(
42
+ env.dom.to_xml,
43
+ env.lang_from.code.upcase,
44
+ env.lang_to.code.upcase,
45
+ request_params,
46
+ ).to_s
47
+
48
+ env[:dom] = dom(result)
49
+
50
+ @app.call(env)
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Ruby markup is not about Ruby the programming language,
4
+ # but the HTML feature: annotations in Japanese and Chinese content
5
+ # that are rendered alongside base text.
6
+ # https://www.w3.org/International/articles/ruby/markup.en.html
7
+ #
8
+ # A phrase in ruby markup are often segmented by characters,
9
+ # making them less translatable.
10
+ # This flattens ruby markup to just the base text to avoid such issue.
11
+ module Natsukantou
12
+ class HandleRubyMarkup
13
+ include UtilityBase
14
+
15
+ def initialize(app)
16
+ @app = app
17
+ end
18
+
19
+ def call(env)
20
+ env[:dom].css('ruby').each(&method(:process_node))
21
+
22
+ @app.call(env)
23
+ end
24
+
25
+ private
26
+
27
+ def process_node(node)
28
+ node.css('rt').each(&:remove)
29
+ node.inner_text = node.text.gsub(' ', '')
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'oauth'
4
+ require 'json'
5
+
6
+ # Machine translator
7
+ #
8
+ # みんなの自動翻訳@TexTra (Min'na no Jidou Hon'yaku @ TexTra)
9
+ # https://mt-auto-minhon-mlt.ucri.jgn-x.jp
10
+ module Natsukantou
11
+ class Minhon
12
+ include UtilityBase
13
+
14
+ # @param app [Hash]
15
+ # @param name [String] login ID
16
+ # @param key [String] API Key
17
+ # @param secret [String] authentication secret
18
+ # @param url [String] request URL of translator API endpoint
19
+ # @param request_params [Hash] other optional request parameters
20
+ #
21
+ # @param translate_by_section [String] CSS selector to translate matched elements one by one.
22
+ # Minhon often returns invalid XML if xml is large.
23
+ # To bypass this limitation, one can specify a css path (e.g. "body>p") to translate in smaller chunks.
24
+ def initialize(
25
+ app, name:, key:, secret:, url:, request_params: {},
26
+ translate_by_section: nil
27
+ )
28
+ @app = app
29
+ @key = key
30
+ @secret = secret
31
+ @url = url
32
+ @request_params = request_params.merge(name: name, key: key)
33
+
34
+ ### Non request related setting
35
+ @translate_by_section = translate_by_section
36
+ end
37
+
38
+ attr_reader :request_params, :env
39
+ attr_reader :key, :secret, :url
40
+ attr_reader :translate_by_section
41
+
42
+ def call(env)
43
+ @env = env
44
+
45
+ if translate_by_section
46
+ env[:dom].css(translate_by_section).each do |node|
47
+ next if node.text.empty?
48
+
49
+ translated_xml = translate(node.to_xml)
50
+ node.replace(dom_node(translated_xml))
51
+ end
52
+ else
53
+ translated_xml = translate(env[:dom].to_xml)
54
+ env[:dom] = dom(translated_xml)
55
+ end
56
+
57
+ @app.call(env)
58
+ end
59
+
60
+ private
61
+
62
+ def translate(text)
63
+ response = client.post(url, text: text, **request_params)
64
+
65
+ body = JSON.parse(response.body)
66
+
67
+ translated_xml = body.dig('resultset', 'result', 'text')
68
+
69
+ temp_fix(env, translated_xml)
70
+
71
+ translated_xml
72
+ rescue Net::OpenTimeout
73
+ sleep 10
74
+ retry
75
+ end
76
+
77
+ def client
78
+ @client ||= OAuth::AccessToken.new(OAuth::Consumer.new(key, secret))
79
+ end
80
+
81
+ def temp_fix(env, text)
82
+ text.tr!('麵', '面') if env[:lang_to].is?('zh-TW')
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ module Setup
5
+ class ConfigLoadOrPrompt
6
+ def execute(prompt:, config_path:)
7
+ begin
8
+ load(config_path) if config_path
9
+ rescue LoadError # rubocop:disable Lint/SuppressedException
10
+ end
11
+
12
+ if !defined?(NatsukantouTranslator)
13
+ require_relative 'config_prompt'
14
+ config_content = ConfigPrompt.new.execute
15
+
16
+ if prompt.yes?("\nConfig complete. Do you want to save it for later reuse?")
17
+ File.write('translator_config.rb', config_content)
18
+ puts "Saved as translator_config.rb, which you can specify with -c flag next time.\n\n"
19
+ end
20
+
21
+ Kernel.eval(config_content)
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tty-prompt'
4
+ require 'yard'
5
+ require 'erb'
6
+ require 'json'
7
+ require 'delegate'
8
+
9
+ YARD::Tags::Library.define_tag(
10
+ 'Method parameters which should be entered as Ruby expression',
11
+ :fields_to_input_ruby
12
+ )
13
+
14
+ module Natsukantou
15
+ module Setup
16
+ class Component
17
+ attr_reader :name, :klass,
18
+ :initialize_method, :initialize_method_param_tags
19
+
20
+ def initialize(name, path)
21
+ @name = name
22
+
23
+ YARD.parse(path)
24
+
25
+ @klass = YARD::Registry.all.find do |item|
26
+ item.type == :class && item.name == name
27
+ end
28
+
29
+ @initialize_method = klass.meths.find do |item|
30
+ item.type == :method && item.name == :initialize
31
+ end
32
+
33
+ @initialize_method_param_tags = @initialize_method.tags('param')
34
+ .reject { |tag| tag.name == 'app' }
35
+ .map { |tag| MethodParamTagDecorator.new(tag) }
36
+
37
+ YARD::Registry.clear
38
+ end
39
+ end
40
+
41
+ # Both a decorator to Yard tag object,
42
+ # and where the user input is stored.
43
+ # Assumption: only dealing with keyword argument
44
+ class MethodParamTagDecorator < SimpleDelegator
45
+ attr_accessor :user_input
46
+
47
+ def initialize_method
48
+ object
49
+ end
50
+
51
+ def enter_ruby?
52
+ return true if type == 'Hash'
53
+
54
+ list = initialize_method.tag('fields_to_input_ruby')
55
+
56
+ return false if list.nil?
57
+
58
+ list.text.split(' ').include?(name)
59
+ end
60
+
61
+ def convert
62
+ case type
63
+ when Integer
64
+ :integer
65
+ when Float
66
+ :float
67
+ else
68
+ nil # do not convert and assume Ruby code
69
+ end
70
+ end
71
+
72
+ def default_value_in_string
73
+ initialize_method.parameters.find { |p| p.first == "#{name}:" }&.last
74
+ end
75
+
76
+ # tty-prompt's `value` field requires String
77
+ def default_value_in_tty
78
+ if type == 'String'
79
+ JSON.parse(default_value_in_string)
80
+ else
81
+ default_value_in_string
82
+ end
83
+ end
84
+
85
+ def user_input_in_erb
86
+ if enter_ruby?
87
+ user_input
88
+ else
89
+ user_input.inspect
90
+ end
91
+ end
92
+ end
93
+
94
+ class ConfigPrompt
95
+ def initialize
96
+ @prompt = TTY::Prompt.new
97
+
98
+ registry = Natsukantou::Setup::Registry.instance
99
+
100
+ @translators = registry.translators.map do |args|
101
+ Natsukantou::Setup::Component.new(*args)
102
+ end
103
+
104
+ @filters = registry.middlewares.map do |args|
105
+ Natsukantou::Setup::Component.new(*args)
106
+ end
107
+
108
+ @config = {}
109
+ end
110
+
111
+ attr_reader :prompt, :translators, :filters, :config
112
+ attr_reader :know_ruby
113
+
114
+ def execute
115
+ ask
116
+ output
117
+ end
118
+
119
+ def ask
120
+ @know_ruby = prompt.yes?('Do you know Ruby or JSON syntax?')
121
+
122
+ translator = prompt.select(
123
+ "Choose translation engine:",
124
+ translators.map { |t| [t.name, t] }.to_h
125
+ )
126
+
127
+ config[:translator] = {
128
+ component: translator,
129
+ parameters: ask_initialize_parameters(translator)
130
+ }
131
+
132
+ config[:filters] = []
133
+
134
+ selected_filters = prompt.multi_select(
135
+ "Select one or many filters (using spacebar):",
136
+ filters.map { |t| [t.name, t] }.to_h
137
+ )
138
+
139
+ selected_filters.each do |filter|
140
+ config[:filters] << {
141
+ component: filter,
142
+ parameters: ask_initialize_parameters(filter)
143
+ }
144
+ end
145
+ end
146
+
147
+ def output
148
+ template = ERB.new(
149
+ File.read((File.expand_path 'output.erb', __dir__)),
150
+ trim_mode: "<>-"
151
+ )
152
+ template.result(binding)
153
+ end
154
+
155
+ private
156
+
157
+ def ask_initialize_parameters(component)
158
+ puts "\n## Parameters for #{component.name} ---\n"
159
+
160
+ answers = []
161
+
162
+ component.initialize_method_param_tags.each do |tag|
163
+ next if tag.enter_ruby? && !know_ruby
164
+
165
+ ask_params = {}
166
+
167
+ printf "Enter '#{tag.name}', #{tag.text}"
168
+
169
+ ask_params[:convert] = tag.convert if tag.convert
170
+
171
+ default = tag.default_value_in_string
172
+ if default == 'nil'
173
+ printf " (optional)"
174
+ elsif default
175
+ printf " (with default)"
176
+ ask_params[:value] = tag.default_value_in_tty
177
+ else
178
+ ask_params[:required] = true
179
+ printf " (required)"
180
+ end
181
+
182
+ if tag.enter_ruby?
183
+ printf("\nEnter Ruby expression (#{tag.type}), or press Enter to skip")
184
+ end
185
+
186
+ printf ":\n"
187
+
188
+ result = prompt.ask(**ask_params)
189
+
190
+ next if result.nil?
191
+
192
+ # Check if provided default value was changed, and if not,
193
+ # the param can be ignored by relying on method declaration's default value.
194
+ next if ask_params.key?(:value) && result == tag.default_value_in_tty
195
+
196
+ tag.user_input = result
197
+ answers << tag
198
+ end
199
+
200
+ answers
201
+ end
202
+ end
203
+ end
204
+ end
@@ -0,0 +1,10 @@
1
+ ::NatsukantouTranslator = Middleware::Builder.new do
2
+ <% config[:filters].each do |component| -%>
3
+ use Natsukantou::<%= component[:component].name -%>
4
+ <% component[:parameters].each do |tag| -%>, <%= tag.name %>: <%= tag.user_input_in_erb -%><% end %>
5
+ <% end %>
6
+ <% config[:translator].tap do |component| -%>
7
+ use Natsukantou::<%= component[:component].name -%>
8
+ <% component[:parameters].each do |tag| -%>, <%= tag.name %>: <%= tag.user_input_in_erb -%><% end %>
9
+ <% end -%>
10
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+
5
+ module Natsukantou
6
+ module Setup
7
+ class Registry
8
+ include Singleton
9
+
10
+ attr_reader :translators
11
+ attr_reader :middlewares
12
+
13
+ def initialize
14
+ @translators = []
15
+ @middlewares = []
16
+ end
17
+
18
+ def register(type, klass, path)
19
+ case type
20
+ when :translator
21
+ translators << [klass, path]
22
+ when :middleware
23
+ middlewares << [klass, path]
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake/ext/string'
4
+
5
+ module Natsukantou
6
+ module Setup
7
+ class WritePrompt
8
+ def execute(prompt:, content:, origin_path:, lang_to:)
9
+ result_path = prompt.ask(
10
+ 'Where do you want to save the result?',
11
+ value: origin_path.pathmap("%X.#{lang_to}%x")
12
+ )
13
+
14
+ File.write(result_path, content)
15
+ rescue Errno::EISDIR
16
+ warn 'Specified path is a directory.'
17
+ retry
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tsv'
4
+
5
+ # Glossary isn't supported in some language
6
+ # combinations (e.g. DeepL ja to zh).
7
+ # We can, however, substitute terms prior to translation
8
+ # and mark them as translated.
9
+ #
10
+ # Support TSV format
11
+ module Natsukantou
12
+ class SubstitudeGlossary
13
+ include UtilityBase
14
+
15
+ LANGUAGES_WITHOUT_WORD_DIVIDERS = %w{
16
+ ja zh th lo
17
+ }.freeze
18
+
19
+ # @param filepath [String] path to TSV glossary file.
20
+ # @param glossary [Array(Array(String, String))] array representing glossary, e.g. [['book', '本']]
21
+ # @fields_to_input_ruby glossary
22
+ def initialize(app, filepath: nil, glossary: [])
23
+ @app = app
24
+
25
+ @glossary = glossary
26
+ @glossary.concat(parse_tsv(filepath)) if filepath
27
+
28
+ @glossary.uniq! { |row| row[0] }
29
+
30
+ # Longer term has higher priority
31
+ @glossary.sort_by! { |row| -row[0].length }
32
+ end
33
+
34
+ attr_reader :glossary, :regex, :replacement_mapping
35
+
36
+ def call(env)
37
+ prepare_regular_expression(env)
38
+
39
+ env[:dom].each_node(&method(:process_node))
40
+
41
+ @app.call(env)
42
+ end
43
+
44
+ def prepare_regular_expression(env)
45
+ source_terms = glossary.map(&:first)
46
+ source_terms.map! { |w| /\b#{w}\b/ } if with_space_divider?(env)
47
+ @regex = Regexp.union(*source_terms)
48
+
49
+ @replacement_mapping = glossary.to_h
50
+ @replacement_mapping.transform_values! { |v| "<skip>#{v}</skip>" }
51
+ end
52
+
53
+ private
54
+
55
+ def parse_tsv(path)
56
+ TSV.parse_file(path).without_header.map do |row|
57
+ [row[0], row[1]]
58
+ end
59
+ end
60
+
61
+ def process_node(node)
62
+ return if !node.is_a?(Oga::XML::Text)
63
+
64
+ new_text = node.text.gsub(regex, replacement_mapping)
65
+
66
+ return if new_text == node.text
67
+
68
+ dom(new_text).children.reverse_each do |new_node|
69
+ node.after(new_node)
70
+ end
71
+ node.remove
72
+ end
73
+
74
+ def with_space_divider?(env)
75
+ LANGUAGES_WITHOUT_WORD_DIVIDERS.none? do |lang|
76
+ env[:lang_from].is?(lang)
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+
5
+ # Environment of the middleware,
6
+ # which is just a specialized hash with convenience methods,
7
+ # and converts language settings for ease of comparison.
8
+ module Natsukantou
9
+ class Env < SimpleDelegator
10
+ # @param [Hash] env
11
+ # @option env [Oga::XML::Document] dom
12
+ # @option lang_from [String] Language code in ISO 639-1 to be translated from.
13
+ # @option lang_to [String] Language code in ISO 639-1 to be translated to.
14
+ def initialize(env)
15
+ # Convert languages
16
+ [:lang_from, :lang_to].each do |key|
17
+ env[key] = LanguageCode.new(env.fetch(key))
18
+ end
19
+
20
+ # Required keys
21
+ [:dom].each do |key|
22
+ env.fetch(key)
23
+ end
24
+
25
+ super(env)
26
+ end
27
+
28
+ def dom
29
+ self[:dom]
30
+ end
31
+
32
+ def lang_from
33
+ self[:lang_from]
34
+ end
35
+
36
+ def lang_to
37
+ self[:lang_to]
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ class LanguageCode
5
+ def initialize(code)
6
+ @code = code.downcase
7
+ @main, @sub = @code.split('-')
8
+
9
+ if @main.length != 2
10
+ raise 'Language code shoul be in the form of xx or xx-yy'
11
+ end
12
+ end
13
+
14
+ attr_reader :code, :main, :sub
15
+
16
+ def is?(other_code)
17
+ other = self.class.new(other_code)
18
+
19
+ return false if other.main != main
20
+
21
+ return true if other.sub.nil?
22
+
23
+ sub ? other.sub == sub : false
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ module Logger
5
+ def logger
6
+ ::Natsukantou.logger
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ module ParseXml
5
+ # @param xml [String]
6
+ # @return [Oga::XML::Document]
7
+ def dom(xml)
8
+ Oga.parse_xml(xml, strict: true)
9
+ rescue => e
10
+ logger.error(e.message)
11
+
12
+ Oga.parse_xml(xml)
13
+ end
14
+
15
+ # @param xml [String]
16
+ # @return [Oga::XML::Node]
17
+ def dom_node(xml)
18
+ dom = dom(xml)
19
+
20
+ if dom.children.size != 1
21
+ logger.error("Parsed XML contains more than one root element:")
22
+ logger.error(xml)
23
+ end
24
+
25
+ dom.children.first
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'parse_xml'
4
+ require_relative 'logger'
5
+
6
+ module Natsukantou
7
+ module UtilityBase
8
+ include ParseXml
9
+ include Logger
10
+ end
11
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ VERSION = "0.1.0"
5
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+ require 'middleware'
5
+ require 'oga'
6
+
7
+ require_relative "natsukantou/version"
8
+ require_relative "natsukantou/setup/registry"
9
+ require_relative "natsukantou/utility/language_code"
10
+ require_relative "natsukantou/utility/env"
11
+
12
+ module Natsukantou
13
+ class Error < StandardError; end
14
+
15
+ # While autoloading in Ruby, also keep it in the local registry,
16
+ # which can be used in setup prompt.
17
+ def self.autoload_and_register(type, constant, path)
18
+ Setup::Registry.instance.register(
19
+ type,
20
+ constant,
21
+ (File.expand_path "../#{path}.rb", __FILE__)
22
+ )
23
+
24
+ autoload(constant, path)
25
+ end
26
+
27
+ # Misc
28
+ autoload :UtilityBase, "natsukantou/utility/utility_base"
29
+
30
+ # Translators
31
+ autoload_and_register :translator, :DeepL, "natsukantou/deep_l"
32
+ autoload_and_register :translator, :Minhon, "natsukantou/minhon"
33
+
34
+ # Middlewares
35
+ autoload_and_register :middleware, :SubstitudeGlossary, "natsukantou/substitude_glossary"
36
+ autoload_and_register :middleware, :HandleRubyMarkup, "natsukantou/handle_ruby_markup"
37
+
38
+ def self.logger
39
+ @logger ||= ::Logger.new($stdout)
40
+ end
41
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/natsukantou/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "natsukantou"
7
+ spec.version = Natsukantou::VERSION
8
+ spec.authors = ["lulalala"]
9
+ spec.email = ["mark@goodlife.tw"]
10
+
11
+ spec.summary = "human language translation library for XML documents"
12
+ spec.description = "human language translation library for XML documents, supporting DeepL"
13
+ spec.homepage = "https://gitlab.com/lulalala/natsukantou"
14
+ spec.license = "MIT"
15
+ spec.required_ruby_version = ">= 2.7.0"
16
+
17
+ spec.metadata["homepage_uri"] = spec.homepage
18
+ spec.metadata["source_code_uri"] = spec.homepage
19
+ spec.metadata["changelog_uri"] = 'https://gitlab.com/lulalala/natsukantou/-/releases'
20
+
21
+ # Specify which files should be added to the gem when it is released.
22
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
23
+ spec.files = Dir.chdir(__dir__) do
24
+ `git ls-files -z`.split("\x0").reject do |f|
25
+ (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
26
+ end
27
+ end
28
+ spec.bindir = "exe"
29
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
+ spec.require_paths = ["lib"]
31
+
32
+ spec.add_dependency "middleware", "~> 0.1.0"
33
+ spec.add_dependency "oga", "~> 3.4"
34
+ spec.add_dependency "tsv", "~> 1.0"
35
+
36
+ spec.add_dependency "tty-prompt", "~> 0.23"
37
+ spec.add_dependency "yard", "~> 0.9"
38
+
39
+ # DeepL
40
+ spec.add_dependency "deepl-rb", "~> 2.5"
41
+
42
+ # Minhon
43
+ spec.add_dependency 'oauth', '~> 0.5.1'
44
+
45
+ # For more information and examples about making a new gem, check out our
46
+ # guide at: https://bundler.io/guides/creating_gem.html
47
+ end
@@ -0,0 +1,4 @@
1
+ module Natsukantou
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: natsukantou
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - lulalala
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-09-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: middleware
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.1.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.1.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: oga
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: tsv
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: tty-prompt
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.23'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.23'
69
+ - !ruby/object:Gem::Dependency
70
+ name: yard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.9'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: deepl-rb
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.5'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.5'
97
+ - !ruby/object:Gem::Dependency
98
+ name: oauth
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.5.1
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.5.1
111
+ description: human language translation library for XML documents, supporting DeepL
112
+ email:
113
+ - mark@goodlife.tw
114
+ executables:
115
+ - natsukantou
116
+ extensions: []
117
+ extra_rdoc_files: []
118
+ files:
119
+ - ".rspec"
120
+ - ".rubocop.yml"
121
+ - CHANGELOG.md
122
+ - Gemfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - exe/natsukantou
127
+ - lib/natsukantou.rb
128
+ - lib/natsukantou/deep_l.rb
129
+ - lib/natsukantou/handle_ruby_markup.rb
130
+ - lib/natsukantou/minhon.rb
131
+ - lib/natsukantou/setup/config_load_or_prompt.rb
132
+ - lib/natsukantou/setup/config_prompt.rb
133
+ - lib/natsukantou/setup/output.erb
134
+ - lib/natsukantou/setup/registry.rb
135
+ - lib/natsukantou/setup/write_prompt.rb
136
+ - lib/natsukantou/substitude_glossary.rb
137
+ - lib/natsukantou/utility/env.rb
138
+ - lib/natsukantou/utility/language_code.rb
139
+ - lib/natsukantou/utility/logger.rb
140
+ - lib/natsukantou/utility/parse_xml.rb
141
+ - lib/natsukantou/utility/utility_base.rb
142
+ - lib/natsukantou/version.rb
143
+ - natsukantou.gemspec
144
+ - sig/natsukantou.rbs
145
+ homepage: https://gitlab.com/lulalala/natsukantou
146
+ licenses:
147
+ - MIT
148
+ metadata:
149
+ homepage_uri: https://gitlab.com/lulalala/natsukantou
150
+ source_code_uri: https://gitlab.com/lulalala/natsukantou
151
+ changelog_uri: https://gitlab.com/lulalala/natsukantou/-/releases
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: 2.7.0
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubygems_version: 3.1.6
168
+ signing_key:
169
+ specification_version: 4
170
+ summary: human language translation library for XML documents
171
+ test_files: []