natsukantou 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a75166acdb08099edebb39e02f78ca3bf04af192e3233f71067f1178950e964b
4
+ data.tar.gz: f5af8a9a4df6a4c82e61b7bef67ec6beb3bcac6b49db2dbe7ae46ce8385084ef
5
+ SHA512:
6
+ metadata.gz: 3079ead2000c24c0c6e06156e6e775e56e233c11fe06a851e655d2b2197bb133b855ae901d8749520f9661ff37e89d759e318c2ad1e9516097fd7c14c0a6d125
7
+ data.tar.gz: 6612f175de419c560af5e221bbf3f9e074692a5fee2f3907dd800bddc7404db932201ffe69e191fadc87169ae8a6ce7df93700c48510dcbbfc96fc2bdbc28b1f
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,86 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/HashSyntax:
9
+ Enabled: false
10
+
11
+ Style/EmptyElse:
12
+ Enabled: false
13
+
14
+ Style/BlockDelimiters:
15
+ Enabled: false
16
+
17
+ Style/GuardClause:
18
+ Enabled: false
19
+
20
+ Style/SymbolArray:
21
+ Enabled: false
22
+
23
+ Style/TrailingCommaInArguments:
24
+ Enabled: false
25
+
26
+ Style/TrailingCommaInHashLiteral:
27
+ Enabled: false
28
+
29
+ Style/NegatedIf:
30
+ Enabled: false
31
+
32
+ Style/UnlessLogicalOperators:
33
+ Enabled: true
34
+ EnforcedStyle: forbid_logical_operators
35
+
36
+ Style/RescueStandardError:
37
+ Enabled: false
38
+
39
+ Style/StringLiteralsInInterpolation:
40
+ Enabled: true
41
+ EnforcedStyle: double_quotes
42
+
43
+ Style/PercentLiteralDelimiters:
44
+ Enabled: false
45
+
46
+ Style/WordArray:
47
+ Enabled: false
48
+
49
+ Style/AccessorGrouping:
50
+ Enabled: false
51
+
52
+ Style/IfUnlessModifier:
53
+ Enabled: false
54
+
55
+ Style/Documentation:
56
+ Enabled: false
57
+
58
+ Layout/ArgumentAlignment:
59
+ EnforcedStyle: with_fixed_indentation
60
+
61
+ Layout/LineLength:
62
+ Max: 120
63
+
64
+ Layout/MultilineMethodCallIndentation:
65
+ EnforcedStyle: indented
66
+
67
+ Lint/UnusedBlockArgument:
68
+ Enabled: false
69
+
70
+ Metrics/AbcSize:
71
+ Enabled: false
72
+
73
+ Metrics/BlockLength:
74
+ Enabled: false
75
+
76
+ Metrics/MethodLength:
77
+ Enabled: false
78
+
79
+ Metrics/ParameterLists:
80
+ Enabled: false
81
+
82
+ Metrics/PerceivedComplexity:
83
+ Enabled: false
84
+
85
+ Metrics/CyclomaticComplexity:
86
+ Enabled: false
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2022-07-05
4
+
5
+ - Initial release
data/Gemfile ADDED
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in natsukantou.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
9
+
10
+ gem "rspec", "~> 3.0"
11
+
12
+ gem "rubocop", "~> 1.21"
13
+
14
+ gem 'vcr', '~> 6.1'
15
+ gem 'webmock', '~> 3.17'
16
+
17
+ gem 'rib'
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2022 lulalala
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # Natsukantou
2
+
3
+ Natsukantou is a human language translation library for XML documents.
4
+
5
+ Its strength is to allow users to mix & match different middleware filters and translation services (e.g. DeepL).
6
+
7
+ [Natsukantou](https://commons.wikimedia.org/wiki/File:NatsuKanTou_Oimatsu.jpg) (夏柑糖) is a Japanese sweet. It is made by taking out the pulp of an orange, "translating" it into a jelly, and then putting it back into the hollow peel.
8
+
9
+ ## Installation
10
+
11
+ Install the gem and add to the application's Gemfile by executing:
12
+
13
+ $ bundle add natsukantou
14
+
15
+ If bundler is not being used to manage dependencies, install the gem by executing:
16
+
17
+ $ gem install natsukantou
18
+
19
+ ## Usage
20
+
21
+ If you just want to use it as an executable, run `natsukantou [XML_FILE]`.
22
+
23
+ It's a wizard that guides you through setting up a translator configuration.
24
+
25
+ Then it will translate the XML document.
26
+
27
+ If you choose to save the config (`translator_config.rb`), next time you can reuse it by calling `natsukantou -c [CONFIG_FILE] [XML_FILE]`.
28
+
29
+ ## Features
30
+
31
+ ### Translators
32
+
33
+ * [DeepL API](https://www.deepl.com/pro-api)
34
+ * [みんなの自動翻訳@TexTra](https://mt-auto-minhon-mlt.ucri.jgn-x.jp/)
35
+
36
+ ### Middlewares
37
+
38
+ #### SubstitudeGlossary
39
+
40
+ An alternative for when glossaries aren't supported by the translator. This middleware substitutes glossary terms before translation and wraps each replaced term in a `<skip>` tag to mark it as already translated (which is supported by DeepL). It supports glossary files in TSV format.
41
+
42
+ #### HandleRubyMarkup
43
+
44
+ [Ruby markup](https://www.w3.org/International/articles/ruby/markup.en.html) is not about Ruby the programming language, but the HTML feature: annotations in Japanese and Chinese content that are rendered alongside base text.
45
+
46
+ A phrase in ruby markup is often segmented character by character, making it less translatable. This middleware flattens ruby markup to just the base text to avoid this issue.
47
+
48
+ ## Development
49
+
50
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
51
+
52
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
53
+
54
+ ### Middleware
55
+
56
+ Natsukantou aims to be flexible by utilizing the [middleware pattern](https://github.com/mitchellh/middleware). Essentially it allows the user to cherry-pick components (translator or middleware) when needed, to improve translation.
57
+
58
+ To develop your own component, create a class which responds to `call` with an `env` variable.
59
+
60
+ * `call` needs to trigger `@app.call(env)` to continue to the next component.
61
+ * `env` is a `Natsukantou::Env`, which is a special Hash. It provides convenience methods `dom`, `lang_from` and `lang_to`. You are also free to add key/values into it.
62
+ * `lang_from` and `lang_to` are `Natsukantou::LanguageCode` instances representing ISO 639-1 language codes. The `is?` method allows comparisons such as:
63
+ `Natsukantou::LanguageCode.new("en-gb").is?("en") #=> true`
64
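+ * The wizard is made aware of a new component when it is registered with `autoload_and_register`, e.g. `Natsukantou.autoload_and_register :middleware, :MyFilter, "natsukantou/my_filter"`.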
65
+ * For the wizard to infer the config prompts, `initialize` must accept keyword arguments for every parameter except the first (`app`), and those parameters must be documented with YARD `@param` tags.
66
+
67
+ ## Contributing
68
+
69
+ Bug reports and pull requests are welcome on GitLab at https://gitlab.com/lulalala/natsukantou.
70
+
71
+ ## License
72
+
73
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
data/exe/natsukantou ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "natsukantou"
6
+ require 'tty-prompt'
7
+ require 'optparse'
8
+ require 'natsukantou/setup/config_load_or_prompt'
9
+ require 'natsukantou/setup/write_prompt'
10
+
11
+ options = {}
12
+
13
+ optparse = OptionParser.new do |opts|
14
+ opts.banner = "Usage: natsukantou [options] XML_FILE"
15
+
16
+ opts.on("-c", "--config CONFIG_FILE",
17
+ "Config file generated after the first time natsukantou is run") do |v|
18
+ options[:config_path] = v
19
+ end
20
+ end
21
+ optparse.parse!
22
+
23
+ if ARGV.empty?
24
+ puts optparse
25
+ exit(-1)
26
+ end
27
+
28
+ begin
29
+ xml_path = ARGV[0]
30
+ dom = Oga.parse_xml(File.read(xml_path))
31
+ rescue Errno::ENOENT
32
+ warn 'XML not found'
33
+ return
34
+ end
35
+
36
+ prompt = TTY::Prompt.new
37
+
38
+ Natsukantou::Setup::ConfigLoadOrPrompt.new.execute(
39
+ prompt: prompt,
40
+ config_path: options[:config_path],
41
+ )
42
+
43
+ lang_from = prompt.ask("Language code to translate from?")
44
+ lang_to = prompt.ask("Language code to translate to?")
45
+
46
+ env = Natsukantou::Env.new(
47
+ dom: dom, lang_from: lang_from, lang_to: lang_to
48
+ )
49
+
50
+ NatsukantouTranslator.call(env)
51
+
52
+ Natsukantou::Setup::WritePrompt.new.execute(
53
+ prompt: prompt,
54
+ content: env[:dom].to_xml,
55
+ origin_path: xml_path,
56
+ lang_to: env.lang_to.code,
57
+ )
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deepl'
4
+
5
+ # Machine translator
6
+ #
7
+ # DeepL
8
+ # https://www.deepl.com
9
+ module Natsukantou
10
+ class DeepL
11
+ include UtilityBase
12
+
13
+ # @param app [Hash]
14
+ # @param auth_key [String] authentication key
15
+ # @param host [String] url of API endpoint
16
+ # @param version [String] API version, see https://www.deepl.com/docs-api/accessing-the-api/api-versions/
17
+ # @param request_params [Hash] other optional request parameters, see https://www.deepl.com/docs-api/translating-text/request/
18
+ def initialize(
19
+ app,
20
+ auth_key:,
21
+ host: "https://api-free.deepl.com",
22
+ version: "v2",
23
+ request_params: {}
24
+ )
25
+ @app = app
26
+ @request_params = {
27
+ tag_handling: 'xml',
28
+ ignore_tags: 'skip',
29
+ }.merge(request_params)
30
+
31
+ ::DeepL.configure do |config|
32
+ config.auth_key = auth_key
33
+ config.host = host
34
+ config.version = version
35
+ end
36
+ end
37
+
38
+ attr_reader :request_params
39
+
40
+ def call(env)
41
+ result = ::DeepL.translate(
42
+ env.dom.to_xml,
43
+ env.lang_from.code.upcase,
44
+ env.lang_to.code.upcase,
45
+ request_params,
46
+ ).to_s
47
+
48
+ env[:dom] = dom(result)
49
+
50
+ @app.call(env)
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Ruby markup is not about Ruby the programming language,
4
+ # but the HTML feature: annotations in Japanese and Chinese content
5
+ # that are rendered alongside base text.
6
+ # https://www.w3.org/International/articles/ruby/markup.en.html
7
+ #
8
+ # A phrase in ruby markup is often segmented character by character,
9
+ # making it less translatable.
10
+ # This flattens ruby markup to just the base text to avoid this issue.
11
+ module Natsukantou
12
+ class HandleRubyMarkup
13
+ include UtilityBase
14
+
15
+ def initialize(app)
16
+ @app = app
17
+ end
18
+
19
+ def call(env)
20
+ env[:dom].css('ruby').each(&method(:process_node))
21
+
22
+ @app.call(env)
23
+ end
24
+
25
+ private
26
+
27
+ def process_node(node)
28
+ node.css('rt').each(&:remove)
29
+ node.inner_text = node.text.gsub(' ', '')
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'oauth'
4
+ require 'json'
5
+
6
+ # Machine translator
7
+ #
8
+ # みんなの自動翻訳@TexTra (Min'na no Jidou Hon'yaku @ TexTra)
9
+ # https://mt-auto-minhon-mlt.ucri.jgn-x.jp
10
+ module Natsukantou
11
+ class Minhon
12
+ include UtilityBase
13
+
14
+ # @param app [Hash]
15
+ # @param name [String] login ID
16
+ # @param key [String] API Key
17
+ # @param secret [String] authentication secret
18
+ # @param url [String] request URL of translator API endpoint
19
+ # @param request_params [Hash] other optional request parameters
20
+ #
21
+ # @param translate_by_section [String] CSS selector to translate matched elements one by one.
22
+ # Minhon often returns invalid XML if xml is large.
23
+ # To bypass this limitation, one can specify a css path (e.g. "body>p") to translate in smaller chunks.
24
+ def initialize(
25
+ app, name:, key:, secret:, url:, request_params: {},
26
+ translate_by_section: nil
27
+ )
28
+ @app = app
29
+ @key = key
30
+ @secret = secret
31
+ @url = url
32
+ @request_params = request_params.merge(name: name, key: key)
33
+
34
+ ### Non request related setting
35
+ @translate_by_section = translate_by_section
36
+ end
37
+
38
+ attr_reader :request_params, :env
39
+ attr_reader :key, :secret, :url
40
+ attr_reader :translate_by_section
41
+
42
+ def call(env)
43
+ @env = env
44
+
45
+ if translate_by_section
46
+ env[:dom].css(translate_by_section).each do |node|
47
+ next if node.text.empty?
48
+
49
+ translated_xml = translate(node.to_xml)
50
+ node.replace(dom_node(translated_xml))
51
+ end
52
+ else
53
+ translated_xml = translate(env[:dom].to_xml)
54
+ env[:dom] = dom(translated_xml)
55
+ end
56
+
57
+ @app.call(env)
58
+ end
59
+
60
+ private
61
+
62
+ def translate(text)
63
+ response = client.post(url, text: text, **request_params)
64
+
65
+ body = JSON.parse(response.body)
66
+
67
+ translated_xml = body.dig('resultset', 'result', 'text')
68
+
69
+ temp_fix(env, translated_xml)
70
+
71
+ translated_xml
72
+ rescue Net::OpenTimeout
73
+ sleep 10
74
+ retry
75
+ end
76
+
77
+ def client
78
+ @client ||= OAuth::AccessToken.new(OAuth::Consumer.new(key, secret))
79
+ end
80
+
81
+ def temp_fix(env, text)
82
+ text.tr!('麵', '面') if env[:lang_to].is?('zh-TW')
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ module Setup
5
+ class ConfigLoadOrPrompt
6
+ def execute(prompt:, config_path:)
7
+ begin
8
+ load(config_path) if config_path
9
+ rescue LoadError # rubocop:disable Lint/SuppressedException
10
+ end
11
+
12
+ if !defined?(NatsukantouTranslator)
13
+ require_relative 'config_prompt'
14
+ config_content = ConfigPrompt.new.execute
15
+
16
+ if prompt.yes?("\nConfig complete. Do you want to save it for later reuse?")
17
+ File.write('translator_config.rb', config_content)
18
+ puts "Saved as translator_config.rb, which you can specify with -c flag next time.\n\n"
19
+ end
20
+
21
+ Kernel.eval(config_content)
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tty-prompt'
4
+ require 'yard'
5
+ require 'erb'
6
+ require 'json'
7
+ require 'delegate'
8
+
9
+ YARD::Tags::Library.define_tag(
10
+ 'Method parameters which should be entered as Ruby expression',
11
+ :fields_to_input_ruby
12
+ )
13
+
14
+ module Natsukantou
15
+ module Setup
16
+ class Component
17
+ attr_reader :name, :klass,
18
+ :initialize_method, :initialize_method_param_tags
19
+
20
+ def initialize(name, path)
21
+ @name = name
22
+
23
+ YARD.parse(path)
24
+
25
+ @klass = YARD::Registry.all.find do |item|
26
+ item.type == :class && item.name == name
27
+ end
28
+
29
+ @initialize_method = klass.meths.find do |item|
30
+ item.type == :method && item.name == :initialize
31
+ end
32
+
33
+ @initialize_method_param_tags = @initialize_method.tags('param')
34
+ .reject { |tag| tag.name == 'app' }
35
+ .map { |tag| MethodParamTagDecorator.new(tag) }
36
+
37
+ YARD::Registry.clear
38
+ end
39
+ end
40
+
41
+ # Both a decorator to Yard tag object,
42
+ # and where the user input is stored.
43
+ # Assumption: only dealing with keyword argument
44
+ class MethodParamTagDecorator < SimpleDelegator
45
+ attr_accessor :user_input
46
+
47
+ def initialize_method
48
+ object
49
+ end
50
+
51
+ def enter_ruby?
52
+ return true if type == 'Hash'
53
+
54
+ list = initialize_method.tag('fields_to_input_ruby')
55
+
56
+ return false if list.nil?
57
+
58
+ list.text.split(' ').include?(name)
59
+ end
60
+
61
+ def convert
62
+ case type
63
+ when Integer
64
+ :integer
65
+ when Float
66
+ :float
67
+ else
68
+ nil # do not convert and assume Ruby code
69
+ end
70
+ end
71
+
72
+ def default_value_in_string
73
+ initialize_method.parameters.find { |p| p.first == "#{name}:" }&.last
74
+ end
75
+
76
+ # tty-prompt's `value` field requires String
77
+ def default_value_in_tty
78
+ if type == 'String'
79
+ JSON.parse(default_value_in_string)
80
+ else
81
+ default_value_in_string
82
+ end
83
+ end
84
+
85
+ def user_input_in_erb
86
+ if enter_ruby?
87
+ user_input
88
+ else
89
+ user_input.inspect
90
+ end
91
+ end
92
+ end
93
+
94
+ class ConfigPrompt
95
+ def initialize
96
+ @prompt = TTY::Prompt.new
97
+
98
+ registry = Natsukantou::Setup::Registry.instance
99
+
100
+ @translators = registry.translators.map do |args|
101
+ Natsukantou::Setup::Component.new(*args)
102
+ end
103
+
104
+ @filters = registry.middlewares.map do |args|
105
+ Natsukantou::Setup::Component.new(*args)
106
+ end
107
+
108
+ @config = {}
109
+ end
110
+
111
+ attr_reader :prompt, :translators, :filters, :config
112
+ attr_reader :know_ruby
113
+
114
+ def execute
115
+ ask
116
+ output
117
+ end
118
+
119
+ def ask
120
+ @know_ruby = prompt.yes?('Do you know Ruby or JSON syntax?')
121
+
122
+ translator = prompt.select(
123
+ "Choose translation engine:",
124
+ translators.map { |t| [t.name, t] }.to_h
125
+ )
126
+
127
+ config[:translator] = {
128
+ component: translator,
129
+ parameters: ask_initialize_parameters(translator)
130
+ }
131
+
132
+ config[:filters] = []
133
+
134
+ selected_filters = prompt.multi_select(
135
+ "Select one or many filters (using spacebar):",
136
+ filters.map { |t| [t.name, t] }.to_h
137
+ )
138
+
139
+ selected_filters.each do |filter|
140
+ config[:filters] << {
141
+ component: filter,
142
+ parameters: ask_initialize_parameters(filter)
143
+ }
144
+ end
145
+ end
146
+
147
+ def output
148
+ template = ERB.new(
149
+ File.read((File.expand_path 'output.erb', __dir__)),
150
+ trim_mode: "<>-"
151
+ )
152
+ template.result(binding)
153
+ end
154
+
155
+ private
156
+
157
+ def ask_initialize_parameters(component)
158
+ puts "\n## Parameters for #{component.name} ---\n"
159
+
160
+ answers = []
161
+
162
+ component.initialize_method_param_tags.each do |tag|
163
+ next if tag.enter_ruby? && !know_ruby
164
+
165
+ ask_params = {}
166
+
167
+ printf "Enter '#{tag.name}', #{tag.text}"
168
+
169
+ ask_params[:convert] = tag.convert if tag.convert
170
+
171
+ default = tag.default_value_in_string
172
+ if default == 'nil'
173
+ printf " (optional)"
174
+ elsif default
175
+ printf " (with default)"
176
+ ask_params[:value] = tag.default_value_in_tty
177
+ else
178
+ ask_params[:required] = true
179
+ printf " (required)"
180
+ end
181
+
182
+ if tag.enter_ruby?
183
+ printf("\nEnter Ruby expression (#{tag.type}), or press Enter to skip")
184
+ end
185
+
186
+ printf ":\n"
187
+
188
+ result = prompt.ask(**ask_params)
189
+
190
+ next if result.nil?
191
+
192
+ # Check if provided default value was changed, and if not,
193
+ # the param can be ignored by relying on method declaration's default value.
194
+ next if ask_params.key?(:value) && result == tag.default_value_in_tty
195
+
196
+ tag.user_input = result
197
+ answers << tag
198
+ end
199
+
200
+ answers
201
+ end
202
+ end
203
+ end
204
+ end
@@ -0,0 +1,10 @@
1
+ ::NatsukantouTranslator = Middleware::Builder.new do
2
+ <% config[:filters].each do |component| -%>
3
+ use Natsukantou::<%= component[:component].name -%>
4
+ <% component[:parameters].each do |tag| -%>, <%= tag.name %>: <%= tag.user_input_in_erb -%><% end %>
5
+ <% end %>
6
+ <% config[:translator].tap do |component| -%>
7
+ use Natsukantou::<%= component[:component].name -%>
8
+ <% component[:parameters].each do |tag| -%>, <%= tag.name %>: <%= tag.user_input_in_erb -%><% end %>
9
+ <% end -%>
10
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'singleton'
4
+
5
+ module Natsukantou
6
+ module Setup
7
+ class Registry
8
+ include Singleton
9
+
10
+ attr_reader :translators
11
+ attr_reader :middlewares
12
+
13
+ def initialize
14
+ @translators = []
15
+ @middlewares = []
16
+ end
17
+
18
+ def register(type, klass, path)
19
+ case type
20
+ when :translator
21
+ translators << [klass, path]
22
+ when :middleware
23
+ middlewares << [klass, path]
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake/ext/string'
4
+
5
+ module Natsukantou
6
+ module Setup
7
+ class WritePrompt
8
+ def execute(prompt:, content:, origin_path:, lang_to:)
9
+ result_path = prompt.ask(
10
+ 'Where do you want to save the result?',
11
+ value: origin_path.pathmap("%X.#{lang_to}%x")
12
+ )
13
+
14
+ File.write(result_path, content)
15
+ rescue Errno::EISDIR
16
+ warn 'Specified path is a directory.'
17
+ retry
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tsv'
4
+
5
+ # Glossary isn't supported in some language
6
+ # combinations (e.g. DeepL ja to zh).
7
+ # We can however substitute terms prior to translation
8
+ # and mark it as translated.
9
+ #
10
+ # Support TSV format
11
+ module Natsukantou
12
+ class SubstitudeGlossary
13
+ include UtilityBase
14
+
15
+ LANGUAGES_WITHOUT_WORD_DIVIDERS = %w{
16
+ ja zh th lo
17
+ }.freeze
18
+
19
+ # @param filepath [String] path to TSV glossary file.
20
+ # @param glossary [Array(Array(String, String))] array representing glossary, e.g. [['book', '本']]
21
+ # @fields_to_input_ruby glossary
22
+ def initialize(app, filepath: nil, glossary: [])
23
+ @app = app
24
+
25
+ @glossary = glossary
26
+ @glossary.concat(parse_tsv(filepath)) if filepath
27
+
28
+ @glossary.uniq! { |row| row[0] }
29
+
30
+ # Longer term has higher priority
31
+ @glossary.sort_by! { |row| -row[0].length }
32
+ end
33
+
34
+ attr_reader :glossary, :regex, :replacement_mapping
35
+
36
+ def call(env)
37
+ prepare_regular_expression(env)
38
+
39
+ env[:dom].each_node(&method(:process_node))
40
+
41
+ @app.call(env)
42
+ end
43
+
44
+ def prepare_regular_expression(env)
45
+ source_terms = glossary.map(&:first)
46
+ source_terms.map! { |w| /\b#{w}\b/ } if with_space_divider?(env)
47
+ @regex = Regexp.union(*source_terms)
48
+
49
+ @replacement_mapping = glossary.to_h
50
+ @replacement_mapping.transform_values! { |v| "<skip>#{v}</skip>" }
51
+ end
52
+
53
+ private
54
+
55
+ def parse_tsv(path)
56
+ TSV.parse_file(path).without_header.map do |row|
57
+ [row[0], row[1]]
58
+ end
59
+ end
60
+
61
+ def process_node(node)
62
+ return if !node.is_a?(Oga::XML::Text)
63
+
64
+ new_text = node.text.gsub(regex, replacement_mapping)
65
+
66
+ return if new_text == node.text
67
+
68
+ dom(new_text).children.reverse_each do |new_node|
69
+ node.after(new_node)
70
+ end
71
+ node.remove
72
+ end
73
+
74
+ def with_space_divider?(env)
75
+ LANGUAGES_WITHOUT_WORD_DIVIDERS.none? do |lang|
76
+ env[:lang_from].is?(lang)
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+
5
+ # Environment of the middleware,
6
+ # which is just a specialized hash with convenience methods,
7
+ # and converts language settings for ease of comparison.
8
+ module Natsukantou
9
+ class Env < SimpleDelegator
10
+ # @param [Hash] env
11
+ # @option env [Oga::XML::Document] dom
12
+ # @option lang_from [String] Language code in ISO 639-1 to be translated from.
13
+ # @option lang_to [String] Language code in ISO 639-1 to be translated to.
14
+ def initialize(env)
15
+ # Convert languages
16
+ [:lang_from, :lang_to].each do |key|
17
+ env[key] = LanguageCode.new(env.fetch(key))
18
+ end
19
+
20
+ # Required keys
21
+ [:dom].each do |key|
22
+ env.fetch(key)
23
+ end
24
+
25
+ super(env)
26
+ end
27
+
28
+ def dom
29
+ self[:dom]
30
+ end
31
+
32
+ def lang_from
33
+ self[:lang_from]
34
+ end
35
+
36
+ def lang_to
37
+ self[:lang_to]
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ class LanguageCode
5
+ def initialize(code)
6
+ @code = code.downcase
7
+ @main, @sub = @code.split('-')
8
+
9
+ if @main.length != 2
10
+ raise 'Language code shoul be in the form of xx or xx-yy'
11
+ end
12
+ end
13
+
14
+ attr_reader :code, :main, :sub
15
+
16
+ def is?(other_code)
17
+ other = self.class.new(other_code)
18
+
19
+ return false if other.main != main
20
+
21
+ return true if other.sub.nil?
22
+
23
+ sub ? other.sub == sub : false
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ module Logger
5
+ def logger
6
+ ::Natsukantou.logger
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ module ParseXml
5
+ # @param xml [String]
6
+ # @return [Oga::XML::Document]
7
+ def dom(xml)
8
+ Oga.parse_xml(xml, strict: true)
9
+ rescue => e
10
+ logger.error(e.message)
11
+
12
+ Oga.parse_xml(xml)
13
+ end
14
+
15
+ # @param xml [String]
16
+ # @return [Oga::XML::Node]
17
+ def dom_node(xml)
18
+ dom = dom(xml)
19
+
20
+ if dom.children.size != 1
21
+ logger.error("Parsed XML contains more than one root element:")
22
+ logger.error(xml)
23
+ end
24
+
25
+ dom.children.first
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'parse_xml'
4
+ require_relative 'logger'
5
+
6
+ module Natsukantou
7
+ module UtilityBase
8
+ include ParseXml
9
+ include Logger
10
+ end
11
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Natsukantou
4
+ VERSION = "0.1.0"
5
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+ require 'middleware'
5
+ require 'oga'
6
+
7
+ require_relative "natsukantou/version"
8
+ require_relative "natsukantou/setup/registry"
9
+ require_relative "natsukantou/utility/language_code"
10
+ require_relative "natsukantou/utility/env"
11
+
12
+ module Natsukantou
13
+ class Error < StandardError; end
14
+
15
+ # While autoloading in Ruby, also keep it in the local registry,
16
+ # which can be used in setup prompt.
17
+ def self.autoload_and_register(type, constant, path)
18
+ Setup::Registry.instance.register(
19
+ type,
20
+ constant,
21
+ (File.expand_path "../#{path}.rb", __FILE__)
22
+ )
23
+
24
+ autoload(constant, path)
25
+ end
26
+
27
+ # Misc
28
+ autoload :UtilityBase, "natsukantou/utility/utility_base"
29
+
30
+ # Translators
31
+ autoload_and_register :translator, :DeepL, "natsukantou/deep_l"
32
+ autoload_and_register :translator, :Minhon, "natsukantou/minhon"
33
+
34
+ # Middlewares
35
+ autoload_and_register :middleware, :SubstitudeGlossary, "natsukantou/substitude_glossary"
36
+ autoload_and_register :middleware, :HandleRubyMarkup, "natsukantou/handle_ruby_markup"
37
+
38
+ def self.logger
39
+ @logger ||= ::Logger.new($stdout)
40
+ end
41
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/natsukantou/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "natsukantou"
7
+ spec.version = Natsukantou::VERSION
8
+ spec.authors = ["lulalala"]
9
+ spec.email = ["mark@goodlife.tw"]
10
+
11
+ spec.summary = "human language translation library for XML documents"
12
+ spec.description = "human language translation library for XML documents, supporting DeepL"
13
+ spec.homepage = "https://gitlab.com/lulalala/natsukantou"
14
+ spec.license = "MIT"
15
+ spec.required_ruby_version = ">= 2.7.0"
16
+
17
+ spec.metadata["homepage_uri"] = spec.homepage
18
+ spec.metadata["source_code_uri"] = spec.homepage
19
+ spec.metadata["changelog_uri"] = 'https://gitlab.com/lulalala/natsukantou/-/releases'
20
+
21
+ # Specify which files should be added to the gem when it is released.
22
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
23
+ spec.files = Dir.chdir(__dir__) do
24
+ `git ls-files -z`.split("\x0").reject do |f|
25
+ (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
26
+ end
27
+ end
28
+ spec.bindir = "exe"
29
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
+ spec.require_paths = ["lib"]
31
+
32
+ spec.add_dependency "middleware", "~> 0.1.0"
33
+ spec.add_dependency "oga", "~> 3.4"
34
+ spec.add_dependency "tsv", "~> 1.0"
35
+
36
+ spec.add_dependency "tty-prompt", "~> 0.23"
37
+ spec.add_dependency "yard", "~> 0.9"
38
+
39
+ # DeepL
40
+ spec.add_dependency "deepl-rb", "~> 2.5"
41
+
42
+ # Minhon
43
+ spec.add_dependency 'oauth', '~> 0.5.1'
44
+
45
+ # For more information and examples about making a new gem, check out our
46
+ # guide at: https://bundler.io/guides/creating_gem.html
47
+ end
@@ -0,0 +1,4 @@
1
+ module Natsukantou
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: natsukantou
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - lulalala
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2022-09-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: middleware
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.1.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.1.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: oga
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: tsv
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: tty-prompt
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.23'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.23'
69
+ - !ruby/object:Gem::Dependency
70
+ name: yard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.9'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: deepl-rb
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.5'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.5'
97
+ - !ruby/object:Gem::Dependency
98
+ name: oauth
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.5.1
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.5.1
111
+ description: human language translation library for XML documents, supporting DeepL
112
+ email:
113
+ - mark@goodlife.tw
114
+ executables:
115
+ - natsukantou
116
+ extensions: []
117
+ extra_rdoc_files: []
118
+ files:
119
+ - ".rspec"
120
+ - ".rubocop.yml"
121
+ - CHANGELOG.md
122
+ - Gemfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - exe/natsukantou
127
+ - lib/natsukantou.rb
128
+ - lib/natsukantou/deep_l.rb
129
+ - lib/natsukantou/handle_ruby_markup.rb
130
+ - lib/natsukantou/minhon.rb
131
+ - lib/natsukantou/setup/config_load_or_prompt.rb
132
+ - lib/natsukantou/setup/config_prompt.rb
133
+ - lib/natsukantou/setup/output.erb
134
+ - lib/natsukantou/setup/registry.rb
135
+ - lib/natsukantou/setup/write_prompt.rb
136
+ - lib/natsukantou/substitude_glossary.rb
137
+ - lib/natsukantou/utility/env.rb
138
+ - lib/natsukantou/utility/language_code.rb
139
+ - lib/natsukantou/utility/logger.rb
140
+ - lib/natsukantou/utility/parse_xml.rb
141
+ - lib/natsukantou/utility/utility_base.rb
142
+ - lib/natsukantou/version.rb
143
+ - natsukantou.gemspec
144
+ - sig/natsukantou.rbs
145
+ homepage: https://gitlab.com/lulalala/natsukantou
146
+ licenses:
147
+ - MIT
148
+ metadata:
149
+ homepage_uri: https://gitlab.com/lulalala/natsukantou
150
+ source_code_uri: https://gitlab.com/lulalala/natsukantou
151
+ changelog_uri: https://gitlab.com/lulalala/natsukantou/-/releases
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: 2.7.0
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubygems_version: 3.1.6
168
+ signing_key:
169
+ specification_version: 4
170
+ summary: human language translation library for XML documents
171
+ test_files: []