konjak 0.0.1 → 0.0.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 620fb76e02f4a35a2e085fb25e50d2cf5924d4a3
4
- data.tar.gz: 13a5d704eceda0ab92f7bc73a7af9f42024f5fcb
3
+ metadata.gz: 3d1f7b71cef0bca6b16b61cccb6dae7fe49c28fc
4
+ data.tar.gz: 58e133fc23d9789b8075e4b1d0b497259193d436
5
5
  SHA512:
6
- metadata.gz: f792f56294bcd16a38a8f71ad876d3fef54d5f1b00b53a93eb21f6456a7abce9229c541b8e16d81296d2bf4c580861fccc9d17c39421b418864c13aab50f90ce
7
- data.tar.gz: 45cbd09c3b26c03d9f8236a9ea31bfc0d3116d1a54a124d7dbb7c82ab5ababfcae58e3b4556c17d884b13c315ac2e3e23bbbef070dbfadc921edc715fcf614b3
6
+ metadata.gz: 961ec5b4b2635480938016110b07b654b112353fe8c84dd3260bb812d2e06c9163c69dd957686b65ba619f3b2b634420850a203e8be41a070bd441fe55c4b884
7
+ data.tar.gz: 0d9385eda3e902e2519f5b34d7d0a9af8397859b70d8d2a687c9b75536a8904cbea68d0a7aaa10b8528baeb9b7655902259ea6d6e94a6da5c990937df2903945
data/.travis.yml CHANGED
@@ -1,3 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.2
3
+ - 2.2
4
+ env:
5
+ global:
6
+ secure: V8yG0G03Dsabc5mK4ubx3wSd0FyW0tazC/4DsJrvQTXSNytab1tXpwQbaN1XUyWTGnOj58mSZAPk/36n/5sGEwq3HIYCvPbJrqrCRODTDzmYy5uA1N+/Mo2uA55lPBjCbfcEvO1oy9tQIcH3I3R29nqiTsIRXax8XoFXHlJdP7I=
data/Gemfile CHANGED
@@ -2,3 +2,7 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in konjak.gemspec
4
4
  gemspec
5
+
6
+ if ENV['CODECLIMATE_REPO_TOKEN']
7
+ gem "codeclimate-test-reporter", group: :test, require: nil
8
+ end
data/README.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Konjak
2
2
 
3
+ [![Build Status](https://travis-ci.org/hanachin/konjak.svg)](https://travis-ci.org/hanachin/konjak)
4
+ [![Code Climate](https://codeclimate.com/github/hanachin/konjak/badges/gpa.svg)](https://codeclimate.com/github/hanachin/konjak)
5
+ [![Test Coverage](https://codeclimate.com/github/hanachin/konjak/badges/coverage.svg)](https://codeclimate.com/github/hanachin/konjak/coverage)
6
+
3
7
  TMX(Translation Memory eXchange) tools for ruby
4
8
 
5
9
  ## Installation
@@ -18,7 +22,7 @@ Or install it yourself as:
18
22
 
19
23
  ## Usage
20
24
 
21
- $ konjak translate file.tmx file.txt src target
25
+ $ konjak translate src target file.tmx file.txt
22
26
 
23
27
  ## Contributing
24
28
 
data/Rakefile CHANGED
@@ -1 +1,11 @@
1
1
  require "bundler/gem_tasks"
2
+
3
+ begin
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task :default => :spec
9
+ rescue LoadError
10
+ # no rspec available
11
+ end
data/bin/konjak CHANGED
@@ -13,4 +13,4 @@ _command, src, target, tmx_path, target_path = ARGV
13
13
 
14
14
  tmx = File.read(tmx_path)
15
15
  doc = File.read(target_path)
16
- puts Konjak.translate(doc, Konjak.parse(tmx, gtt: true), src, target)
16
+ puts Konjak.translate(doc, Konjak.parse(tmx, gtt: true), src, target, format: :gtt_html)
data/lib/konjak/text.rb CHANGED
@@ -1,15 +1,4 @@
1
1
  module Konjak
2
- class Text
3
- def initialize(text)
4
- @text = text
5
- end
6
-
7
- def to_s
8
- @text
9
- end
10
-
11
- def length
12
- @text.length
13
- end
2
+ class Text < String
14
3
  end
15
4
  end
@@ -30,6 +30,22 @@ module Konjak
30
30
  child_elements.count {|e| TranslationUnitVariant === e } >= 2
31
31
  end
32
32
 
33
+ def has_translation?(src_lang, target_lang)
34
+ src_lang?(src_lang) && has_variant_lang?(src_lang) && has_variant_lang?(target_lang)
35
+ end
36
+
37
+ def src_lang?(src_lang)
38
+ !self.src_lang || self.src_lang == '*all*' || self.src_lang == src_lang
39
+ end
40
+
41
+ def has_variant_lang?(lang)
42
+ variants.any? {|v| v.xml_lang == lang }
43
+ end
44
+
45
+ def variant(lang)
46
+ variants.detect {|v| v.xml_lang == lang }
47
+ end
48
+
33
49
  # FIXME
34
50
  # Zero, one or more <note>, or <prop> elements in any order, followed by
35
51
  # One or more <tuv> elements.
@@ -0,0 +1,47 @@
1
+ module Konjak
2
+ class Translator
3
+ module GttHtmlTranslate
4
+ refine(Text) do
5
+ def gtt_tag_ns
6
+ scan(/\{(\d+)\}/).flatten.uniq
7
+ end
8
+
9
+ def compile_gtt_html_pattern
10
+ regexp = Regexp.escape(self)
11
+ gtt_tag_ns.each do |n|
12
+ regexp = regexp.sub(/\\\{#{n}\\\}/) { "(?<n#{n}><(?<_#{n}>\\w+)[^>]*>)" }
13
+ regexp = regexp.gsub(/\\\{#{n}\\\}/) { "\\k<n#{n}>" }
14
+ regexp = regexp.gsub(/\\\{\/#{n}\\\}/) { "</\\k<_#{n}>>" }
15
+ end
16
+ Regexp.compile(regexp)
17
+ end
18
+
19
+ def interpolate_gtt_html_pattern(match_data)
20
+ new_text = dup
21
+ gtt_tag_ns.each do |n|
22
+ new_text = new_text.gsub("{#{n}}", match_data["n#{n}"])
23
+ new_text = new_text.gsub("{/#{n}}", "</#{match_data["_#{n}"]}>")
24
+ end
25
+ new_text
26
+ end
27
+ end
28
+
29
+ refine(TranslationUnit) do
30
+ def translate(src_lang, target_lang, text)
31
+ pattern = variant(src_lang).segment.text.compile_gtt_html_pattern
32
+ target_text = variant(target_lang).segment.text
33
+
34
+ texts = []
35
+ while true
36
+ head, match, tail = text.partition(pattern)
37
+ break if match.empty?
38
+ texts << head unless head.empty?
39
+ texts << TranslatedString.new(target_text.interpolate_gtt_html_pattern($~))
40
+ text = tail
41
+ end
42
+ texts << text
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,22 @@
1
+ module Konjak
2
+ class Translator
3
+ module TextTranslate
4
+ refine(TranslationUnit) do
5
+ def translate(src_lang, target_lang, text)
6
+ s = variant(src_lang).segment.text
7
+ t = variant(target_lang).segment.text
8
+
9
+ texts = []
10
+ while true
11
+ head, match, tail = text.partition(s)
12
+ break if match.empty?
13
+ texts << head unless head.empty?
14
+ texts << TranslatedString.new(t)
15
+ text = tail
16
+ end
17
+ texts << text
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,6 @@
1
+ module Konjak
2
+ class Translator
3
+ class TranslatedString < String
4
+ end
5
+ end
6
+ end
@@ -1,39 +1,34 @@
1
1
  require 'mem'
2
+ require 'konjak/translator/gtt_html_translate'
3
+ require 'konjak/translator/text_translate'
4
+ require 'konjak/translator/translated_string'
2
5
 
3
6
  module Konjak
4
7
  class Translator
8
+
5
9
  include Mem
6
10
 
7
- attr_reader :tmx, :src_lang, :target_lang
11
+ attr_reader :tmx, :src_lang, :target_lang, :options
8
12
 
9
- def initialize(tmx, src_lang, target_lang)
10
- @tmx = tmx
11
- @src_lang = src_lang
13
+ def initialize(tmx, src_lang, target_lang, **options)
14
+ @tmx = tmx
15
+ @src_lang = src_lang
12
16
  @target_lang = target_lang
17
+ @options = options
13
18
  end
14
19
 
15
20
  def translate(doc)
16
21
  translated_docs = [doc.dup]
17
22
  translation_units.each do |tu|
18
- s = tu.variants.detect { |v| v.xml_lang == src_lang }.segment.text.to_s
19
- t = tu.variants.detect { |v| v.xml_lang == target_lang }.segment.text.to_s
20
- translated_docs.map! { |d|
21
- next d if d.respond_to?(:translated)
22
- next d if !d.include?(s)
23
-
24
- ds = []
25
- tail = nil
26
- loop do
27
- head, match, tail = d.partition(s)
28
- ds << head
29
- ds << t.dup.tap {|t| def t.translated; true; end }
30
-
31
- break unless tail.include?(s)
32
-
33
- d = tail
34
- end
35
- ds << tail
36
- ds
23
+ translated_docs.map! { |text|
24
+ next text if text.is_a?(TranslatedString)
25
+
26
+ env = translate_env.dup
27
+ env.local_variable_set(:tu, tu)
28
+ env.local_variable_set(:src_lang, src_lang)
29
+ env.local_variable_set(:target_lang, target_lang)
30
+ env.local_variable_set(:text, text)
31
+ eval('tu.translate(src_lang, target_lang, text)', env)
37
32
  }.flatten!
38
33
  end
39
34
  translated_docs.join
@@ -41,13 +36,28 @@ module Konjak
41
36
 
42
37
  private
43
38
 
39
+ TRANSLATE_ENVS= {
40
+ text: Class.new { using TextTranslate; break binding },
41
+ gtt_html: Class.new { using GttHtmlTranslate; break binding }
42
+ }
43
+
44
+ def format
45
+ if TRANSLATE_ENVS.has_key?(options[:format])
46
+ options[:format]
47
+ else
48
+ :text
49
+ end
50
+ end
51
+
52
+ def translate_env
53
+ TRANSLATE_ENVS[format]
54
+ end
55
+
44
56
  def translation_units
45
57
  tmx.body.translation_units.select { |tu|
46
- (!tu.src_lang || tu.src_lang == src_lang || tu.src_lang == '*all*') &&
47
- tu.variants.any? {|v| v.xml_lang == src_lang } &&
48
- tu.variants.any? {|v| v.xml_lang == target_lang }
58
+ tu.has_translation?(src_lang, target_lang)
49
59
  }.sort_by {|tu|
50
- -tu.variants.detect { |v| v.xml_lang == src_lang }.segment.text.length
60
+ -tu.variant(src_lang).segment.text.length
51
61
  }
52
62
  end
53
63
  memoize :translation_units
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/konjak.rb CHANGED
@@ -36,13 +36,13 @@ require 'konjak/translator'
36
36
 
37
37
  module Konjak
38
38
  class << self
39
- def parse(xml, **kw)
40
- Parser.new.parse(xml, **kw)
39
+ def parse(xml, **options)
40
+ Parser.new.parse(xml, **options)
41
41
  end
42
42
 
43
- def translate(doc, xml_or_tmx, src_lang, target_lang)
43
+ def translate(doc, xml_or_tmx, src_lang, target_lang, **options)
44
44
  tmx = xml_or_tmx.kind_of?(Tmx) ? xml_or_tmx : parse(xml_or_tmx)
45
- Translator.new(tmx, src_lang, target_lang).translate(doc)
45
+ Translator.new(tmx, src_lang, target_lang, **options).translate(doc)
46
46
  end
47
47
  end
48
48
  end
@@ -162,7 +162,7 @@ describe Konjak do
162
162
  describe 'gtt' do
163
163
  let(:xml) { File.read('spec/fixtures/gtt.tmx') }
164
164
 
165
- subject { tmx.body.translation_units.detect {|tu| tu.variants.detect {|v| v.segment.text.to_s == "\n\n& it's also example." } } }
165
+ subject { tmx.body.translation_units.detect {|tu| tu.variants.detect {|v| v.segment.text == "\n\n& it's also example." } } }
166
166
 
167
167
  context 'gtt: true' do
168
168
  let(:tmx) { Konjak.parse(xml, gtt: true) }
@@ -24,4 +24,21 @@ this is données (avec un caractère non standard: ).
24
24
  EXPECT
25
25
  end
26
26
 
27
+ context 'when format is GTT' do
28
+ let(:gtt_tmx) { Konjak.parse(File.read('spec/fixtures/gtt.tmx')) }
29
+
30
+ let(:doc) { <<GTT_HTML }
31
+ This is <a href="http://example.com">example</a>.
32
+ And This is <b>example</b>. Yey.
33
+ And This is example.
34
+ GTT_HTML
35
+
36
+ subject { Konjak.translate(doc, gtt_tmx, 'en', 'ja', format: :gtt_html) }
37
+
38
+ it { is_expected.to eq <<EXPECT }
39
+ これは、 <a href="http://example.com">例</a> 。
40
+ And これは、 <b>例</b> 。 Yey.
41
+ And This is example.
42
+ EXPECT
43
+ end
27
44
  end
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,8 @@
1
+ if ENV['CODECLIMATE_REPO_TOKEN']
2
+ require "codeclimate-test-reporter"
3
+ CodeClimate::TestReporter.start
4
+ end
5
+
1
6
  require 'konjak'
2
7
 
3
8
  # spec/fixtures/sample.tmx from http://www.ttt.org/oscarstandards/tmx/#AppSample
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa
@@ -162,6 +162,9 @@ files:
162
162
  - lib/konjak/translation_unit.rb
163
163
  - lib/konjak/translation_unit_variant.rb
164
164
  - lib/konjak/translator.rb
165
+ - lib/konjak/translator/gtt_html_translate.rb
166
+ - lib/konjak/translator/text_translate.rb
167
+ - lib/konjak/translator/translated_string.rb
165
168
  - lib/konjak/unknown_tag.rb
166
169
  - lib/konjak/user_defined_encoding.rb
167
170
  - lib/konjak/version.rb