konjak 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -1
- data/Gemfile +4 -0
- data/README.md +5 -1
- data/Rakefile +10 -0
- data/bin/konjak +1 -1
- data/lib/konjak/text.rb +1 -12
- data/lib/konjak/translation_unit.rb +16 -0
- data/lib/konjak/translator/gtt_html_translate.rb +47 -0
- data/lib/konjak/translator/text_translate.rb +22 -0
- data/lib/konjak/translator/translated_string.rb +6 -0
- data/lib/konjak/translator.rb +37 -27
- data/lib/konjak/version.rb +1 -1
- data/lib/konjak.rb +4 -4
- data/spec/konjak_parse_spec.rb +1 -1
- data/spec/konjak_translate_spec.rb +17 -0
- data/spec/spec_helper.rb +5 -0
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d1f7b71cef0bca6b16b61cccb6dae7fe49c28fc
|
4
|
+
data.tar.gz: 58e133fc23d9789b8075e4b1d0b497259193d436
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 961ec5b4b2635480938016110b07b654b112353fe8c84dd3260bb812d2e06c9163c69dd957686b65ba619f3b2b634420850a203e8be41a070bd441fe55c4b884
|
7
|
+
data.tar.gz: 0d9385eda3e902e2519f5b34d7d0a9af8397859b70d8d2a687c9b75536a8904cbea68d0a7aaa10b8528baeb9b7655902259ea6d6e94a6da5c990937df2903945
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# Konjak
|
2
2
|
|
3
|
+
[![Build Status](https://travis-ci.org/hanachin/konjak.svg)](https://travis-ci.org/hanachin/konjak)
|
4
|
+
[![Code Climate](https://codeclimate.com/github/hanachin/konjak/badges/gpa.svg)](https://codeclimate.com/github/hanachin/konjak)
|
5
|
+
[![Test Coverage](https://codeclimate.com/github/hanachin/konjak/badges/coverage.svg)](https://codeclimate.com/github/hanachin/konjak/coverage)
|
6
|
+
|
3
7
|
TMX(Translation Memory eXchange) tools for ruby
|
4
8
|
|
5
9
|
## Installation
|
@@ -18,7 +22,7 @@ Or install it yourself as:
|
|
18
22
|
|
19
23
|
## Usage
|
20
24
|
|
21
|
-
$ konjak translate file.tmx file.txt
|
25
|
+
$ konjak translate src target file.tmx file.txt
|
22
26
|
|
23
27
|
## Contributing
|
24
28
|
|
data/Rakefile
CHANGED
data/bin/konjak
CHANGED
@@ -13,4 +13,4 @@ _command, src, target, tmx_path, target_path = ARGV
|
|
13
13
|
|
14
14
|
tmx = File.read(tmx_path)
|
15
15
|
doc = File.read(target_path)
|
16
|
-
puts Konjak.translate(doc, Konjak.parse(tmx, gtt: true), src, target)
|
16
|
+
puts Konjak.translate(doc, Konjak.parse(tmx, gtt: true), src, target, format: :gtt_html)
|
data/lib/konjak/text.rb
CHANGED
@@ -30,6 +30,22 @@ module Konjak
|
|
30
30
|
child_elements.count {|e| TranslationUnitVariant === e } >= 2
|
31
31
|
end
|
32
32
|
|
33
|
+
def has_translation?(src_lang, target_lang)
|
34
|
+
src_lang?(src_lang) && has_variant_lang?(src_lang) && has_variant_lang?(target_lang)
|
35
|
+
end
|
36
|
+
|
37
|
+
def src_lang?(src_lang)
|
38
|
+
!self.src_lang || self.src_lang == '*all*' || self.src_lang == src_lang
|
39
|
+
end
|
40
|
+
|
41
|
+
def has_variant_lang?(lang)
|
42
|
+
variants.any? {|v| v.xml_lang == lang }
|
43
|
+
end
|
44
|
+
|
45
|
+
def variant(lang)
|
46
|
+
variants.detect {|v| v.xml_lang == lang }
|
47
|
+
end
|
48
|
+
|
33
49
|
# FIXME
|
34
50
|
# Zero, one or more <note>, or <prop> elements in any order, followed by
|
35
51
|
# One or more <tuv> elements.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Konjak
|
2
|
+
class Translator
|
3
|
+
module GttHtmlTranslate
|
4
|
+
refine(Text) do
|
5
|
+
def gtt_tag_ns
|
6
|
+
scan(/\{(\d+)\}/).flatten.uniq
|
7
|
+
end
|
8
|
+
|
9
|
+
def compile_gtt_html_pattern
|
10
|
+
regexp = Regexp.escape(self)
|
11
|
+
gtt_tag_ns.each do |n|
|
12
|
+
regexp = regexp.sub(/\\\{#{n}\\\}/) { "(?<n#{n}><(?<_#{n}>\\w+)[^>]*>)" }
|
13
|
+
regexp = regexp.gsub(/\\\{#{n}\\\}/) { "\\k<n#{n}>" }
|
14
|
+
regexp = regexp.gsub(/\\\{\/#{n}\\\}/) { "</\\k<_#{n}>>" }
|
15
|
+
end
|
16
|
+
Regexp.compile(regexp)
|
17
|
+
end
|
18
|
+
|
19
|
+
def interpolate_gtt_html_pattern(match_data)
|
20
|
+
new_text = dup
|
21
|
+
gtt_tag_ns.each do |n|
|
22
|
+
new_text = new_text.gsub("{#{n}}", match_data["n#{n}"])
|
23
|
+
new_text = new_text.gsub("{/#{n}}", "</#{match_data["_#{n}"]}>")
|
24
|
+
end
|
25
|
+
new_text
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
refine(TranslationUnit) do
|
30
|
+
def translate(src_lang, target_lang, text)
|
31
|
+
pattern = variant(src_lang).segment.text.compile_gtt_html_pattern
|
32
|
+
target_text = variant(target_lang).segment.text
|
33
|
+
|
34
|
+
texts = []
|
35
|
+
while true
|
36
|
+
head, match, tail = text.partition(pattern)
|
37
|
+
break if match.empty?
|
38
|
+
texts << head unless head.empty?
|
39
|
+
texts << TranslatedString.new(target_text.interpolate_gtt_html_pattern($~))
|
40
|
+
text = tail
|
41
|
+
end
|
42
|
+
texts << text
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Konjak
|
2
|
+
class Translator
|
3
|
+
module TextTranslate
|
4
|
+
refine(TranslationUnit) do
|
5
|
+
def translate(src_lang, target_lang, text)
|
6
|
+
s = variant(src_lang).segment.text
|
7
|
+
t = variant(target_lang).segment.text
|
8
|
+
|
9
|
+
texts = []
|
10
|
+
while true
|
11
|
+
head, match, tail = text.partition(s)
|
12
|
+
break if match.empty?
|
13
|
+
texts << head unless head.empty?
|
14
|
+
texts << TranslatedString.new(t)
|
15
|
+
text = tail
|
16
|
+
end
|
17
|
+
texts << text
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/konjak/translator.rb
CHANGED
@@ -1,39 +1,34 @@
|
|
1
1
|
require 'mem'
|
2
|
+
require 'konjak/translator/gtt_html_translate'
|
3
|
+
require 'konjak/translator/text_translate'
|
4
|
+
require 'konjak/translator/translated_string'
|
2
5
|
|
3
6
|
module Konjak
|
4
7
|
class Translator
|
8
|
+
|
5
9
|
include Mem
|
6
10
|
|
7
|
-
attr_reader :tmx, :src_lang, :target_lang
|
11
|
+
attr_reader :tmx, :src_lang, :target_lang, :options
|
8
12
|
|
9
|
-
def initialize(tmx, src_lang, target_lang)
|
10
|
-
@tmx
|
11
|
-
@src_lang
|
13
|
+
def initialize(tmx, src_lang, target_lang, **options)
|
14
|
+
@tmx = tmx
|
15
|
+
@src_lang = src_lang
|
12
16
|
@target_lang = target_lang
|
17
|
+
@options = options
|
13
18
|
end
|
14
19
|
|
15
20
|
def translate(doc)
|
16
21
|
translated_docs = [doc.dup]
|
17
22
|
translation_units.each do |tu|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
head, match, tail = d.partition(s)
|
28
|
-
ds << head
|
29
|
-
ds << t.dup.tap {|t| def t.translated; true; end }
|
30
|
-
|
31
|
-
break unless tail.include?(s)
|
32
|
-
|
33
|
-
d = tail
|
34
|
-
end
|
35
|
-
ds << tail
|
36
|
-
ds
|
23
|
+
translated_docs.map! { |text|
|
24
|
+
next text if text.is_a?(TranslatedString)
|
25
|
+
|
26
|
+
env = translate_env.dup
|
27
|
+
env.local_variable_set(:tu, tu)
|
28
|
+
env.local_variable_set(:src_lang, src_lang)
|
29
|
+
env.local_variable_set(:target_lang, target_lang)
|
30
|
+
env.local_variable_set(:text, text)
|
31
|
+
eval('tu.translate(src_lang, target_lang, text)', env)
|
37
32
|
}.flatten!
|
38
33
|
end
|
39
34
|
translated_docs.join
|
@@ -41,13 +36,28 @@ module Konjak
|
|
41
36
|
|
42
37
|
private
|
43
38
|
|
39
|
+
TRANSLATE_ENVS= {
|
40
|
+
text: Class.new { using TextTranslate; break binding },
|
41
|
+
gtt_html: Class.new { using GttHtmlTranslate; break binding }
|
42
|
+
}
|
43
|
+
|
44
|
+
def format
|
45
|
+
if TRANSLATE_ENVS.has_key?(options[:format])
|
46
|
+
options[:format]
|
47
|
+
else
|
48
|
+
:text
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def translate_env
|
53
|
+
TRANSLATE_ENVS[format]
|
54
|
+
end
|
55
|
+
|
44
56
|
def translation_units
|
45
57
|
tmx.body.translation_units.select { |tu|
|
46
|
-
|
47
|
-
tu.variants.any? {|v| v.xml_lang == src_lang } &&
|
48
|
-
tu.variants.any? {|v| v.xml_lang == target_lang }
|
58
|
+
tu.has_translation?(src_lang, target_lang)
|
49
59
|
}.sort_by {|tu|
|
50
|
-
-tu.
|
60
|
+
-tu.variant(src_lang).segment.text.length
|
51
61
|
}
|
52
62
|
end
|
53
63
|
memoize :translation_units
|
data/lib/konjak/version.rb
CHANGED
data/lib/konjak.rb
CHANGED
@@ -36,13 +36,13 @@ require 'konjak/translator'
|
|
36
36
|
|
37
37
|
module Konjak
|
38
38
|
class << self
|
39
|
-
def parse(xml, **
|
40
|
-
Parser.new.parse(xml, **
|
39
|
+
def parse(xml, **options)
|
40
|
+
Parser.new.parse(xml, **options)
|
41
41
|
end
|
42
42
|
|
43
|
-
def translate(doc, xml_or_tmx, src_lang, target_lang)
|
43
|
+
def translate(doc, xml_or_tmx, src_lang, target_lang, **options)
|
44
44
|
tmx = xml_or_tmx.kind_of?(Tmx) ? xml_or_tmx : parse(xml_or_tmx)
|
45
|
-
Translator.new(tmx, src_lang, target_lang).translate(doc)
|
45
|
+
Translator.new(tmx, src_lang, target_lang, **options).translate(doc)
|
46
46
|
end
|
47
47
|
end
|
48
48
|
end
|
data/spec/konjak_parse_spec.rb
CHANGED
@@ -162,7 +162,7 @@ describe Konjak do
|
|
162
162
|
describe 'gtt' do
|
163
163
|
let(:xml) { File.read('spec/fixtures/gtt.tmx') }
|
164
164
|
|
165
|
-
subject { tmx.body.translation_units.detect {|tu| tu.variants.detect {|v| v.segment.text
|
165
|
+
subject { tmx.body.translation_units.detect {|tu| tu.variants.detect {|v| v.segment.text == "\n\n& it's also example." } } }
|
166
166
|
|
167
167
|
context 'gtt: true' do
|
168
168
|
let(:tmx) { Konjak.parse(xml, gtt: true) }
|
@@ -24,4 +24,21 @@ this is données (avec un caractère non standard: ).
|
|
24
24
|
EXPECT
|
25
25
|
end
|
26
26
|
|
27
|
+
context 'when format is GTT' do
|
28
|
+
let(:gtt_tmx) { Konjak.parse(File.read('spec/fixtures/gtt.tmx')) }
|
29
|
+
|
30
|
+
let(:doc) { <<GTT_HTML }
|
31
|
+
This is <a href="http://example.com">example</a>.
|
32
|
+
And This is <b>example</b>. Yey.
|
33
|
+
And This is example.
|
34
|
+
GTT_HTML
|
35
|
+
|
36
|
+
subject { Konjak.translate(doc, gtt_tmx, 'en', 'ja', format: :gtt_html) }
|
37
|
+
|
38
|
+
it { is_expected.to eq <<EXPECT }
|
39
|
+
これは、 <a href="http://example.com">例</a> 。
|
40
|
+
And これは、 <b>例</b> 。 Yey.
|
41
|
+
And This is example.
|
42
|
+
EXPECT
|
43
|
+
end
|
27
44
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seiei Higa
|
@@ -162,6 +162,9 @@ files:
|
|
162
162
|
- lib/konjak/translation_unit.rb
|
163
163
|
- lib/konjak/translation_unit_variant.rb
|
164
164
|
- lib/konjak/translator.rb
|
165
|
+
- lib/konjak/translator/gtt_html_translate.rb
|
166
|
+
- lib/konjak/translator/text_translate.rb
|
167
|
+
- lib/konjak/translator/translated_string.rb
|
165
168
|
- lib/konjak/unknown_tag.rb
|
166
169
|
- lib/konjak/user_defined_encoding.rb
|
167
170
|
- lib/konjak/version.rb
|