konjak 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -1
- data/Gemfile +4 -0
- data/README.md +5 -1
- data/Rakefile +10 -0
- data/bin/konjak +1 -1
- data/lib/konjak/text.rb +1 -12
- data/lib/konjak/translation_unit.rb +16 -0
- data/lib/konjak/translator/gtt_html_translate.rb +47 -0
- data/lib/konjak/translator/text_translate.rb +22 -0
- data/lib/konjak/translator/translated_string.rb +6 -0
- data/lib/konjak/translator.rb +37 -27
- data/lib/konjak/version.rb +1 -1
- data/lib/konjak.rb +4 -4
- data/spec/konjak_parse_spec.rb +1 -1
- data/spec/konjak_translate_spec.rb +17 -0
- data/spec/spec_helper.rb +5 -0
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d1f7b71cef0bca6b16b61cccb6dae7fe49c28fc
|
4
|
+
data.tar.gz: 58e133fc23d9789b8075e4b1d0b497259193d436
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 961ec5b4b2635480938016110b07b654b112353fe8c84dd3260bb812d2e06c9163c69dd957686b65ba619f3b2b634420850a203e8be41a070bd441fe55c4b884
|
7
|
+
data.tar.gz: 0d9385eda3e902e2519f5b34d7d0a9af8397859b70d8d2a687c9b75536a8904cbea68d0a7aaa10b8528baeb9b7655902259ea6d6e94a6da5c990937df2903945
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
# Konjak
|
2
2
|
|
3
|
+
[Build Status](https://travis-ci.org/hanachin/konjak)
|
4
|
+
[Code Climate](https://codeclimate.com/github/hanachin/konjak)
|
5
|
+
[Test Coverage](https://codeclimate.com/github/hanachin/konjak/coverage)
|
6
|
+
|
3
7
|
TMX(Translation Memory eXchange) tools for ruby
|
4
8
|
|
5
9
|
## Installation
|
@@ -18,7 +22,7 @@ Or install it yourself as:
|
|
18
22
|
|
19
23
|
## Usage
|
20
24
|
|
21
|
-
$ konjak translate file.tmx file.txt
|
25
|
+
$ konjak translate src target file.tmx file.txt
|
22
26
|
|
23
27
|
## Contributing
|
24
28
|
|
data/Rakefile
CHANGED
data/bin/konjak
CHANGED
@@ -13,4 +13,4 @@ _command, src, target, tmx_path, target_path = ARGV
|
|
13
13
|
|
14
14
|
tmx = File.read(tmx_path)
|
15
15
|
doc = File.read(target_path)
|
16
|
-
puts Konjak.translate(doc, Konjak.parse(tmx, gtt: true), src, target)
|
16
|
+
puts Konjak.translate(doc, Konjak.parse(tmx, gtt: true), src, target, format: :gtt_html)
|
data/lib/konjak/text.rb
CHANGED
data/lib/konjak/translation_unit.rb
CHANGED
@@ -30,6 +30,22 @@ module Konjak
|
|
30
30
|
child_elements.count {|e| TranslationUnitVariant === e } >= 2
|
31
31
|
end
|
32
32
|
|
33
|
+
def has_translation?(src_lang, target_lang)
|
34
|
+
src_lang?(src_lang) && has_variant_lang?(src_lang) && has_variant_lang?(target_lang)
|
35
|
+
end
|
36
|
+
|
37
|
+
def src_lang?(src_lang)
|
38
|
+
!self.src_lang || self.src_lang == '*all*' || self.src_lang == src_lang
|
39
|
+
end
|
40
|
+
|
41
|
+
def has_variant_lang?(lang)
|
42
|
+
variants.any? {|v| v.xml_lang == lang }
|
43
|
+
end
|
44
|
+
|
45
|
+
def variant(lang)
|
46
|
+
variants.detect {|v| v.xml_lang == lang }
|
47
|
+
end
|
48
|
+
|
33
49
|
# FIXME
|
34
50
|
# Zero, one or more <note>, or <prop> elements in any order, followed by
|
35
51
|
# One or more <tuv> elements.
|
data/lib/konjak/translator/gtt_html_translate.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Konjak
|
2
|
+
class Translator
|
3
|
+
module GttHtmlTranslate
|
4
|
+
refine(Text) do
|
5
|
+
def gtt_tag_ns
|
6
|
+
scan(/\{(\d+)\}/).flatten.uniq
|
7
|
+
end
|
8
|
+
|
9
|
+
def compile_gtt_html_pattern
|
10
|
+
regexp = Regexp.escape(self)
|
11
|
+
gtt_tag_ns.each do |n|
|
12
|
+
regexp = regexp.sub(/\\\{#{n}\\\}/) { "(?<n#{n}><(?<_#{n}>\\w+)[^>]*>)" }
|
13
|
+
regexp = regexp.gsub(/\\\{#{n}\\\}/) { "\\k<n#{n}>" }
|
14
|
+
regexp = regexp.gsub(/\\\{\/#{n}\\\}/) { "</\\k<_#{n}>>" }
|
15
|
+
end
|
16
|
+
Regexp.compile(regexp)
|
17
|
+
end
|
18
|
+
|
19
|
+
def interpolate_gtt_html_pattern(match_data)
|
20
|
+
new_text = dup
|
21
|
+
gtt_tag_ns.each do |n|
|
22
|
+
new_text = new_text.gsub("{#{n}}", match_data["n#{n}"])
|
23
|
+
new_text = new_text.gsub("{/#{n}}", "</#{match_data["_#{n}"]}>")
|
24
|
+
end
|
25
|
+
new_text
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
refine(TranslationUnit) do
|
30
|
+
def translate(src_lang, target_lang, text)
|
31
|
+
pattern = variant(src_lang).segment.text.compile_gtt_html_pattern
|
32
|
+
target_text = variant(target_lang).segment.text
|
33
|
+
|
34
|
+
texts = []
|
35
|
+
while true
|
36
|
+
head, match, tail = text.partition(pattern)
|
37
|
+
break if match.empty?
|
38
|
+
texts << head unless head.empty?
|
39
|
+
texts << TranslatedString.new(target_text.interpolate_gtt_html_pattern($~))
|
40
|
+
text = tail
|
41
|
+
end
|
42
|
+
texts << text
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/konjak/translator/text_translate.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Konjak
|
2
|
+
class Translator
|
3
|
+
module TextTranslate
|
4
|
+
refine(TranslationUnit) do
|
5
|
+
def translate(src_lang, target_lang, text)
|
6
|
+
s = variant(src_lang).segment.text
|
7
|
+
t = variant(target_lang).segment.text
|
8
|
+
|
9
|
+
texts = []
|
10
|
+
while true
|
11
|
+
head, match, tail = text.partition(s)
|
12
|
+
break if match.empty?
|
13
|
+
texts << head unless head.empty?
|
14
|
+
texts << TranslatedString.new(t)
|
15
|
+
text = tail
|
16
|
+
end
|
17
|
+
texts << text
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/konjak/translator.rb
CHANGED
@@ -1,39 +1,34 @@
|
|
1
1
|
require 'mem'
|
2
|
+
require 'konjak/translator/gtt_html_translate'
|
3
|
+
require 'konjak/translator/text_translate'
|
4
|
+
require 'konjak/translator/translated_string'
|
2
5
|
|
3
6
|
module Konjak
|
4
7
|
class Translator
|
8
|
+
|
5
9
|
include Mem
|
6
10
|
|
7
|
-
attr_reader :tmx, :src_lang, :target_lang
|
11
|
+
attr_reader :tmx, :src_lang, :target_lang, :options
|
8
12
|
|
9
|
-
def initialize(tmx, src_lang, target_lang)
|
10
|
-
@tmx
|
11
|
-
@src_lang
|
13
|
+
def initialize(tmx, src_lang, target_lang, **options)
|
14
|
+
@tmx = tmx
|
15
|
+
@src_lang = src_lang
|
12
16
|
@target_lang = target_lang
|
17
|
+
@options = options
|
13
18
|
end
|
14
19
|
|
15
20
|
def translate(doc)
|
16
21
|
translated_docs = [doc.dup]
|
17
22
|
translation_units.each do |tu|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
head, match, tail = d.partition(s)
|
28
|
-
ds << head
|
29
|
-
ds << t.dup.tap {|t| def t.translated; true; end }
|
30
|
-
|
31
|
-
break unless tail.include?(s)
|
32
|
-
|
33
|
-
d = tail
|
34
|
-
end
|
35
|
-
ds << tail
|
36
|
-
ds
|
23
|
+
translated_docs.map! { |text|
|
24
|
+
next text if text.is_a?(TranslatedString)
|
25
|
+
|
26
|
+
env = translate_env.dup
|
27
|
+
env.local_variable_set(:tu, tu)
|
28
|
+
env.local_variable_set(:src_lang, src_lang)
|
29
|
+
env.local_variable_set(:target_lang, target_lang)
|
30
|
+
env.local_variable_set(:text, text)
|
31
|
+
eval('tu.translate(src_lang, target_lang, text)', env)
|
37
32
|
}.flatten!
|
38
33
|
end
|
39
34
|
translated_docs.join
|
@@ -41,13 +36,28 @@ module Konjak
|
|
41
36
|
|
42
37
|
private
|
43
38
|
|
39
|
+
TRANSLATE_ENVS= {
|
40
|
+
text: Class.new { using TextTranslate; break binding },
|
41
|
+
gtt_html: Class.new { using GttHtmlTranslate; break binding }
|
42
|
+
}
|
43
|
+
|
44
|
+
def format
|
45
|
+
if TRANSLATE_ENVS.has_key?(options[:format])
|
46
|
+
options[:format]
|
47
|
+
else
|
48
|
+
:text
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def translate_env
|
53
|
+
TRANSLATE_ENVS[format]
|
54
|
+
end
|
55
|
+
|
44
56
|
def translation_units
|
45
57
|
tmx.body.translation_units.select { |tu|
|
46
|
-
|
47
|
-
tu.variants.any? {|v| v.xml_lang == src_lang } &&
|
48
|
-
tu.variants.any? {|v| v.xml_lang == target_lang }
|
58
|
+
tu.has_translation?(src_lang, target_lang)
|
49
59
|
}.sort_by {|tu|
|
50
|
-
-tu.
|
60
|
+
-tu.variant(src_lang).segment.text.length
|
51
61
|
}
|
52
62
|
end
|
53
63
|
memoize :translation_units
|
data/lib/konjak/version.rb
CHANGED
data/lib/konjak.rb
CHANGED
@@ -36,13 +36,13 @@ require 'konjak/translator'
|
|
36
36
|
|
37
37
|
module Konjak
|
38
38
|
class << self
|
39
|
-
def parse(xml, **
|
40
|
-
Parser.new.parse(xml, **
|
39
|
+
def parse(xml, **options)
|
40
|
+
Parser.new.parse(xml, **options)
|
41
41
|
end
|
42
42
|
|
43
|
-
def translate(doc, xml_or_tmx, src_lang, target_lang)
|
43
|
+
def translate(doc, xml_or_tmx, src_lang, target_lang, **options)
|
44
44
|
tmx = xml_or_tmx.kind_of?(Tmx) ? xml_or_tmx : parse(xml_or_tmx)
|
45
|
-
Translator.new(tmx, src_lang, target_lang).translate(doc)
|
45
|
+
Translator.new(tmx, src_lang, target_lang, **options).translate(doc)
|
46
46
|
end
|
47
47
|
end
|
48
48
|
end
|
data/spec/konjak_parse_spec.rb
CHANGED
@@ -162,7 +162,7 @@ describe Konjak do
|
|
162
162
|
describe 'gtt' do
|
163
163
|
let(:xml) { File.read('spec/fixtures/gtt.tmx') }
|
164
164
|
|
165
|
-
subject { tmx.body.translation_units.detect {|tu| tu.variants.detect {|v| v.segment.text
|
165
|
+
subject { tmx.body.translation_units.detect {|tu| tu.variants.detect {|v| v.segment.text == "\n\n& it's also example." } } }
|
166
166
|
|
167
167
|
context 'gtt: true' do
|
168
168
|
let(:tmx) { Konjak.parse(xml, gtt: true) }
|
data/spec/konjak_translate_spec.rb
CHANGED
@@ -24,4 +24,21 @@ this is données (avec un caractère non standard: ).
|
|
24
24
|
EXPECT
|
25
25
|
end
|
26
26
|
|
27
|
+
context 'when format is GTT' do
|
28
|
+
let(:gtt_tmx) { Konjak.parse(File.read('spec/fixtures/gtt.tmx')) }
|
29
|
+
|
30
|
+
let(:doc) { <<GTT_HTML }
|
31
|
+
This is <a href="http://example.com">example</a>.
|
32
|
+
And This is <b>example</b>. Yey.
|
33
|
+
And This is example.
|
34
|
+
GTT_HTML
|
35
|
+
|
36
|
+
subject { Konjak.translate(doc, gtt_tmx, 'en', 'ja', format: :gtt_html) }
|
37
|
+
|
38
|
+
it { is_expected.to eq <<EXPECT }
|
39
|
+
これは、 <a href="http://example.com">例</a> 。
|
40
|
+
And これは、 <b>例</b> 。 Yey.
|
41
|
+
And This is example.
|
42
|
+
EXPECT
|
43
|
+
end
|
27
44
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seiei Higa
|
@@ -162,6 +162,9 @@ files:
|
|
162
162
|
- lib/konjak/translation_unit.rb
|
163
163
|
- lib/konjak/translation_unit_variant.rb
|
164
164
|
- lib/konjak/translator.rb
|
165
|
+
- lib/konjak/translator/gtt_html_translate.rb
|
166
|
+
- lib/konjak/translator/text_translate.rb
|
167
|
+
- lib/konjak/translator/translated_string.rb
|
165
168
|
- lib/konjak/unknown_tag.rb
|
166
169
|
- lib/konjak/user_defined_encoding.rb
|
167
170
|
- lib/konjak/version.rb
|