konjak 0.0.17 → 0.0.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/konjak/tmx_segmentor/strategy.rb +46 -12
- data/lib/konjak/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4ffc2d0548951d62d07f5612bd4399e8a8f6519
|
4
|
+
data.tar.gz: 77a83feb0b806e2e6a87b56246c7062fa175f364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c92f4f5d4328b6fe29e5a92451b7e648796c37ec7c044a58ecfbb70e7620cf67494fce6a8052bbce6901caa85a529a5d35001ccafe236c7dd0b97efcd7e399d3
|
7
|
+
data.tar.gz: 995318581a91f8fbc41ddf97258777ba3942e6e7c69ab1b303208be8d7f1aecca44c2b0561c1a10adc70f5055e583223f97ad8fcca7787a8165f85ff37f8a8e8
|
@@ -17,7 +17,7 @@ module Konjak
|
|
17
17
|
|
18
18
|
def segmentize(text)
|
19
19
|
segments = [text]
|
20
|
-
translation_units.each do |translation_unit|
|
20
|
+
translation_units(text).each do |translation_unit|
|
21
21
|
segment = translation_unit.variant(@lang).segment
|
22
22
|
|
23
23
|
pat = compile_pattern(segment)
|
@@ -35,13 +35,20 @@ module Konjak
|
|
35
35
|
private
|
36
36
|
|
37
37
|
def default_options
|
38
|
-
{
|
38
|
+
{
|
39
|
+
min_segment_length: 10,
|
40
|
+
max_segment_length: nil
|
41
|
+
}
|
39
42
|
end
|
40
43
|
|
41
44
|
def min_segment_length
|
42
45
|
@options[:min_segment_length]
|
43
46
|
end
|
44
47
|
|
48
|
+
def max_segment_length
|
49
|
+
@options[:max_segment_length]
|
50
|
+
end
|
51
|
+
|
45
52
|
def split(pat, segment, text)
|
46
53
|
texts = []
|
47
54
|
while true
|
@@ -62,26 +69,53 @@ module Konjak
|
|
62
69
|
texts << text
|
63
70
|
end
|
64
71
|
|
65
|
-
def translation_units
|
66
|
-
@tmx.body.translation_units
|
72
|
+
def translation_units(text)
|
73
|
+
tus = @tmx.body.translation_units
|
74
|
+
|
75
|
+
tus.select! {|tu|
|
76
|
+
segment = tu.variant(@lang).segment
|
77
|
+
segment_length = segment.text.length
|
78
|
+
|
79
|
+
next false if segment_length < min_segment_length
|
80
|
+
next false if max_segment_length && max_segment_length < segment_length
|
81
|
+
|
82
|
+
text =~ compile_pattern(tu.variant(@lang).segment)
|
83
|
+
}
|
84
|
+
|
85
|
+
simular_translation_units_map = {}
|
86
|
+
|
87
|
+
tus.sort_by! {|tu|
|
88
|
+
tu_segment = tu.variant(@lang).segment
|
89
|
+
segment_text = tu_segment.text
|
90
|
+
|
91
|
+
unless simular_translation_units_map[segment_text]
|
92
|
+
simular_translation_units = tus.select {|tu2|
|
93
|
+
tu2.variant(@lang).segment.text.include?(segment_text)
|
94
|
+
}.sort_by! {|tu2| tu2.variant(@lang).segment.text.size }
|
95
|
+
|
96
|
+
simular_translation_units.each do |tu2|
|
97
|
+
simular_translation_units_map[tu2.variant(@lang).segment.text] = simular_translation_units
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
rank = simular_translation_units_map[segment_text].index {|tu2|
|
102
|
+
tu2.variant(@lang).segment.text == segment_text
|
103
|
+
}
|
104
|
+
|
67
105
|
# GTTの場合
|
68
106
|
translation_timestamp = nil
|
69
|
-
|
70
107
|
if tm_entry = tu.at('entry_metadata').try(:at, 'tm_entry')
|
71
108
|
source_info = tm_entry.at('source_info')
|
72
|
-
if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) ==
|
109
|
+
if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) == segment_text
|
73
110
|
translation_timestamp = tm_entry.at('translation').try(:attr, 'translation_timestamp').to_i
|
74
111
|
end
|
75
112
|
end
|
76
|
-
|
77
113
|
translation_timestamp ||= 0
|
78
114
|
|
79
|
-
|
80
|
-
|
81
|
-
[-translation_timestamp, -segment_length]
|
82
|
-
}.reject {|tu|
|
83
|
-
tu.variant(@lang).segment.text.length < min_segment_length
|
115
|
+
[-rank, -translation_timestamp, -segment_text.length]
|
84
116
|
}
|
117
|
+
|
118
|
+
tus
|
85
119
|
end
|
86
120
|
end
|
87
121
|
end
|
data/lib/konjak/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.17
|
4
|
+
version: 0.0.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seiei Higa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -216,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
216
|
version: '0'
|
217
217
|
requirements: []
|
218
218
|
rubyforge_project:
|
219
|
-
rubygems_version: 2.4.5
|
219
|
+
rubygems_version: 2.4.5.1
|
220
220
|
signing_key:
|
221
221
|
specification_version: 4
|
222
222
|
summary: TMX(Translation Memory exChange) tools for ruby
|