konjak 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/konjak/tmx_segmentor/strategy.rb +46 -12
- data/lib/konjak/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4ffc2d0548951d62d07f5612bd4399e8a8f6519
|
4
|
+
data.tar.gz: 77a83feb0b806e2e6a87b56246c7062fa175f364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c92f4f5d4328b6fe29e5a92451b7e648796c37ec7c044a58ecfbb70e7620cf67494fce6a8052bbce6901caa85a529a5d35001ccafe236c7dd0b97efcd7e399d3
|
7
|
+
data.tar.gz: 995318581a91f8fbc41ddf97258777ba3942e6e7c69ab1b303208be8d7f1aecca44c2b0561c1a10adc70f5055e583223f97ad8fcca7787a8165f85ff37f8a8e8
|
@@ -17,7 +17,7 @@ module Konjak
|
|
17
17
|
|
18
18
|
def segmentize(text)
|
19
19
|
segments = [text]
|
20
|
-
translation_units.each do |translation_unit|
|
20
|
+
translation_units(text).each do |translation_unit|
|
21
21
|
segment = translation_unit.variant(@lang).segment
|
22
22
|
|
23
23
|
pat = compile_pattern(segment)
|
@@ -35,13 +35,20 @@ module Konjak
|
|
35
35
|
private
|
36
36
|
|
37
37
|
def default_options
|
38
|
-
{
|
38
|
+
{
|
39
|
+
min_segment_length: 10,
|
40
|
+
max_segment_length: nil
|
41
|
+
}
|
39
42
|
end
|
40
43
|
|
41
44
|
def min_segment_length
|
42
45
|
@options[:min_segment_length]
|
43
46
|
end
|
44
47
|
|
48
|
+
def max_segment_length
|
49
|
+
@options[:max_segment_length]
|
50
|
+
end
|
51
|
+
|
45
52
|
def split(pat, segment, text)
|
46
53
|
texts = []
|
47
54
|
while true
|
@@ -62,26 +69,53 @@ module Konjak
|
|
62
69
|
texts << text
|
63
70
|
end
|
64
71
|
|
65
|
-
def translation_units
|
66
|
-
@tmx.body.translation_units
|
72
|
+
def translation_units(text)
|
73
|
+
tus = @tmx.body.translation_units
|
74
|
+
|
75
|
+
tus.select! {|tu|
|
76
|
+
segment = tu.variant(@lang).segment
|
77
|
+
segment_length = segment.text.length
|
78
|
+
|
79
|
+
next false if segment_length < min_segment_length
|
80
|
+
next false if max_segment_length && max_segment_length < segment_length
|
81
|
+
|
82
|
+
text =~ compile_pattern(tu.variant(@lang).segment)
|
83
|
+
}
|
84
|
+
|
85
|
+
simular_translation_units_map = {}
|
86
|
+
|
87
|
+
tus.sort_by! {|tu|
|
88
|
+
tu_segment = tu.variant(@lang).segment
|
89
|
+
segment_text = tu_segment.text
|
90
|
+
|
91
|
+
unless simular_translation_units_map[segment_text]
|
92
|
+
simular_translation_units = tus.select {|tu2|
|
93
|
+
tu2.variant(@lang).segment.text.include?(segment_text)
|
94
|
+
}.sort_by! {|tu2| tu2.variant(@lang).segment.text.size }
|
95
|
+
|
96
|
+
simular_translation_units.each do |tu2|
|
97
|
+
simular_translation_units_map[tu2.variant(@lang).segment.text] = simular_translation_units
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
rank = simular_translation_units_map[segment_text].index {|tu2|
|
102
|
+
tu2.variant(@lang).segment.text == segment_text
|
103
|
+
}
|
104
|
+
|
67
105
|
# GTTの場合
|
68
106
|
translation_timestamp = nil
|
69
|
-
|
70
107
|
if tm_entry = tu.at('entry_metadata').try(:at, 'tm_entry')
|
71
108
|
source_info = tm_entry.at('source_info')
|
72
|
-
if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) ==
|
109
|
+
if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) == segment_text
|
73
110
|
translation_timestamp = tm_entry.at('translation').try(:attr, 'translation_timestamp').to_i
|
74
111
|
end
|
75
112
|
end
|
76
|
-
|
77
113
|
translation_timestamp ||= 0
|
78
114
|
|
79
|
-
|
80
|
-
|
81
|
-
[-translation_timestamp, -segment_length]
|
82
|
-
}.reject {|tu|
|
83
|
-
tu.variant(@lang).segment.text.length < min_segment_length
|
115
|
+
[-rank, -translation_timestamp, -segment_text.length]
|
84
116
|
}
|
117
|
+
|
118
|
+
tus
|
85
119
|
end
|
86
120
|
end
|
87
121
|
end
|
data/lib/konjak/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.17
|
4
|
+
version: 0.0.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seiei Higa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -216,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
216
216
|
version: '0'
|
217
217
|
requirements: []
|
218
218
|
rubyforge_project:
|
219
|
-
rubygems_version: 2.4.5
|
219
|
+
rubygems_version: 2.4.5.1
|
220
220
|
signing_key:
|
221
221
|
specification_version: 4
|
222
222
|
summary: TMX(Translation Memory exChange) tools for ruby
|