konjak 0.0.19 → 0.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/konjak/tmx_segmentor/strategy.rb +71 -67
- data/lib/konjak/tmx_segmentor.rb +2 -2
- data/lib/konjak/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ffdb00c55006924feabdbc94d04742f2469f047b
|
4
|
+
data.tar.gz: d8df2981f8610a26295b044893f1409b3f3c0284
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a6205a76ce7c53e181b14d75109e12bfb6d3d55781b30f29d1895824a47261ac85277fdf9aa100c91b5e628d8eeb15445defaf3e4ebe74a796ea6e3af854c62
|
7
|
+
data.tar.gz: 1bdec79054206addf2b551ddc53ebc22d4e70a8492a526655b8d81d5504d64d8f61e67e49e62789b5353ba52436302be04d99895f2a8d947de43aec42d0dfc03
|
@@ -7,117 +7,121 @@ require 'konjak/tmx_segmentor/segment_string'
|
|
7
7
|
module Konjak
|
8
8
|
class TmxSegmentor < Segmentor
|
9
9
|
class Strategy
|
10
|
+
Edge = Struct.new(:prev, :current)
|
11
|
+
Node = Struct.new(:range, :segment)
|
12
|
+
Node::None = -1
|
13
|
+
|
10
14
|
include Mem
|
11
15
|
|
12
|
-
def initialize(tmx, lang, options = {})
|
13
|
-
@tmx
|
14
|
-
@lang
|
16
|
+
def initialize(tmx, lang, text, options = {})
|
17
|
+
@tmx = tmx
|
18
|
+
@lang = lang
|
19
|
+
@text = text
|
15
20
|
@options = default_options.merge(options)
|
16
21
|
end
|
17
22
|
|
18
|
-
def
|
19
|
-
range_segment_pairs = []
|
20
|
-
|
21
|
-
translation_units.each {|tu|
|
22
|
-
segment = tu.variant(@lang).segment
|
23
|
-
text.scan(compile_pattern(segment)) {
|
24
|
-
range_segment_pairs << [($~.begin(0)...$~.end(0)), segment]
|
25
|
-
}
|
26
|
-
}
|
27
|
-
|
23
|
+
def segments
|
28
24
|
# Can't split text
|
29
|
-
return [text] if
|
30
|
-
|
31
|
-
range_segment_pairs.uniq! {|rsp| [rsp[0], rsp[1].text] }
|
32
|
-
range_segment_pairs.sort_by! {|(m, s)|
|
33
|
-
[m.begin, -s.text.size]
|
34
|
-
}
|
35
|
-
|
36
|
-
max_weight_range_segments = max_weight_range_segments(range_segment_pairs)
|
25
|
+
return [@text] if nodes.empty?
|
37
26
|
|
38
27
|
segments = []
|
39
28
|
prev_text_index = 0
|
40
|
-
|
41
|
-
|
29
|
+
max_cost_path.each do |node|
|
30
|
+
range = node.range
|
31
|
+
segment = node.segment
|
32
|
+
prev_text = @text[prev_text_index...range.begin]
|
42
33
|
|
43
34
|
segments << prev_text unless prev_text.empty?
|
44
35
|
|
45
|
-
segments << SegmentString.new(text[range.begin, range.size], segment)
|
36
|
+
segments << SegmentString.new(@text[range.begin, range.size], segment)
|
46
37
|
|
47
38
|
prev_text_index = range.end
|
48
39
|
end
|
49
|
-
after_text = text[prev_text_index..-1]
|
40
|
+
after_text = @text[prev_text_index..-1]
|
50
41
|
segments << after_text unless after_text.empty?
|
51
42
|
segments
|
52
43
|
end
|
53
44
|
|
54
45
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
weights = range_segment_pairs.map {|rsp| rsp[0].size }
|
46
|
+
def max_cost_path
|
47
|
+
prev_nodes = nodes.map {|node| [node, Node::None] }.to_h
|
48
|
+
costs = nodes.map {|node| [node, node.range.size] }.to_h
|
59
49
|
|
60
|
-
|
61
|
-
|
62
|
-
|
50
|
+
edges.each do |edge|
|
51
|
+
node, node2 = edge.prev, edge.current
|
52
|
+
new_node2_cost = costs[node] + calc_edge_cost(edge)
|
63
53
|
|
64
|
-
|
65
|
-
|
66
|
-
|
54
|
+
if costs[node2] < new_node2_cost
|
55
|
+
costs[node2] = new_node2_cost
|
56
|
+
prev_nodes[node2] = node
|
67
57
|
end
|
68
58
|
end
|
69
59
|
|
70
|
-
|
71
|
-
new_rsp2_weight = weights[rsp_i] + range_segment_pairs[rsp2_i][0].size
|
60
|
+
node, _ = costs.max_by {|_, cost| cost }
|
72
61
|
|
73
|
-
|
74
|
-
|
75
|
-
|
62
|
+
max_cost_node_indexes = Enumerator.new {|y|
|
63
|
+
loop do
|
64
|
+
break if node == Node::None
|
65
|
+
y << node
|
66
|
+
node = prev_nodes[node]
|
76
67
|
end
|
77
|
-
|
68
|
+
}.to_a.reverse
|
69
|
+
end
|
78
70
|
|
79
|
-
|
71
|
+
private
|
80
72
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
73
|
+
def edges
|
74
|
+
return @edges if @edges
|
75
|
+
|
76
|
+
@edges = []
|
77
|
+
nodes.each_with_index do |node, node_i|
|
78
|
+
((node_i + 1)...nodes.size).each do |node2_i|
|
79
|
+
node2 = nodes[node2_i]
|
80
|
+
|
81
|
+
next if node2.range.begin < node.range.end
|
82
|
+
|
83
|
+
@edges << Edge.new(node, node2)
|
86
84
|
end
|
87
|
-
|
85
|
+
end
|
88
86
|
|
89
|
-
|
90
|
-
range_segment_pairs[i]
|
91
|
-
}
|
87
|
+
@edges
|
92
88
|
end
|
93
89
|
|
94
|
-
|
90
|
+
def nodes
|
91
|
+
return @nodes if @nodes
|
92
|
+
|
93
|
+
@nodes = []
|
94
|
+
|
95
|
+
translation_units.each {|tu|
|
96
|
+
segment = tu.variant(@lang).segment
|
97
|
+
@text.scan(compile_pattern(segment)) {
|
98
|
+
@nodes << Node.new(($~.begin(0)...$~.end(0)), segment)
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
102
|
+
@nodes.uniq! {|node| [node.range, node.segment.text] }
|
103
|
+
@nodes.sort_by! {|node| [node.range.begin, -node.segment.text.size] }
|
104
|
+
|
105
|
+
@nodes
|
106
|
+
end
|
95
107
|
|
96
108
|
def default_options
|
97
109
|
{
|
98
|
-
|
99
|
-
|
110
|
+
translation_unit_filter: -> (tu) { true },
|
111
|
+
calc_edge_cost: -> (edge) { edge.current.size }
|
100
112
|
}
|
101
113
|
end
|
102
114
|
|
103
|
-
def
|
104
|
-
@options[:
|
115
|
+
def calc_edge_cost(edge)
|
116
|
+
@options[:calc_edge_cost].call(edge)
|
105
117
|
end
|
106
118
|
|
107
|
-
def
|
108
|
-
@options[:
|
119
|
+
def translation_unit_filter
|
120
|
+
@options[:translation_unit_filter]
|
109
121
|
end
|
110
122
|
|
111
123
|
def translation_units
|
112
|
-
@translation_units ||= @tmx.body.translation_units.select
|
113
|
-
segment = tu.variant(@lang).segment
|
114
|
-
segment_length = segment.text.length
|
115
|
-
|
116
|
-
next false if segment_length < min_segment_length
|
117
|
-
next false if max_segment_length && max_segment_length < segment_length
|
118
|
-
|
119
|
-
true
|
120
|
-
}
|
124
|
+
@translation_units ||= @tmx.body.translation_units.select(&translation_unit_filter)
|
121
125
|
end
|
122
126
|
end
|
123
127
|
end
|
data/lib/konjak/tmx_segmentor.rb
CHANGED
@@ -11,7 +11,7 @@ module Konjak
|
|
11
11
|
}
|
12
12
|
|
13
13
|
def segments
|
14
|
-
strategy.
|
14
|
+
strategy.segments
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
@@ -33,7 +33,7 @@ module Konjak
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def strategy
|
36
|
-
STRATEGIES[format].new(tmx, lang, @options)
|
36
|
+
STRATEGIES[format].new(tmx, lang, content, @options)
|
37
37
|
end
|
38
38
|
|
39
39
|
end
|
data/lib/konjak/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.19
|
4
|
+
version: 0.0.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seiei Higa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|