konjak 0.0.19 → 0.0.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/konjak/tmx_segmentor/strategy.rb +71 -67
- data/lib/konjak/tmx_segmentor.rb +2 -2
- data/lib/konjak/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ffdb00c55006924feabdbc94d04742f2469f047b
|
4
|
+
data.tar.gz: d8df2981f8610a26295b044893f1409b3f3c0284
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a6205a76ce7c53e181b14d75109e12bfb6d3d55781b30f29d1895824a47261ac85277fdf9aa100c91b5e628d8eeb15445defaf3e4ebe74a796ea6e3af854c62
|
7
|
+
data.tar.gz: 1bdec79054206addf2b551ddc53ebc22d4e70a8492a526655b8d81d5504d64d8f61e67e49e62789b5353ba52436302be04d99895f2a8d947de43aec42d0dfc03
|
@@ -7,117 +7,121 @@ require 'konjak/tmx_segmentor/segment_string'
|
|
7
7
|
module Konjak
|
8
8
|
class TmxSegmentor < Segmentor
|
9
9
|
class Strategy
|
10
|
+
Edge = Struct.new(:prev, :current)
|
11
|
+
Node = Struct.new(:range, :segment)
|
12
|
+
Node::None = -1
|
13
|
+
|
10
14
|
include Mem
|
11
15
|
|
12
|
-
def initialize(tmx, lang, options = {})
|
13
|
-
@tmx
|
14
|
-
@lang
|
16
|
+
def initialize(tmx, lang, text, options = {})
|
17
|
+
@tmx = tmx
|
18
|
+
@lang = lang
|
19
|
+
@text = text
|
15
20
|
@options = default_options.merge(options)
|
16
21
|
end
|
17
22
|
|
18
|
-
def
|
19
|
-
range_segment_pairs = []
|
20
|
-
|
21
|
-
translation_units.each {|tu|
|
22
|
-
segment = tu.variant(@lang).segment
|
23
|
-
text.scan(compile_pattern(segment)) {
|
24
|
-
range_segment_pairs << [($~.begin(0)...$~.end(0)), segment]
|
25
|
-
}
|
26
|
-
}
|
27
|
-
|
23
|
+
def segments
|
28
24
|
# Can't split text
|
29
|
-
return [text] if
|
30
|
-
|
31
|
-
range_segment_pairs.uniq! {|rsp| [rsp[0], rsp[1].text] }
|
32
|
-
range_segment_pairs.sort_by! {|(m, s)|
|
33
|
-
[m.begin, -s.text.size]
|
34
|
-
}
|
35
|
-
|
36
|
-
max_weight_range_segments = max_weight_range_segments(range_segment_pairs)
|
25
|
+
return [@text] if nodes.empty?
|
37
26
|
|
38
27
|
segments = []
|
39
28
|
prev_text_index = 0
|
40
|
-
|
41
|
-
|
29
|
+
max_cost_path.each do |node|
|
30
|
+
range = node.range
|
31
|
+
segment = node.segment
|
32
|
+
prev_text = @text[prev_text_index...range.begin]
|
42
33
|
|
43
34
|
segments << prev_text unless prev_text.empty?
|
44
35
|
|
45
|
-
segments << SegmentString.new(text[range.begin, range.size], segment)
|
36
|
+
segments << SegmentString.new(@text[range.begin, range.size], segment)
|
46
37
|
|
47
38
|
prev_text_index = range.end
|
48
39
|
end
|
49
|
-
after_text = text[prev_text_index..-1]
|
40
|
+
after_text = @text[prev_text_index..-1]
|
50
41
|
segments << after_text unless after_text.empty?
|
51
42
|
segments
|
52
43
|
end
|
53
44
|
|
54
45
|
|
55
|
-
def max_weight_range_segments(range_segment_pairs)
|
56
|
-
|
57
|
-
|
58
|
-
weights = range_segment_pairs.map {|rsp| rsp[0].size }
|
46
|
+
def max_cost_path
|
47
|
+
prev_nodes = nodes.map {|node| [node, Node::None] }.to_h
|
48
|
+
costs = nodes.map {|node| [node, node.range.size] }.to_h
|
59
49
|
|
60
|
-
|
61
|
-
|
62
|
-
|
50
|
+
edges.each do |edge|
|
51
|
+
node, node2 = edge.prev, edge.current
|
52
|
+
new_node2_cost = costs[node] + calc_edge_cost(edge)
|
63
53
|
|
64
|
-
|
65
|
-
|
66
|
-
|
54
|
+
if costs[node2] < new_node2_cost
|
55
|
+
costs[node2] = new_node2_cost
|
56
|
+
prev_nodes[node2] = node
|
67
57
|
end
|
68
58
|
end
|
69
59
|
|
70
|
-
|
71
|
-
new_rsp2_weight = weights[rsp_i] + range_segment_pairs[rsp2_i][0].size
|
60
|
+
node, _ = costs.max_by {|_, cost| cost }
|
72
61
|
|
73
|
-
|
74
|
-
|
75
|
-
|
62
|
+
max_cost_node_indexes = Enumerator.new {|y|
|
63
|
+
loop do
|
64
|
+
break if node == Node::None
|
65
|
+
y << node
|
66
|
+
node = prev_nodes[node]
|
76
67
|
end
|
77
|
-
|
68
|
+
}.to_a.reverse
|
69
|
+
end
|
78
70
|
|
79
|
-
|
71
|
+
private
|
80
72
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
73
|
+
def edges
|
74
|
+
return @edges if @edges
|
75
|
+
|
76
|
+
@edges = []
|
77
|
+
nodes.each_with_index do |node, node_i|
|
78
|
+
((node_i + 1)...nodes.size).each do |node2_i|
|
79
|
+
node2 = nodes[node2_i]
|
80
|
+
|
81
|
+
next if node2.range.begin < node.range.end
|
82
|
+
|
83
|
+
@edges << Edge.new(node, node2)
|
86
84
|
end
|
87
|
-
|
85
|
+
end
|
88
86
|
|
89
|
-
|
90
|
-
range_segment_pairs[i]
|
91
|
-
}
|
87
|
+
@edges
|
92
88
|
end
|
93
89
|
|
94
|
-
|
90
|
+
def nodes
|
91
|
+
return @nodes if @nodes
|
92
|
+
|
93
|
+
@nodes = []
|
94
|
+
|
95
|
+
translation_units.each {|tu|
|
96
|
+
segment = tu.variant(@lang).segment
|
97
|
+
@text.scan(compile_pattern(segment)) {
|
98
|
+
@nodes << Node.new(($~.begin(0)...$~.end(0)), segment)
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
102
|
+
@nodes.uniq! {|node| [node.range, node.segment.text] }
|
103
|
+
@nodes.sort_by! {|node| [node.range.begin, -node.segment.text.size] }
|
104
|
+
|
105
|
+
@nodes
|
106
|
+
end
|
95
107
|
|
96
108
|
def default_options
|
97
109
|
{
|
98
|
-
|
99
|
-
|
110
|
+
translation_unit_filter: -> (tu) { true },
|
111
|
+
calc_edge_cost: -> (edge) { edge.current.size }
|
100
112
|
}
|
101
113
|
end
|
102
114
|
|
103
|
-
def
|
104
|
-
@options[:
|
115
|
+
def calc_edge_cost(edge)
|
116
|
+
@options[:calc_edge_cost].call(edge)
|
105
117
|
end
|
106
118
|
|
107
|
-
def
|
108
|
-
@options[:
|
119
|
+
def translation_unit_filter
|
120
|
+
@options[:translation_unit_filter]
|
109
121
|
end
|
110
122
|
|
111
123
|
def translation_units
|
112
|
-
@translation_units ||= @tmx.body.translation_units.select
|
113
|
-
segment = tu.variant(@lang).segment
|
114
|
-
segment_length = segment.text.length
|
115
|
-
|
116
|
-
next false if segment_length < min_segment_length
|
117
|
-
next false if max_segment_length && max_segment_length < segment_length
|
118
|
-
|
119
|
-
true
|
120
|
-
}
|
124
|
+
@translation_units ||= @tmx.body.translation_units.select(&translation_unit_filter)
|
121
125
|
end
|
122
126
|
end
|
123
127
|
end
|
data/lib/konjak/tmx_segmentor.rb
CHANGED
@@ -11,7 +11,7 @@ module Konjak
|
|
11
11
|
}
|
12
12
|
|
13
13
|
def segments
|
14
|
-
strategy.
|
14
|
+
strategy.segments
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
@@ -33,7 +33,7 @@ module Konjak
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def strategy
|
36
|
-
STRATEGIES[format].new(tmx, lang, @options)
|
36
|
+
STRATEGIES[format].new(tmx, lang, content, @options)
|
37
37
|
end
|
38
38
|
|
39
39
|
end
|
data/lib/konjak/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: konjak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.19
|
4
|
+
version: 0.0.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seiei Higa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|