konjak 0.0.19 → 0.0.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 768f8768ce6cf9b74a7efdbc099fe192d8315263
4
- data.tar.gz: 6ea6d582ac4df35043317bc274bf9391716fe5f3
3
+ metadata.gz: ffdb00c55006924feabdbc94d04742f2469f047b
4
+ data.tar.gz: d8df2981f8610a26295b044893f1409b3f3c0284
5
5
  SHA512:
6
- metadata.gz: 90a225fc3be21ca3a273ff7304c5c803d72ed372449a53143090909b48e4272ba8cf9f47bc0522dc502e50582ebcd0c76c3dffce4ef605c7322e3448c9a0ab48
7
- data.tar.gz: cdf1f709324975a459b808e7c855efcfa981cee5f96839905ab3744cd52fa80a7247f199957617059b1f509211582aa32558c1a8e666b827ae303a503f29f6b2
6
+ metadata.gz: 0a6205a76ce7c53e181b14d75109e12bfb6d3d55781b30f29d1895824a47261ac85277fdf9aa100c91b5e628d8eeb15445defaf3e4ebe74a796ea6e3af854c62
7
+ data.tar.gz: 1bdec79054206addf2b551ddc53ebc22d4e70a8492a526655b8d81d5504d64d8f61e67e49e62789b5353ba52436302be04d99895f2a8d947de43aec42d0dfc03
@@ -7,117 +7,121 @@ require 'konjak/tmx_segmentor/segment_string'
7
7
  module Konjak
8
8
  class TmxSegmentor < Segmentor
9
9
  class Strategy
10
+ Edge = Struct.new(:prev, :current)
11
+ Node = Struct.new(:range, :segment)
12
+ Node::None = -1
13
+
10
14
  include Mem
11
15
 
12
- def initialize(tmx, lang, options = {})
13
- @tmx = tmx
14
- @lang = lang
16
+ def initialize(tmx, lang, text, options = {})
17
+ @tmx = tmx
18
+ @lang = lang
19
+ @text = text
15
20
  @options = default_options.merge(options)
16
21
  end
17
22
 
18
- def segmentize(text)
19
- range_segment_pairs = []
20
-
21
- translation_units.each {|tu|
22
- segment = tu.variant(@lang).segment
23
- text.scan(compile_pattern(segment)) {
24
- range_segment_pairs << [($~.begin(0)...$~.end(0)), segment]
25
- }
26
- }
27
-
23
+ def segments
28
24
  # Can't split text
29
- return [text] if range_segment_pairs.empty?
30
-
31
- range_segment_pairs.uniq! {|rsp| [rsp[0], rsp[1].text] }
32
- range_segment_pairs.sort_by! {|(m, s)|
33
- [m.begin, -s.text.size]
34
- }
35
-
36
- max_weight_range_segments = max_weight_range_segments(range_segment_pairs)
25
+ return [@text] if nodes.empty?
37
26
 
38
27
  segments = []
39
28
  prev_text_index = 0
40
- max_weight_range_segments.each do |(range, segment)|
41
- prev_text = text[prev_text_index...range.begin]
29
+ max_cost_path.each do |node|
30
+ range = node.range
31
+ segment = node.segment
32
+ prev_text = @text[prev_text_index...range.begin]
42
33
 
43
34
  segments << prev_text unless prev_text.empty?
44
35
 
45
- segments << SegmentString.new(text[range.begin, range.size], segment)
36
+ segments << SegmentString.new(@text[range.begin, range.size], segment)
46
37
 
47
38
  prev_text_index = range.end
48
39
  end
49
- after_text = text[prev_text_index..-1]
40
+ after_text = @text[prev_text_index..-1]
50
41
  segments << after_text unless after_text.empty?
51
42
  segments
52
43
  end
53
44
 
54
45
 
55
- def max_weight_range_segments(range_segment_pairs)
56
- edges = []
57
- prev_nodes = Array.new(range_segment_pairs.size, -1)
58
- weights = range_segment_pairs.map {|rsp| rsp[0].size }
46
+ def max_cost_path
47
+ prev_nodes = nodes.map {|node| [node, Node::None] }.to_h
48
+ costs = nodes.map {|node| [node, node.range.size] }.to_h
59
49
 
60
- range_segment_pairs.each_with_index do |rsp, rsp_i|
61
- ((rsp_i + 1)...range_segment_pairs.size).each do |rsp2_i|
62
- rsp2 = range_segment_pairs[rsp2_i]
50
+ edges.each do |edge|
51
+ node, node2 = edge.prev, edge.current
52
+ new_node2_cost = costs[node] + calc_edge_cost(edge)
63
53
 
64
- next if rsp2[0].begin < rsp[0].end
65
-
66
- edges << [rsp_i, rsp2_i]
54
+ if costs[node2] < new_node2_cost
55
+ costs[node2] = new_node2_cost
56
+ prev_nodes[node2] = node
67
57
  end
68
58
  end
69
59
 
70
- edges.each do |(rsp_i, rsp2_i)|
71
- new_rsp2_weight = weights[rsp_i] + range_segment_pairs[rsp2_i][0].size
60
+ node, _ = costs.max_by {|_, cost| cost }
72
61
 
73
- if weights[rsp2_i] < new_rsp2_weight
74
- weights[rsp2_i] = new_rsp2_weight
75
- prev_nodes[rsp2_i] = rsp_i
62
+ max_cost_node_indexes = Enumerator.new {|y|
63
+ loop do
64
+ break if node == Node::None
65
+ y << node
66
+ node = prev_nodes[node]
76
67
  end
77
- end
68
+ }.to_a.reverse
69
+ end
78
70
 
79
- node_index = weights.index(weights.max)
71
+ private
80
72
 
81
- max_weight_range_segment_indexes = Enumerator.new {|y|
82
- loop do
83
- break if node_index == -1
84
- y << node_index
85
- node_index = prev_nodes[node_index]
73
+ def edges
74
+ return @edges if @edges
75
+
76
+ @edges = []
77
+ nodes.each_with_index do |node, node_i|
78
+ ((node_i + 1)...nodes.size).each do |node2_i|
79
+ node2 = nodes[node2_i]
80
+
81
+ next if node2.range.begin < node.range.end
82
+
83
+ @edges << Edge.new(node, node2)
86
84
  end
87
- }.to_a.reverse
85
+ end
88
86
 
89
- max_weight_range_segment_indexes.map {|i|
90
- range_segment_pairs[i]
91
- }
87
+ @edges
92
88
  end
93
89
 
94
- private
90
+ def nodes
91
+ return @nodes if @nodes
92
+
93
+ @nodes = []
94
+
95
+ translation_units.each {|tu|
96
+ segment = tu.variant(@lang).segment
97
+ @text.scan(compile_pattern(segment)) {
98
+ @nodes << Node.new(($~.begin(0)...$~.end(0)), segment)
99
+ }
100
+ }
101
+
102
+ @nodes.uniq! {|node| [node.range, node.segment.text] }
103
+ @nodes.sort_by! {|node| [node.range.begin, -node.segment.text.size] }
104
+
105
+ @nodes
106
+ end
95
107
 
96
108
  def default_options
97
109
  {
98
- min_segment_length: 10,
99
- max_segment_length: nil
110
+ translation_unit_filter: -> (tu) { true },
111
+ calc_edge_cost: -> (edge) { edge.current.size }
100
112
  }
101
113
  end
102
114
 
103
- def min_segment_length
104
- @options[:min_segment_length]
115
+ def calc_edge_cost(edge)
116
+ @options[:calc_edge_cost].call(edge)
105
117
  end
106
118
 
107
- def max_segment_length
108
- @options[:max_segment_length]
119
+ def translation_unit_filter
120
+ @options[:translation_unit_filter]
109
121
  end
110
122
 
111
123
  def translation_units
112
- @translation_units ||= @tmx.body.translation_units.select {|tu|
113
- segment = tu.variant(@lang).segment
114
- segment_length = segment.text.length
115
-
116
- next false if segment_length < min_segment_length
117
- next false if max_segment_length && max_segment_length < segment_length
118
-
119
- true
120
- }
124
+ @translation_units ||= @tmx.body.translation_units.select(&translation_unit_filter)
121
125
  end
122
126
  end
123
127
  end
@@ -11,7 +11,7 @@ module Konjak
11
11
  }
12
12
 
13
13
  def segments
14
- strategy.segmentize(content)
14
+ strategy.segments
15
15
  end
16
16
 
17
17
  private
@@ -33,7 +33,7 @@ module Konjak
33
33
  end
34
34
 
35
35
  def strategy
36
- STRATEGIES[format].new(tmx, lang, @options)
36
+ STRATEGIES[format].new(tmx, lang, content, @options)
37
37
  end
38
38
 
39
39
  end
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.19"
2
+ VERSION = "0.0.20"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.19
4
+ version: 0.0.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-29 00:00:00.000000000 Z
11
+ date: 2015-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport