konjak 0.0.19 → 0.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 768f8768ce6cf9b74a7efdbc099fe192d8315263
4
- data.tar.gz: 6ea6d582ac4df35043317bc274bf9391716fe5f3
3
+ metadata.gz: ffdb00c55006924feabdbc94d04742f2469f047b
4
+ data.tar.gz: d8df2981f8610a26295b044893f1409b3f3c0284
5
5
  SHA512:
6
- metadata.gz: 90a225fc3be21ca3a273ff7304c5c803d72ed372449a53143090909b48e4272ba8cf9f47bc0522dc502e50582ebcd0c76c3dffce4ef605c7322e3448c9a0ab48
7
- data.tar.gz: cdf1f709324975a459b808e7c855efcfa981cee5f96839905ab3744cd52fa80a7247f199957617059b1f509211582aa32558c1a8e666b827ae303a503f29f6b2
6
+ metadata.gz: 0a6205a76ce7c53e181b14d75109e12bfb6d3d55781b30f29d1895824a47261ac85277fdf9aa100c91b5e628d8eeb15445defaf3e4ebe74a796ea6e3af854c62
7
+ data.tar.gz: 1bdec79054206addf2b551ddc53ebc22d4e70a8492a526655b8d81d5504d64d8f61e67e49e62789b5353ba52436302be04d99895f2a8d947de43aec42d0dfc03
@@ -7,117 +7,121 @@ require 'konjak/tmx_segmentor/segment_string'
7
7
  module Konjak
8
8
  class TmxSegmentor < Segmentor
9
9
  class Strategy
10
+ Edge = Struct.new(:prev, :current)
11
+ Node = Struct.new(:range, :segment)
12
+ Node::None = -1
13
+
10
14
  include Mem
11
15
 
12
- def initialize(tmx, lang, options = {})
13
- @tmx = tmx
14
- @lang = lang
16
+ def initialize(tmx, lang, text, options = {})
17
+ @tmx = tmx
18
+ @lang = lang
19
+ @text = text
15
20
  @options = default_options.merge(options)
16
21
  end
17
22
 
18
- def segmentize(text)
19
- range_segment_pairs = []
20
-
21
- translation_units.each {|tu|
22
- segment = tu.variant(@lang).segment
23
- text.scan(compile_pattern(segment)) {
24
- range_segment_pairs << [($~.begin(0)...$~.end(0)), segment]
25
- }
26
- }
27
-
23
+ def segments
28
24
  # Can't split text
29
- return [text] if range_segment_pairs.empty?
30
-
31
- range_segment_pairs.uniq! {|rsp| [rsp[0], rsp[1].text] }
32
- range_segment_pairs.sort_by! {|(m, s)|
33
- [m.begin, -s.text.size]
34
- }
35
-
36
- max_weight_range_segments = max_weight_range_segments(range_segment_pairs)
25
+ return [@text] if nodes.empty?
37
26
 
38
27
  segments = []
39
28
  prev_text_index = 0
40
- max_weight_range_segments.each do |(range, segment)|
41
- prev_text = text[prev_text_index...range.begin]
29
+ max_cost_path.each do |node|
30
+ range = node.range
31
+ segment = node.segment
32
+ prev_text = @text[prev_text_index...range.begin]
42
33
 
43
34
  segments << prev_text unless prev_text.empty?
44
35
 
45
- segments << SegmentString.new(text[range.begin, range.size], segment)
36
+ segments << SegmentString.new(@text[range.begin, range.size], segment)
46
37
 
47
38
  prev_text_index = range.end
48
39
  end
49
- after_text = text[prev_text_index..-1]
40
+ after_text = @text[prev_text_index..-1]
50
41
  segments << after_text unless after_text.empty?
51
42
  segments
52
43
  end
53
44
 
54
45
 
55
- def max_weight_range_segments(range_segment_pairs)
56
- edges = []
57
- prev_nodes = Array.new(range_segment_pairs.size, -1)
58
- weights = range_segment_pairs.map {|rsp| rsp[0].size }
46
+ def max_cost_path
47
+ prev_nodes = nodes.map {|node| [node, Node::None] }.to_h
48
+ costs = nodes.map {|node| [node, node.range.size] }.to_h
59
49
 
60
- range_segment_pairs.each_with_index do |rsp, rsp_i|
61
- ((rsp_i + 1)...range_segment_pairs.size).each do |rsp2_i|
62
- rsp2 = range_segment_pairs[rsp2_i]
50
+ edges.each do |edge|
51
+ node, node2 = edge.prev, edge.current
52
+ new_node2_cost = costs[node] + calc_edge_cost(edge)
63
53
 
64
- next if rsp2[0].begin < rsp[0].end
65
-
66
- edges << [rsp_i, rsp2_i]
54
+ if costs[node2] < new_node2_cost
55
+ costs[node2] = new_node2_cost
56
+ prev_nodes[node2] = node
67
57
  end
68
58
  end
69
59
 
70
- edges.each do |(rsp_i, rsp2_i)|
71
- new_rsp2_weight = weights[rsp_i] + range_segment_pairs[rsp2_i][0].size
60
+ node, _ = costs.max_by {|_, cost| cost }
72
61
 
73
- if weights[rsp2_i] < new_rsp2_weight
74
- weights[rsp2_i] = new_rsp2_weight
75
- prev_nodes[rsp2_i] = rsp_i
62
+ max_cost_node_indexes = Enumerator.new {|y|
63
+ loop do
64
+ break if node == Node::None
65
+ y << node
66
+ node = prev_nodes[node]
76
67
  end
77
- end
68
+ }.to_a.reverse
69
+ end
78
70
 
79
- node_index = weights.index(weights.max)
71
+ private
80
72
 
81
- max_weight_range_segment_indexes = Enumerator.new {|y|
82
- loop do
83
- break if node_index == -1
84
- y << node_index
85
- node_index = prev_nodes[node_index]
73
+ def edges
74
+ return @edges if @edges
75
+
76
+ @edges = []
77
+ nodes.each_with_index do |node, node_i|
78
+ ((node_i + 1)...nodes.size).each do |node2_i|
79
+ node2 = nodes[node2_i]
80
+
81
+ next if node2.range.begin < node.range.end
82
+
83
+ @edges << Edge.new(node, node2)
86
84
  end
87
- }.to_a.reverse
85
+ end
88
86
 
89
- max_weight_range_segment_indexes.map {|i|
90
- range_segment_pairs[i]
91
- }
87
+ @edges
92
88
  end
93
89
 
94
- private
90
+ def nodes
91
+ return @nodes if @nodes
92
+
93
+ @nodes = []
94
+
95
+ translation_units.each {|tu|
96
+ segment = tu.variant(@lang).segment
97
+ @text.scan(compile_pattern(segment)) {
98
+ @nodes << Node.new(($~.begin(0)...$~.end(0)), segment)
99
+ }
100
+ }
101
+
102
+ @nodes.uniq! {|node| [node.range, node.segment.text] }
103
+ @nodes.sort_by! {|node| [node.range.begin, -node.segment.text.size] }
104
+
105
+ @nodes
106
+ end
95
107
 
96
108
  def default_options
97
109
  {
98
- min_segment_length: 10,
99
- max_segment_length: nil
110
+ translation_unit_filter: -> (tu) { true },
111
+ calc_edge_cost: -> (edge) { edge.current.size }
100
112
  }
101
113
  end
102
114
 
103
- def min_segment_length
104
- @options[:min_segment_length]
115
+ def calc_edge_cost(edge)
116
+ @options[:calc_edge_cost].call(edge)
105
117
  end
106
118
 
107
- def max_segment_length
108
- @options[:max_segment_length]
119
+ def translation_unit_filter
120
+ @options[:translation_unit_filter]
109
121
  end
110
122
 
111
123
  def translation_units
112
- @translation_units ||= @tmx.body.translation_units.select {|tu|
113
- segment = tu.variant(@lang).segment
114
- segment_length = segment.text.length
115
-
116
- next false if segment_length < min_segment_length
117
- next false if max_segment_length && max_segment_length < segment_length
118
-
119
- true
120
- }
124
+ @translation_units ||= @tmx.body.translation_units.select(&translation_unit_filter)
121
125
  end
122
126
  end
123
127
  end
@@ -11,7 +11,7 @@ module Konjak
11
11
  }
12
12
 
13
13
  def segments
14
- strategy.segmentize(content)
14
+ strategy.segments
15
15
  end
16
16
 
17
17
  private
@@ -33,7 +33,7 @@ module Konjak
33
33
  end
34
34
 
35
35
  def strategy
36
- STRATEGIES[format].new(tmx, lang, @options)
36
+ STRATEGIES[format].new(tmx, lang, content, @options)
37
37
  end
38
38
 
39
39
  end
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.19"
2
+ VERSION = "0.0.20"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.19
4
+ version: 0.0.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-29 00:00:00.000000000 Z
11
+ date: 2015-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport