konjak 0.0.12 → 0.0.13

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b3737eb910538d760657c5e527b35be1fc9599f
4
- data.tar.gz: 85f858f6a152cf53b32e6b831b737d8729712437
3
+ metadata.gz: cd181dadc5d9128c27be3312f50274acffbcfce8
4
+ data.tar.gz: 29fa12fb01c4409b7587496413b0fda0c46ab0a4
5
5
  SHA512:
6
- metadata.gz: 38622d0a0d77bb334554e0cb5fde0cf8226c1eab06bfec5e86e5a202174c61acd9b83de34ec297299acd4974b1cc85198500591685daaf4e2d7adf77c435ed1f
7
- data.tar.gz: bb75de764a2a6722e5435be2f3ca3acd6d39147013630892a8072fadeefadc3bee28f3614327668d98f5843d3ac74756600b1168dfa56def824b70a8253e4430
6
+ metadata.gz: 188010cd203698454c7f2204c20e6a6e19a18a7eb25640d0c9555e011568ff87fe96ba521ab04c1c42975c59636e634992144a825980f9398abb12e72f913996
7
+ data.tar.gz: ba9cf03f27fe8599815060771ea437fb852b95e0a5f3f1e06524e9ef0c5930b0a373c9fca08cbffd83c44b5238acd51403058775a83fe328a3550c7424cd88ee
@@ -1,18 +1,26 @@
1
+ require 'mem'
2
+
1
3
  module Konjak
2
4
  class Segment < StructuralElement
3
5
  module GTT
6
+ include Mem
7
+
4
8
  Tag = Struct.new(:gtt, :html)
5
9
 
6
10
  def compile_gtt_html_pattern
7
11
  regexp = Regexp.escape(text)
8
12
  gtt_tag_ns.each do |n|
9
- regexp = regexp.sub(/\\\{#{n}\\\}/) { "(?<n#{n}><(?<_#{n}>\\w+)[^>]*>)" }
10
- regexp = regexp.gsub(/\\\{#{n}\\\}/) { "\\k<n#{n}>" }
11
- regexp = regexp.gsub(/\\\{\/#{n}\\\}/) { "</\\k<_#{n}>>" }
13
+ regexp.sub!(/\\\{#{n}\\\}/) { "(?<n#{n}><(?<_#{n}>\\w+)[^>]*>)" }
14
+ regexp.gsub!(/\\\{#{n}\\\}/) { "\\k<n#{n}>" }
15
+ regexp.gsub!(/\\\{\/#{n}\\\}/) { "</\\k<_#{n}>>" }
12
16
  end
13
- regexp = regexp.gsub(/(?:\\\s|\n)/m) { '\s' }
17
+ regexp.gsub!(/(?:\\\s|\n)/m) { '\s' }
18
+ regexp.gsub!(/(?:\\s)+/m) {|s| s + '++' }
19
+ regexp.gsub!(/^(?<s>(?:\\s)+)\+\+/) { $~[:s] }
20
+ regexp.gsub!(/(?<s>(?:\\s)+)\+\+$/) { $~[:s] }
14
21
  Regexp.compile(regexp)
15
22
  end
23
+ memoize :compile_gtt_html_pattern
16
24
 
17
25
  def extract_gtt_tags_from(text)
18
26
  m = text.match(compile_gtt_html_pattern)
@@ -1,9 +1,11 @@
1
1
  require 'konjak/segment/gtt'
2
+ require 'mem'
2
3
 
3
4
  module Konjak
4
5
  # container
5
6
  class Segment < StructuralElement
6
7
  include GTT
8
+ include Mem
7
9
 
8
10
  # children
9
11
  def text
@@ -17,9 +19,13 @@ module Konjak
17
19
 
18
20
  def compile_pattern
19
21
  regexp = Regexp.escape(text)
20
- regexp = regexp.gsub(/(?:\\\s|\n)/m) { '\s' }
22
+ regexp = regexp.gsub(/(?:\\\s|\n)/m) { '\s' }
23
+ regexp = regexp.gsub(/(?:\\s)+/m) {|s| s + '++' }
24
+ regexp = regexp.gsub(/^(?<s>(?:\\s)+)\+\+/) { $~[:s] }
25
+ regexp = regexp.gsub(/(?<s>(?:\\s)+)\+\+$/) { $~[:s] }
21
26
  Regexp.compile(regexp)
22
27
  end
28
+ memoize :compile_pattern
23
29
 
24
30
  def translation_unit
25
31
  TranslationUnit.new(translation_unit_variant.parent)
@@ -7,21 +7,8 @@ module Konjak
7
7
 
8
8
  private
9
9
 
10
- def split(translation_unit, text)
11
- segment = translation_unit.variant(@lang).segment
12
- pattern = segment.compile_gtt_html_pattern
13
-
14
- texts = []
15
- while true
16
- head, match, tail = text.partition(pattern)
17
- break if match.empty? || text.length < min_segment_length
18
- texts << head unless head.empty?
19
-
20
- texts << SegmentString.new(match, segment)
21
-
22
- text = tail
23
- end
24
- texts << text
10
+ def compile_pattern(segment)
11
+ segment.compile_gtt_html_pattern
25
12
  end
26
13
  end
27
14
  end
@@ -16,10 +16,13 @@ module Konjak
16
16
  def segmentize(text)
17
17
  segments = [text]
18
18
  translation_units.each do |translation_unit|
19
+ segment = translation_unit.variant(@lang).segment
20
+
19
21
  segments.map! {|text|
22
+ next text if text.length < min_segment_length
20
23
  next text if text.is_a?(SegmentString)
21
24
 
22
- split(translation_unit, text)
25
+ split(segment, text)
23
26
  }.flatten!
24
27
  end
25
28
  segments
@@ -35,6 +38,23 @@ module Konjak
35
38
  @options[:min_segment_length]
36
39
  end
37
40
 
41
+ def split(segment, text)
42
+ texts = []
43
+ while true
44
+ break if text.length < min_segment_length
45
+
46
+ head, match, tail = text.partition(compile_pattern(segment))
47
+ break if match.empty?
48
+
49
+ texts << head unless head.empty?
50
+
51
+ texts << SegmentString.new(match, segment)
52
+
53
+ text = tail
54
+ end
55
+ texts << text
56
+ end
57
+
38
58
  def translation_units
39
59
  @tmx.body.translation_units.sort_by {|tu|
40
60
  -tu.variant(@lang).segment.text.length
@@ -7,21 +7,8 @@ module Konjak
7
7
 
8
8
  private
9
9
 
10
- def split(translation_unit, text)
11
- segment = translation_unit.variant(@lang).segment
12
- pattern = segment.compile_pattern
13
-
14
- texts = []
15
- while true
16
- head, match, tail = text.partition(pattern)
17
- break if match.empty? || text.length < min_segment_length
18
- texts << head unless head.empty?
19
-
20
- texts << SegmentString.new(match, segment)
21
-
22
- text = tail
23
- end
24
- texts << text
10
+ def compile_pattern(segment)
11
+ segment.compile_pattern
25
12
  end
26
13
  end
27
14
  end
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.12"
2
+ VERSION = "0.0.13"
3
3
  end
@@ -19,8 +19,10 @@ EXPECT
19
19
 
20
20
  context 'when blanks between words is not exactly match' do
21
21
  let(:doc) { <<DOC }
22
- this is data
23
- (with a non-standard character: ).
22
+ this is data (with a non-standard
23
+
24
+
25
+ character: ).
24
26
  this is data (with a non-standard character: ).
25
27
  DOC
26
28
 
@@ -54,7 +56,7 @@ EXPECT
54
56
  context 'when blanks between words is not exactly match' do
55
57
  let(:doc) { <<GTT_HTML }
56
58
  This is
57
- <a href="http://example.com">example</a>.
59
+ <a href="http://example.com">example</a>.
58
60
  And This
59
61
  is
60
62
  <b>example</b>. Yey.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa