konjak 0.0.12 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b3737eb910538d760657c5e527b35be1fc9599f
4
- data.tar.gz: 85f858f6a152cf53b32e6b831b737d8729712437
3
+ metadata.gz: cd181dadc5d9128c27be3312f50274acffbcfce8
4
+ data.tar.gz: 29fa12fb01c4409b7587496413b0fda0c46ab0a4
5
5
  SHA512:
6
- metadata.gz: 38622d0a0d77bb334554e0cb5fde0cf8226c1eab06bfec5e86e5a202174c61acd9b83de34ec297299acd4974b1cc85198500591685daaf4e2d7adf77c435ed1f
7
- data.tar.gz: bb75de764a2a6722e5435be2f3ca3acd6d39147013630892a8072fadeefadc3bee28f3614327668d98f5843d3ac74756600b1168dfa56def824b70a8253e4430
6
+ metadata.gz: 188010cd203698454c7f2204c20e6a6e19a18a7eb25640d0c9555e011568ff87fe96ba521ab04c1c42975c59636e634992144a825980f9398abb12e72f913996
7
+ data.tar.gz: ba9cf03f27fe8599815060771ea437fb852b95e0a5f3f1e06524e9ef0c5930b0a373c9fca08cbffd83c44b5238acd51403058775a83fe328a3550c7424cd88ee
@@ -1,18 +1,26 @@
1
+ require 'mem'
2
+
1
3
  module Konjak
2
4
  class Segment < StructuralElement
3
5
  module GTT
6
+ include Mem
7
+
4
8
  Tag = Struct.new(:gtt, :html)
5
9
 
6
10
  def compile_gtt_html_pattern
7
11
  regexp = Regexp.escape(text)
8
12
  gtt_tag_ns.each do |n|
9
- regexp = regexp.sub(/\\\{#{n}\\\}/) { "(?<n#{n}><(?<_#{n}>\\w+)[^>]*>)" }
10
- regexp = regexp.gsub(/\\\{#{n}\\\}/) { "\\k<n#{n}>" }
11
- regexp = regexp.gsub(/\\\{\/#{n}\\\}/) { "</\\k<_#{n}>>" }
13
+ regexp.sub!(/\\\{#{n}\\\}/) { "(?<n#{n}><(?<_#{n}>\\w+)[^>]*>)" }
14
+ regexp.gsub!(/\\\{#{n}\\\}/) { "\\k<n#{n}>" }
15
+ regexp.gsub!(/\\\{\/#{n}\\\}/) { "</\\k<_#{n}>>" }
12
16
  end
13
- regexp = regexp.gsub(/(?:\\\s|\n)/m) { '\s' }
17
+ regexp.gsub!(/(?:\\\s|\n)/m) { '\s' }
18
+ regexp.gsub!(/(?:\\s)+/m) {|s| s + '++' }
19
+ regexp.gsub!(/^(?<s>(?:\\s)+)\+\+/) { $~[:s] }
20
+ regexp.gsub!(/(?<s>(?:\\s)+)\+\+$/) { $~[:s] }
14
21
  Regexp.compile(regexp)
15
22
  end
23
+ memoize :compile_gtt_html_pattern
16
24
 
17
25
  def extract_gtt_tags_from(text)
18
26
  m = text.match(compile_gtt_html_pattern)
@@ -1,9 +1,11 @@
1
1
  require 'konjak/segment/gtt'
2
+ require 'mem'
2
3
 
3
4
  module Konjak
4
5
  # container
5
6
  class Segment < StructuralElement
6
7
  include GTT
8
+ include Mem
7
9
 
8
10
  # children
9
11
  def text
@@ -17,9 +19,13 @@ module Konjak
17
19
 
18
20
  def compile_pattern
19
21
  regexp = Regexp.escape(text)
20
- regexp = regexp.gsub(/(?:\\\s|\n)/m) { '\s' }
22
+ regexp = regexp.gsub(/(?:\\\s|\n)/m) { '\s' }
23
+ regexp = regexp.gsub(/(?:\\s)+/m) {|s| s + '++' }
24
+ regexp = regexp.gsub(/^(?<s>(?:\\s)+)\+\+/) { $~[:s] }
25
+ regexp = regexp.gsub(/(?<s>(?:\\s)+)\+\+$/) { $~[:s] }
21
26
  Regexp.compile(regexp)
22
27
  end
28
+ memoize :compile_pattern
23
29
 
24
30
  def translation_unit
25
31
  TranslationUnit.new(translation_unit_variant.parent)
@@ -7,21 +7,8 @@ module Konjak
7
7
 
8
8
  private
9
9
 
10
- def split(translation_unit, text)
11
- segment = translation_unit.variant(@lang).segment
12
- pattern = segment.compile_gtt_html_pattern
13
-
14
- texts = []
15
- while true
16
- head, match, tail = text.partition(pattern)
17
- break if match.empty? || text.length < min_segment_length
18
- texts << head unless head.empty?
19
-
20
- texts << SegmentString.new(match, segment)
21
-
22
- text = tail
23
- end
24
- texts << text
10
+ def compile_pattern(segment)
11
+ segment.compile_gtt_html_pattern
25
12
  end
26
13
  end
27
14
  end
@@ -16,10 +16,13 @@ module Konjak
16
16
  def segmentize(text)
17
17
  segments = [text]
18
18
  translation_units.each do |translation_unit|
19
+ segment = translation_unit.variant(@lang).segment
20
+
19
21
  segments.map! {|text|
22
+ next text if text.length < min_segment_length
20
23
  next text if text.is_a?(SegmentString)
21
24
 
22
- split(translation_unit, text)
25
+ split(segment, text)
23
26
  }.flatten!
24
27
  end
25
28
  segments
@@ -35,6 +38,23 @@ module Konjak
35
38
  @options[:min_segment_length]
36
39
  end
37
40
 
41
+ def split(segment, text)
42
+ texts = []
43
+ while true
44
+ break if text.length < min_segment_length
45
+
46
+ head, match, tail = text.partition(compile_pattern(segment))
47
+ break if match.empty?
48
+
49
+ texts << head unless head.empty?
50
+
51
+ texts << SegmentString.new(match, segment)
52
+
53
+ text = tail
54
+ end
55
+ texts << text
56
+ end
57
+
38
58
  def translation_units
39
59
  @tmx.body.translation_units.sort_by {|tu|
40
60
  -tu.variant(@lang).segment.text.length
@@ -7,21 +7,8 @@ module Konjak
7
7
 
8
8
  private
9
9
 
10
- def split(translation_unit, text)
11
- segment = translation_unit.variant(@lang).segment
12
- pattern = segment.compile_pattern
13
-
14
- texts = []
15
- while true
16
- head, match, tail = text.partition(pattern)
17
- break if match.empty? || text.length < min_segment_length
18
- texts << head unless head.empty?
19
-
20
- texts << SegmentString.new(match, segment)
21
-
22
- text = tail
23
- end
24
- texts << text
10
+ def compile_pattern(segment)
11
+ segment.compile_pattern
25
12
  end
26
13
  end
27
14
  end
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.12"
2
+ VERSION = "0.0.13"
3
3
  end
@@ -19,8 +19,10 @@ EXPECT
19
19
 
20
20
  context 'when blanks between words is not exactly match' do
21
21
  let(:doc) { <<DOC }
22
- this is data
23
- (with a non-standard character: ).
22
+ this is data (with a non-standard
23
+
24
+
25
+ character: ).
24
26
  this is data (with a non-standard character: ).
25
27
  DOC
26
28
 
@@ -54,7 +56,7 @@ EXPECT
54
56
  context 'when blanks between words is not exactly match' do
55
57
  let(:doc) { <<GTT_HTML }
56
58
  This is
57
- <a href="http://example.com">example</a>.
59
+ <a href="http://example.com">example</a>.
58
60
  And This
59
61
  is
60
62
  <b>example</b>. Yey.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa