konjak 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7b6348fb462dc7f40f7199c009882b589b3013f1
4
- data.tar.gz: fcca5beda91ef23b2386e22367697e76d5e37428
3
+ metadata.gz: 219a842656b4743ac6f782fdca2167dcf85e51ab
4
+ data.tar.gz: 2a1c74e8771791b5ac9dab000a7d556f88a14335
5
5
  SHA512:
6
- metadata.gz: d46050f4b09475b2fb426b60551ce2a4f1dd9697878d131033e9ce53936336d7024a4ed102cdd307d6ca4533f0f40d134bd2781a931d11abc47f19bbc32be9d4
7
- data.tar.gz: 8597872f8d75b6ad1c28e48e86aca55a727eb5f4237182af87d1eece9ddf7657831ad2b5463cdd67d2739e7fbcfc65c935858fddbb0b30f94a31591b769f613a
6
+ metadata.gz: f26891f463c259af8eee10698208a6df50c92b49c5b0f0a5065feadcccd10fe45733b8598eec5e47d35286feb519156aa8b82600576eeccd2d6cf39ecaf29daf
7
+ data.tar.gz: c88fe8b6fdbab45f4f5ddcc8c851869e47b62fc7bfc800676dba0f63352e193ffb453b15f95244eb364398b5c841629f2ce2d9df5b9edeca2a7b23e7c5c8caad
data/lib/konjak/body.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  module Konjak
2
2
  class Body < StructuralElement
3
+ TAG_NAME = 'body'
4
+
3
5
  # childrens
4
6
  def translation_units
5
- children.select {|c| c.name == 'tu' }.map {|tu| TranslationUnit.new(tu) }
7
+ children.select {|c| c.name == TranslationUnit::TAG_NAME }.map! {|tu| TranslationUnit.new(tu) }
6
8
  end
7
9
 
8
10
  # methods
@@ -1,7 +1,10 @@
1
+ require 'mem'
1
2
  require 'delegate'
2
3
 
3
4
  module Konjak
4
5
  class Element < Delegator
6
+ include Mem
7
+
5
8
  def initialize(__element__)
6
9
  @__element__ = __element__
7
10
  end
data/lib/konjak/header.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class Header < StructuralElement
3
+ TAG_NAME = 'header'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:creation_tool, :creationtool, required: true)
5
7
  tmx_attr_accessor(:creation_tool_version, :creationtoolversion, required: true)
@@ -18,15 +20,15 @@ module Konjak
18
20
 
19
21
  # childrens
20
22
  def notes
21
- children.select {|c| c.name == 'note' }.map {|n| Note.new(n) }
23
+ children.select {|c| c.name == Note::TAG_NAME }.map! {|n| Note.new(n) }
22
24
  end
23
25
 
24
26
  def user_defined_encodings
25
- children.select {|c| c.name == 'ude' }.map {|n| UserDefinedEncoding.new(n) }
27
+ children.select {|c| c.name == UserDefinedEncoding::TAG_NAME }.map! {|n| UserDefinedEncoding.new(n) }
26
28
  end
27
29
 
28
30
  def properties
29
- children.select {|c| c.name == 'prop' }.map {|n| Property.new(n) }
31
+ children.select {|c| c.name == Property::TAG_NAME }.map! {|n| Property.new(n) }
30
32
  end
31
33
 
32
34
  # methods
@@ -11,7 +11,7 @@ module Konjak
11
11
  # Zero, one or more of the following elements: <bpt>, <ept>, <it>, <ph>, and <hi>.
12
12
  # They can be in any order, except that each <bpt> element must have a subsequent corresponding <ept> element.
13
13
  def can_contain?(element)
14
- [Text, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
14
+ [String, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
15
15
  end
16
16
  end
17
17
  end
@@ -1,18 +1,19 @@
1
1
  module Konjak
2
2
  class HtmlSegmentor < Segmentor
3
+ SEGMENTS_PATTERNS = [
4
+ %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td)>(.*?)</\k<start>>}m,
5
+ %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td) [^>]*?>(.*?)</\k<start>>}m,
6
+ %r{<div>(.*?)</div>}m,
7
+ %r{<div [^>]*?>(.*?)</div>}m
8
+ ]
9
+
3
10
  def segments
4
11
  segments = [content.dup]
5
12
 
6
13
  begin
7
14
  size = segments.size
8
15
 
9
- segments_patterns = [
10
- %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td)>(.*?)</\k<start>>}m,
11
- %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td) [^>]*?>(.*?)</\k<start>>}m,
12
- %r{<div>(.*?)</div>}m,
13
- %r{<div [^>]*?>(.*?)</div>}m
14
- ]
15
- segments_patterns.each do |pattern|
16
+ SEGMENTS_PATTERNS.each do |pattern|
16
17
  segments.map! do |s|
17
18
  s.partition(pattern)
18
19
  end
data/lib/konjak/map.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class Map < StructuralElement
3
+ TAG_NAME = 'map'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:unicode, required: true)
5
7
 
data/lib/konjak/note.rb CHANGED
@@ -1,17 +1,19 @@
1
1
  module Konjak
2
2
  class Note < StructuralElement
3
+ TAG_NAME = 'note'
4
+
3
5
  # optional attrs
4
6
  tmx_attr_accessor(:xml_lang, :'xml:lang')
5
7
  tmx_attr_accessor(:o_encoding, :"o-encoding")
6
8
 
7
9
  # childrens
8
10
  def text
9
- Text.new(super)
11
+ super
10
12
  end
11
13
 
12
14
  # methods
13
15
  def can_contain?(element)
14
- Text === element
16
+ String === element
15
17
  end
16
18
  end
17
19
  end
@@ -1,31 +1,33 @@
1
1
  module Konjak
2
2
  class PolytexSegmentor < Segmentor
3
+
4
+ SEGMENTS_PATTERNS = [
5
+ /\\begin\{(?<start>[^\}]+)\}([\n.]*?)\\end\{\k<start>\}/m,
6
+ /(?<=\\chapter\{)[^\}]+(?=\})/,
7
+ /(?<=\\section\{)[^\}]+(?=\})/,
8
+ /(?<=\\subsection\{)[^\}]+(?=\})/,
9
+ /\\footnote\{(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+\}/m,
10
+ /(?<=\\footnote\{)(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+(?=\})/m,
11
+ /(?<=\\codecaption\{).+(?= \\|\}$)/,
12
+ /(?<=\\caption\{).+(?=\\label\{.*\}\}$)/,
13
+ /(?<=\n)^.*$(?=\n)/m,
14
+ /# .*$/,
15
+ /(?<=^).+?[\.\?\!](?= |\n|\t)/,
16
+ /(?<=\()[^\.\n]+[\.\?\!](?=\))/,
17
+ /^ (?=[\w\\]+)/,
18
+ /^\s+% .*$/,
19
+ /^$/,
20
+ /\\noindent /,
21
+ /\\item /,
22
+ ]
23
+
3
24
  def segments
4
25
  segments = [content.dup]
5
26
 
6
27
  begin
7
28
  size = segments.size
8
29
 
9
- segments_patterns = [
10
- /\\begin\{(?<start>[^\}]+)\}([\n.]*?)\\end\{\k<start>\}/m,
11
- /(?<=\\chapter\{)[^\}]+(?=\})/,
12
- /(?<=\\section\{)[^\}]+(?=\})/,
13
- /(?<=\\subsection\{)[^\}]+(?=\})/,
14
- /\\footnote\{(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+\}/m,
15
- /(?<=\\footnote\{)(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+(?=\})/m,
16
- /(?<=\\codecaption\{).+(?= \\|\}$)/,
17
- /(?<=\\caption\{).+(?=\\label\{.*\}\}$)/,
18
- /(?<=\n)^.*$(?=\n)/m,
19
- /# .*$/,
20
- /(?<=^).+?[\.\?\!](?= |\n|\t)/,
21
- /(?<=\()[^\.\n]+[\.\?\!](?=\))/,
22
- /^ (?=[\w\\]+)/,
23
- /^\s+% .*$/,
24
- /^$/,
25
- /\\noindent /,
26
- /\\item /,
27
- ]
28
- segments_patterns.each do |pattern|
30
+ SEGMENTS_PATTERNS.each do |pattern|
29
31
  segments.map! do |s|
30
32
  s.partition(pattern)
31
33
  end
@@ -37,4 +39,5 @@ module Konjak
37
39
  segments
38
40
  end
39
41
  end
42
+
40
43
  end
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class Property < StructuralElement
3
+ TAG_NAME = 'prop'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:type, required: true)
5
7
 
@@ -9,14 +11,14 @@ module Konjak
9
11
 
10
12
  # childrens
11
13
  def text
12
- Text.new(super)
14
+ super
13
15
  end
14
16
 
15
17
  # methods
16
18
  def can_contain?(element)
17
19
  # FIXME
18
20
  # Tool-specific data or text.
19
- Text === element
21
+ String === element
20
22
  end
21
23
 
22
24
  def unpublished?
@@ -18,7 +18,6 @@ module Konjak
18
18
  regexp.gsub!(/(?<!^)(?:\\s)+(?!$)/) {|s| s + '++' }
19
19
  Regexp.compile(regexp)
20
20
  end
21
- memoize :compile_gtt_html_pattern
22
21
 
23
22
  def extract_gtt_tags_from(text)
24
23
  m = text.match(compile_gtt_html_pattern)
@@ -4,26 +4,29 @@ require 'mem'
4
4
  module Konjak
5
5
  # container
6
6
  class Segment < StructuralElement
7
+ TAG_NAME = 'seg'
8
+ WHITE_SPACE_PATTERN_TEXT = '\s'
9
+ POSSESSIVE_QUALIFIER = '++'
10
+
7
11
  include GTT
8
12
  include Mem
9
13
 
10
14
  # children
11
15
  def text
12
- Text.new(super)
16
+ super
13
17
  end
14
18
 
15
19
  # methods
16
20
  def can_contain?(element)
17
- [Text, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Highlight].any? {|c| c === element }
21
+ [String, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Highlight].any? {|c| c === element }
18
22
  end
19
23
 
20
24
  def compile_pattern
21
25
  regexp = Regexp.escape(text)
22
- regexp.gsub!(/(?<!^)\\\s/) { '\s' }
23
- regexp.gsub!(/(?<!^)(?:\\s)+(?!$)/) {|s| s + '++' }
26
+ regexp.gsub!(/(?<!^)\\\s/) { WHITE_SPACE_PATTERN_TEXT }
27
+ regexp.gsub!(/(?<!^)(?:\\s)+(?!$)/) {|s| s + POSSESSIVE_QUALIFIER }
24
28
  Regexp.compile(regexp)
25
29
  end
26
- memoize :compile_pattern
27
30
 
28
31
  def translation_unit
29
32
  TranslationUnit.new(translation_unit_variant.parent)
@@ -12,7 +12,7 @@ module Konjak
12
12
  # They can be in any order, except that each <bpt> element must have a subsequent corresponding <ept> element.
13
13
 
14
14
  def can_contain?(element)
15
- [Text, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
15
+ [String, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
16
16
  end
17
17
  end
18
18
  end
data/lib/konjak/tmx.rb CHANGED
@@ -7,12 +7,12 @@ module Konjak
7
7
 
8
8
  # required element
9
9
  def header
10
- Header.new(root.at_xpath('header'))
10
+ Header.new(root.at_xpath(Header::TAG_NAME))
11
11
  end
12
12
 
13
13
  # required element
14
14
  def body
15
- Body.new(root.at_xpath('body'))
15
+ Body.new(root.at_xpath(Body::TAG_NAME))
16
16
  end
17
17
 
18
18
  # FIXME
@@ -18,11 +18,13 @@ module Konjak
18
18
  translation_units.each do |translation_unit|
19
19
  segment = translation_unit.variant(@lang).segment
20
20
 
21
+ pat = compile_pattern(segment)
22
+
21
23
  segments.map! {|text|
22
24
  next text if text.length < min_segment_length
23
25
  next text if text.is_a?(SegmentString)
24
26
 
25
- split(segment, text)
27
+ split(pat, segment, text)
26
28
  }.flatten!
27
29
  end
28
30
  segments
@@ -38,13 +40,16 @@ module Konjak
38
40
  @options[:min_segment_length]
39
41
  end
40
42
 
41
- def split(segment, text)
43
+ def split(pat, segment, text)
42
44
  texts = []
43
45
  while true
44
46
  break if text.length < min_segment_length
45
47
 
46
- head, match, tail = text.partition(compile_pattern(segment))
47
- break if match.empty?
48
+ break unless text =~ pat
49
+
50
+ head = $`
51
+ match = $&
52
+ tail = $'
48
53
 
49
54
  texts << head unless head.empty?
50
55
 
@@ -62,7 +67,6 @@ module Konjak
62
67
  tu.variant(@lang).segment.text.length < min_segment_length
63
68
  }
64
69
  end
65
- memoize :translation_units
66
70
  end
67
71
  end
68
72
  end
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class TranslationUnit < StructuralElement
3
+ TAG_NAME = 'tu'
4
+
3
5
  # optional attrs
4
6
  tmx_attr_accessor(:tuid)
5
7
  tmx_attr_accessor(:o_encoding, :"o-encoding")
@@ -19,8 +21,9 @@ module Konjak
19
21
 
20
22
  # childrens
21
23
  def variants
22
- children.select {|c| c.name == 'tuv' }.map {|tuv| TranslationUnitVariant.new(tuv) }
24
+ children.select {|c| c.name == TranslationUnitVariant::TAG_NAME }.map! {|tuv| TranslationUnitVariant.new(tuv) }
23
25
  end
26
+ memoize :variants
24
27
 
25
28
  # methods
26
29
  def can_contain?(element)
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class TranslationUnitVariant < StructuralElement
3
+ TAG_NAME = 'tuv'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:xml_lang, :'xml:lang', required: true)
5
7
 
@@ -18,16 +20,17 @@ module Konjak
18
20
 
19
21
  # childrens
20
22
  def notes
21
- children.select {|c| c.name == 'note' }.map {|n| Note.new(n) }
23
+ children.select {|c| c.name == 'note' }.map! {|n| Note.new(n) }
22
24
  end
23
25
 
24
26
  def properties
25
- children.select {|c| c.name == 'prop' }.map {|n| Property.new(n) }
27
+ children.select {|c| c.name == 'prop' }.map! {|n| Property.new(n) }
26
28
  end
27
29
 
28
30
  def segment
29
- Segment.new(children.detect {|c| c.name == 'seg' })
31
+ Segment.new(children.detect {|c| c.name == Segment::TAG_NAME })
30
32
  end
33
+ memoize :segment
31
34
 
32
35
  # methods
33
36
 
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class UserDefinedEncoding < StructuralElement
3
+ TAG_NAME = 'ude'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:name, required: true)
5
7
 
@@ -10,7 +12,7 @@ module Konjak
10
12
 
11
13
  # childrens
12
14
  def maps
13
- children.select {|c| c.name == 'map' }.map {|n| Map.new(n) }
15
+ children.select {|c| c.name == Map::TAG_NAME }.map! {|n| Map.new(n) }
14
16
  end
15
17
 
16
18
  # methods
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.14"
2
+ VERSION = "0.0.15"
3
3
  end
data/lib/konjak.rb CHANGED
@@ -3,7 +3,6 @@ require 'konjak/parser'
3
3
 
4
4
  # not elements
5
5
  require 'konjak/code_data'
6
- require 'konjak/text'
7
6
 
8
7
  # elements
9
8
  require 'konjak/element'
@@ -38,7 +38,7 @@ describe Konjak do
38
38
 
39
39
  its(:xml_lang) { is_expected.to eq 'en' }
40
40
  its(:o_encoding) { is_expected.to eq 'iso-8859-1' }
41
- its(:text) { is_expected.to be_instance_of Konjak::Text }
41
+ its(:text) { is_expected.to be_instance_of String }
42
42
 
43
43
  describe 'text' do
44
44
  subject { super().text }
@@ -90,7 +90,7 @@ describe Konjak do
90
90
  its(:xml_lang) { is_expected.to eq 'en' }
91
91
  its(:o_encoding) { is_expected.to eq 'iso-8859-1' }
92
92
  its(:type) { is_expected.to eq 'RTFPreamble' }
93
- its(:text) { is_expected.to be_instance_of Konjak::Text }
93
+ its(:text) { is_expected.to be_instance_of String }
94
94
 
95
95
  describe '.text' do
96
96
  subject { super().text }
@@ -141,7 +141,7 @@ describe Konjak do
141
141
  describe '.segment' do
142
142
  subject { super().segment }
143
143
 
144
- its(:text) { is_expected.to be_instance_of Konjak::Text }
144
+ its(:text) { is_expected.to be_instance_of String }
145
145
 
146
146
  describe '.text' do
147
147
  subject { super().text }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-25 00:00:00.000000000 Z
11
+ date: 2015-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mem
@@ -161,7 +161,6 @@ files:
161
161
  - lib/konjak/segmentor.rb
162
162
  - lib/konjak/structural_element.rb
163
163
  - lib/konjak/sub_flow.rb
164
- - lib/konjak/text.rb
165
164
  - lib/konjak/tmx.rb
166
165
  - lib/konjak/tmx_segmentor.rb
167
166
  - lib/konjak/tmx_segmentor/gtt_html_strategy.rb
@@ -216,4 +215,3 @@ test_files:
216
215
  - spec/konjak_translate_spec.rb
217
216
  - spec/spec_helper.rb
218
217
  - spec/support/equal_xml_matcher.rb
219
- has_rdoc:
data/lib/konjak/text.rb DELETED
@@ -1,4 +0,0 @@
1
- module Konjak
2
- class Text < String
3
- end
4
- end