konjak 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7b6348fb462dc7f40f7199c009882b589b3013f1
4
- data.tar.gz: fcca5beda91ef23b2386e22367697e76d5e37428
3
+ metadata.gz: 219a842656b4743ac6f782fdca2167dcf85e51ab
4
+ data.tar.gz: 2a1c74e8771791b5ac9dab000a7d556f88a14335
5
5
  SHA512:
6
- metadata.gz: d46050f4b09475b2fb426b60551ce2a4f1dd9697878d131033e9ce53936336d7024a4ed102cdd307d6ca4533f0f40d134bd2781a931d11abc47f19bbc32be9d4
7
- data.tar.gz: 8597872f8d75b6ad1c28e48e86aca55a727eb5f4237182af87d1eece9ddf7657831ad2b5463cdd67d2739e7fbcfc65c935858fddbb0b30f94a31591b769f613a
6
+ metadata.gz: f26891f463c259af8eee10698208a6df50c92b49c5b0f0a5065feadcccd10fe45733b8598eec5e47d35286feb519156aa8b82600576eeccd2d6cf39ecaf29daf
7
+ data.tar.gz: c88fe8b6fdbab45f4f5ddcc8c851869e47b62fc7bfc800676dba0f63352e193ffb453b15f95244eb364398b5c841629f2ce2d9df5b9edeca2a7b23e7c5c8caad
data/lib/konjak/body.rb CHANGED
@@ -1,8 +1,10 @@
1
1
  module Konjak
2
2
  class Body < StructuralElement
3
+ TAG_NAME = 'body'
4
+
3
5
  # childrens
4
6
  def translation_units
5
- children.select {|c| c.name == 'tu' }.map {|tu| TranslationUnit.new(tu) }
7
+ children.select {|c| c.name == TranslationUnit::TAG_NAME }.map! {|tu| TranslationUnit.new(tu) }
6
8
  end
7
9
 
8
10
  # methods
@@ -1,7 +1,10 @@
1
+ require 'mem'
1
2
  require 'delegate'
2
3
 
3
4
  module Konjak
4
5
  class Element < Delegator
6
+ include Mem
7
+
5
8
  def initialize(__element__)
6
9
  @__element__ = __element__
7
10
  end
data/lib/konjak/header.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class Header < StructuralElement
3
+ TAG_NAME = 'header'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:creation_tool, :creationtool, required: true)
5
7
  tmx_attr_accessor(:creation_tool_version, :creationtoolversion, required: true)
@@ -18,15 +20,15 @@ module Konjak
18
20
 
19
21
  # childrens
20
22
  def notes
21
- children.select {|c| c.name == 'note' }.map {|n| Note.new(n) }
23
+ children.select {|c| c.name == Note::TAG_NAME }.map! {|n| Note.new(n) }
22
24
  end
23
25
 
24
26
  def user_defined_encodings
25
- children.select {|c| c.name == 'ude' }.map {|n| UserDefinedEncoding.new(n) }
27
+ children.select {|c| c.name == UserDefinedEncoding::TAG_NAME }.map! {|n| UserDefinedEncoding.new(n) }
26
28
  end
27
29
 
28
30
  def properties
29
- children.select {|c| c.name == 'prop' }.map {|n| Property.new(n) }
31
+ children.select {|c| c.name == Property::TAG_NAME }.map! {|n| Property.new(n) }
30
32
  end
31
33
 
32
34
  # methods
@@ -11,7 +11,7 @@ module Konjak
11
11
  # Zero, one or more of the following elements: <bpt>, <ept>, <it>, <ph>, and <hi>.
12
12
  # They can be in any order, except that each <bpt> element must have a subsequent corresponding <ept> element.
13
13
  def can_contain?(element)
14
- [Text, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
14
+ [String, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
15
15
  end
16
16
  end
17
17
  end
@@ -1,18 +1,19 @@
1
1
  module Konjak
2
2
  class HtmlSegmentor < Segmentor
3
+ SEGMENTS_PATTERNS = [
4
+ %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td)>(.*?)</\k<start>>}m,
5
+ %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td) [^>]*?>(.*?)</\k<start>>}m,
6
+ %r{<div>(.*?)</div>}m,
7
+ %r{<div [^>]*?>(.*?)</div>}m
8
+ ]
9
+
3
10
  def segments
4
11
  segments = [content.dup]
5
12
 
6
13
  begin
7
14
  size = segments.size
8
15
 
9
- segments_patterns = [
10
- %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td)>(.*?)</\k<start>>}m,
11
- %r{<(?<start>p|h1|h2|h3|h4|h5|h6|li|title|td) [^>]*?>(.*?)</\k<start>>}m,
12
- %r{<div>(.*?)</div>}m,
13
- %r{<div [^>]*?>(.*?)</div>}m
14
- ]
15
- segments_patterns.each do |pattern|
16
+ SEGMENTS_PATTERNS.each do |pattern|
16
17
  segments.map! do |s|
17
18
  s.partition(pattern)
18
19
  end
data/lib/konjak/map.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class Map < StructuralElement
3
+ TAG_NAME = 'map'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:unicode, required: true)
5
7
 
data/lib/konjak/note.rb CHANGED
@@ -1,17 +1,19 @@
1
1
  module Konjak
2
2
  class Note < StructuralElement
3
+ TAG_NAME = 'note'
4
+
3
5
  # optional attrs
4
6
  tmx_attr_accessor(:xml_lang, :'xml:lang')
5
7
  tmx_attr_accessor(:o_encoding, :"o-encoding")
6
8
 
7
9
  # childrens
8
10
  def text
9
- Text.new(super)
11
+ super
10
12
  end
11
13
 
12
14
  # methods
13
15
  def can_contain?(element)
14
- Text === element
16
+ String === element
15
17
  end
16
18
  end
17
19
  end
@@ -1,31 +1,33 @@
1
1
  module Konjak
2
2
  class PolytexSegmentor < Segmentor
3
+
4
+ SEGMENTS_PATTERNS = [
5
+ /\\begin\{(?<start>[^\}]+)\}([\n.]*?)\\end\{\k<start>\}/m,
6
+ /(?<=\\chapter\{)[^\}]+(?=\})/,
7
+ /(?<=\\section\{)[^\}]+(?=\})/,
8
+ /(?<=\\subsection\{)[^\}]+(?=\})/,
9
+ /\\footnote\{(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+\}/m,
10
+ /(?<=\\footnote\{)(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+(?=\})/m,
11
+ /(?<=\\codecaption\{).+(?= \\|\}$)/,
12
+ /(?<=\\caption\{).+(?=\\label\{.*\}\}$)/,
13
+ /(?<=\n)^.*$(?=\n)/m,
14
+ /# .*$/,
15
+ /(?<=^).+?[\.\?\!](?= |\n|\t)/,
16
+ /(?<=\()[^\.\n]+[\.\?\!](?=\))/,
17
+ /^ (?=[\w\\]+)/,
18
+ /^\s+% .*$/,
19
+ /^$/,
20
+ /\\noindent /,
21
+ /\\item /,
22
+ ]
23
+
3
24
  def segments
4
25
  segments = [content.dup]
5
26
 
6
27
  begin
7
28
  size = segments.size
8
29
 
9
- segments_patterns = [
10
- /\\begin\{(?<start>[^\}]+)\}([\n.]*?)\\end\{\k<start>\}/m,
11
- /(?<=\\chapter\{)[^\}]+(?=\})/,
12
- /(?<=\\section\{)[^\}]+(?=\})/,
13
- /(?<=\\subsection\{)[^\}]+(?=\})/,
14
- /\\footnote\{(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+\}/m,
15
- /(?<=\\footnote\{)(?<gr>\\(?!footnote)[^\{]+\{[^\}]+\}(?:\{[^\}]+\})?\g<gr>|[^{])+(?=\})/m,
16
- /(?<=\\codecaption\{).+(?= \\|\}$)/,
17
- /(?<=\\caption\{).+(?=\\label\{.*\}\}$)/,
18
- /(?<=\n)^.*$(?=\n)/m,
19
- /# .*$/,
20
- /(?<=^).+?[\.\?\!](?= |\n|\t)/,
21
- /(?<=\()[^\.\n]+[\.\?\!](?=\))/,
22
- /^ (?=[\w\\]+)/,
23
- /^\s+% .*$/,
24
- /^$/,
25
- /\\noindent /,
26
- /\\item /,
27
- ]
28
- segments_patterns.each do |pattern|
30
+ SEGMENTS_PATTERNS.each do |pattern|
29
31
  segments.map! do |s|
30
32
  s.partition(pattern)
31
33
  end
@@ -37,4 +39,5 @@ module Konjak
37
39
  segments
38
40
  end
39
41
  end
42
+
40
43
  end
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class Property < StructuralElement
3
+ TAG_NAME = 'prop'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:type, required: true)
5
7
 
@@ -9,14 +11,14 @@ module Konjak
9
11
 
10
12
  # childrens
11
13
  def text
12
- Text.new(super)
14
+ super
13
15
  end
14
16
 
15
17
  # methods
16
18
  def can_contain?(element)
17
19
  # FIXME
18
20
  # Tool-specific data or text.
19
- Text === element
21
+ String === element
20
22
  end
21
23
 
22
24
  def unpublished?
@@ -18,7 +18,6 @@ module Konjak
18
18
  regexp.gsub!(/(?<!^)(?:\\s)+(?!$)/) {|s| s + '++' }
19
19
  Regexp.compile(regexp)
20
20
  end
21
- memoize :compile_gtt_html_pattern
22
21
 
23
22
  def extract_gtt_tags_from(text)
24
23
  m = text.match(compile_gtt_html_pattern)
@@ -4,26 +4,29 @@ require 'mem'
4
4
  module Konjak
5
5
  # container
6
6
  class Segment < StructuralElement
7
+ TAG_NAME = 'seg'
8
+ WHITE_SPACE_PATTERN_TEXT = '\s'
9
+ POSSESSIVE_QUALIFIER = '++'
10
+
7
11
  include GTT
8
12
  include Mem
9
13
 
10
14
  # children
11
15
  def text
12
- Text.new(super)
16
+ super
13
17
  end
14
18
 
15
19
  # methods
16
20
  def can_contain?(element)
17
- [Text, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Highlight].any? {|c| c === element }
21
+ [String, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Highlight].any? {|c| c === element }
18
22
  end
19
23
 
20
24
  def compile_pattern
21
25
  regexp = Regexp.escape(text)
22
- regexp.gsub!(/(?<!^)\\\s/) { '\s' }
23
- regexp.gsub!(/(?<!^)(?:\\s)+(?!$)/) {|s| s + '++' }
26
+ regexp.gsub!(/(?<!^)\\\s/) { WHITE_SPACE_PATTERN_TEXT }
27
+ regexp.gsub!(/(?<!^)(?:\\s)+(?!$)/) {|s| s + POSSESSIVE_QUALIFIER }
24
28
  Regexp.compile(regexp)
25
29
  end
26
- memoize :compile_pattern
27
30
 
28
31
  def translation_unit
29
32
  TranslationUnit.new(translation_unit_variant.parent)
@@ -12,7 +12,7 @@ module Konjak
12
12
  # They can be in any order, except that each <bpt> element must have a subsequent corresponding <ept> element.
13
13
 
14
14
  def can_contain?(element)
15
- [Text, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
15
+ [String, BeginPairedTag, EndPairedTag, IsolatedTag, Placeholder, Hilight].any? {|c| c === element }
16
16
  end
17
17
  end
18
18
  end
data/lib/konjak/tmx.rb CHANGED
@@ -7,12 +7,12 @@ module Konjak
7
7
 
8
8
  # required element
9
9
  def header
10
- Header.new(root.at_xpath('header'))
10
+ Header.new(root.at_xpath(Header::TAG_NAME))
11
11
  end
12
12
 
13
13
  # required element
14
14
  def body
15
- Body.new(root.at_xpath('body'))
15
+ Body.new(root.at_xpath(Body::TAG_NAME))
16
16
  end
17
17
 
18
18
  # FIXME
@@ -18,11 +18,13 @@ module Konjak
18
18
  translation_units.each do |translation_unit|
19
19
  segment = translation_unit.variant(@lang).segment
20
20
 
21
+ pat = compile_pattern(segment)
22
+
21
23
  segments.map! {|text|
22
24
  next text if text.length < min_segment_length
23
25
  next text if text.is_a?(SegmentString)
24
26
 
25
- split(segment, text)
27
+ split(pat, segment, text)
26
28
  }.flatten!
27
29
  end
28
30
  segments
@@ -38,13 +40,16 @@ module Konjak
38
40
  @options[:min_segment_length]
39
41
  end
40
42
 
41
- def split(segment, text)
43
+ def split(pat, segment, text)
42
44
  texts = []
43
45
  while true
44
46
  break if text.length < min_segment_length
45
47
 
46
- head, match, tail = text.partition(compile_pattern(segment))
47
- break if match.empty?
48
+ break unless text =~ pat
49
+
50
+ head = $`
51
+ match = $&
52
+ tail = $'
48
53
 
49
54
  texts << head unless head.empty?
50
55
 
@@ -62,7 +67,6 @@ module Konjak
62
67
  tu.variant(@lang).segment.text.length < min_segment_length
63
68
  }
64
69
  end
65
- memoize :translation_units
66
70
  end
67
71
  end
68
72
  end
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class TranslationUnit < StructuralElement
3
+ TAG_NAME = 'tu'
4
+
3
5
  # optional attrs
4
6
  tmx_attr_accessor(:tuid)
5
7
  tmx_attr_accessor(:o_encoding, :"o-encoding")
@@ -19,8 +21,9 @@ module Konjak
19
21
 
20
22
  # childrens
21
23
  def variants
22
- children.select {|c| c.name == 'tuv' }.map {|tuv| TranslationUnitVariant.new(tuv) }
24
+ children.select {|c| c.name == TranslationUnitVariant::TAG_NAME }.map! {|tuv| TranslationUnitVariant.new(tuv) }
23
25
  end
26
+ memoize :variants
24
27
 
25
28
  # methods
26
29
  def can_contain?(element)
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class TranslationUnitVariant < StructuralElement
3
+ TAG_NAME = 'tuv'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:xml_lang, :'xml:lang', required: true)
5
7
 
@@ -18,16 +20,17 @@ module Konjak
18
20
 
19
21
  # childrens
20
22
  def notes
21
- children.select {|c| c.name == 'note' }.map {|n| Note.new(n) }
23
+ children.select {|c| c.name == 'note' }.map! {|n| Note.new(n) }
22
24
  end
23
25
 
24
26
  def properties
25
- children.select {|c| c.name == 'prop' }.map {|n| Property.new(n) }
27
+ children.select {|c| c.name == 'prop' }.map! {|n| Property.new(n) }
26
28
  end
27
29
 
28
30
  def segment
29
- Segment.new(children.detect {|c| c.name == 'seg' })
31
+ Segment.new(children.detect {|c| c.name == Segment::TAG_NAME })
30
32
  end
33
+ memoize :segment
31
34
 
32
35
  # methods
33
36
 
@@ -1,5 +1,7 @@
1
1
  module Konjak
2
2
  class UserDefinedEncoding < StructuralElement
3
+ TAG_NAME = 'ude'
4
+
3
5
  # required attrs
4
6
  tmx_attr_accessor(:name, required: true)
5
7
 
@@ -10,7 +12,7 @@ module Konjak
10
12
 
11
13
  # childrens
12
14
  def maps
13
- children.select {|c| c.name == 'map' }.map {|n| Map.new(n) }
15
+ children.select {|c| c.name == Map::TAG_NAME }.map! {|n| Map.new(n) }
14
16
  end
15
17
 
16
18
  # methods
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.14"
2
+ VERSION = "0.0.15"
3
3
  end
data/lib/konjak.rb CHANGED
@@ -3,7 +3,6 @@ require 'konjak/parser'
3
3
 
4
4
  # not elements
5
5
  require 'konjak/code_data'
6
- require 'konjak/text'
7
6
 
8
7
  # elements
9
8
  require 'konjak/element'
@@ -38,7 +38,7 @@ describe Konjak do
38
38
 
39
39
  its(:xml_lang) { is_expected.to eq 'en' }
40
40
  its(:o_encoding) { is_expected.to eq 'iso-8859-1' }
41
- its(:text) { is_expected.to be_instance_of Konjak::Text }
41
+ its(:text) { is_expected.to be_instance_of String }
42
42
 
43
43
  describe 'text' do
44
44
  subject { super().text }
@@ -90,7 +90,7 @@ describe Konjak do
90
90
  its(:xml_lang) { is_expected.to eq 'en' }
91
91
  its(:o_encoding) { is_expected.to eq 'iso-8859-1' }
92
92
  its(:type) { is_expected.to eq 'RTFPreamble' }
93
- its(:text) { is_expected.to be_instance_of Konjak::Text }
93
+ its(:text) { is_expected.to be_instance_of String }
94
94
 
95
95
  describe '.text' do
96
96
  subject { super().text }
@@ -141,7 +141,7 @@ describe Konjak do
141
141
  describe '.segment' do
142
142
  subject { super().segment }
143
143
 
144
- its(:text) { is_expected.to be_instance_of Konjak::Text }
144
+ its(:text) { is_expected.to be_instance_of String }
145
145
 
146
146
  describe '.text' do
147
147
  subject { super().text }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-25 00:00:00.000000000 Z
11
+ date: 2015-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mem
@@ -161,7 +161,6 @@ files:
161
161
  - lib/konjak/segmentor.rb
162
162
  - lib/konjak/structural_element.rb
163
163
  - lib/konjak/sub_flow.rb
164
- - lib/konjak/text.rb
165
164
  - lib/konjak/tmx.rb
166
165
  - lib/konjak/tmx_segmentor.rb
167
166
  - lib/konjak/tmx_segmentor/gtt_html_strategy.rb
@@ -216,4 +215,3 @@ test_files:
216
215
  - spec/konjak_translate_spec.rb
217
216
  - spec/spec_helper.rb
218
217
  - spec/support/equal_xml_matcher.rb
219
- has_rdoc:
data/lib/konjak/text.rb DELETED
@@ -1,4 +0,0 @@
1
- module Konjak
2
- class Text < String
3
- end
4
- end