google_translate_diff 1.0.9 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bc126f78f09507a6cdfe8116479b3629d0eac062
4
- data.tar.gz: a92719bc7447df9c530127ec0ce48e4f3e341329
3
+ metadata.gz: cbd316d964f6fdb9f119c66b2087e475bbed2b04
4
+ data.tar.gz: 02ae88623b5b4ddddebbf9b23a9bb02075516d6a
5
5
  SHA512:
6
- metadata.gz: 0d120b67393f006ced3d955ece7afcc9913d7d6c43c4421ee8f87e7f5028743c0b7bfc6add578ccb2ea59bc814a76e7e51a9e04294676c1ae427ff59c433fe4a
7
- data.tar.gz: d7230c98ea93cfc03de94a0496779e14ef3abb7fe70d4cca769a72a8708b08539222e4e369e0423b8ddcd870a995f0608dc83ccbd88b6cbaac95ea3e53944f0a
6
+ metadata.gz: 84d6f5f4287a586455900838a534760a1eec4755551ee920325336d6d513a0b59b0727acfadbcb84a82cc7a17b257bcce4c4ae94d4c8f1199948ce3a2b7be076
7
+ data.tar.gz: da22b6c0d1ddafeff3243f0e6b4aec4f4394451f22b307a8582c4a3581330f565f029d0db30907f1ef58f3c470a7a378e255527e2735998afae3af4f36039a56
@@ -10,3 +10,6 @@ Style/ClassAndModuleChildren:
10
10
  Metrics/BlockLength:
11
11
  Exclude:
12
12
  - spec/**/*
13
+
14
+ Metrics/LineLength:
15
+ Max: 120
@@ -8,16 +8,20 @@ class GoogleTranslateDiff::Tokenizer < ::Ox::Sax
8
8
  @indicies = []
9
9
  end
10
10
 
11
+ def instruct(target)
12
+ start_markup(target)
13
+ end
14
+
15
+ def end_instruct(target)
16
+ end_markup(target)
17
+ end
18
+
11
19
  def start_element(name)
12
- @context << name
13
- @sequence << :markup
14
- @indicies << @pos - 1
20
+ start_markup(name)
15
21
  end
16
22
 
17
23
  def end_element(name)
18
- @context.pop
19
- @sequence << (nontranslate?(name) ? :notranslate : :markup)
20
- @indicies << @pos - 1 unless @pos == @source.bytesize
24
+ end_markup(name)
21
25
  end
22
26
 
23
27
  def attr(name, value)
@@ -33,22 +37,29 @@ class GoogleTranslateDiff::Tokenizer < ::Ox::Sax
33
37
  @indicies << @pos - 1
34
38
  end
35
39
 
36
- # rubocop:disable Metrics/AbcSize
37
40
  def tokens
38
- @tokens ||= raw_tokens.each_with_object([]) do |token, tokens|
39
- if tokens.empty?
41
+ @tokens ||= token_sequences_joined.tap { |tokens| make_sentences_from_last_token(tokens) }
42
+ end
43
+
44
+ private
45
+
46
+ def token_sequences_joined
47
+ raw_tokens.each_with_object([]) do |token, tokens|
48
+ if tokens.empty? # Initial state
40
49
  tokens << token
41
- elsif tokens.last[1] == token[1]
50
+ elsif tokens.last[1] == token[1] # Join series of tokens of the same type into one
42
51
  tokens.last[0].concat(token[0])
43
- else
44
- tokens.concat(sentences(tokens.pop[0])) if tokens.last[1] == :text
52
+ else # If token before :markup is :text we need to split it into sentences
53
+ make_sentences_from_last_token(tokens)
45
54
  tokens << token
46
55
  end
47
56
  end
48
57
  end
49
- # rubocop:enable Metrics/AbcSize
50
58
 
51
- private
59
+ def make_sentences_from_last_token(tokens)
60
+ return if tokens.empty?
61
+ tokens.concat(sentences(tokens.pop[0])) if tokens.last[1] == :text
62
+ end
52
63
 
53
64
  # rubocop: disable Metrics/MethodLength
54
65
  def sentences(value)
@@ -91,13 +102,24 @@ class GoogleTranslateDiff::Tokenizer < ::Ox::Sax
91
102
  @sequence[-2] == :notranslate && name == :span
92
103
  end
93
104
 
105
+ def start_markup(name)
106
+ @context << name
107
+ @sequence << :markup
108
+ @indicies << @pos - 1
109
+ end
110
+
111
+ def end_markup(name)
112
+ @context.pop
113
+ @sequence << (nontranslate?(name) ? :notranslate : :markup)
114
+ @indicies << @pos - 1 unless @pos == @source.bytesize
115
+ end
116
+
94
117
  class << self
95
118
  def tokenize(value)
96
119
  return [] if value.nil?
97
120
  tokenizer = new(value).tap do |h|
98
121
  Ox.sax_parse(h, StringIO.new(value), HTML_OPTIONS)
99
122
  end
100
- puts tokenizer.tokens.inspect
101
123
  tokenizer.tokens
102
124
  end
103
125
  end
@@ -1,3 +1,3 @@
1
1
  module GoogleTranslateDiff
2
- VERSION = "1.0.9".freeze
2
+ VERSION = "1.0.10".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_translate_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.9
4
+ version: 1.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Sokolov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-10-17 00:00:00.000000000 Z
11
+ date: 2018-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler