google_translate_diff 1.0.9 → 1.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bc126f78f09507a6cdfe8116479b3629d0eac062
4
- data.tar.gz: a92719bc7447df9c530127ec0ce48e4f3e341329
3
+ metadata.gz: cbd316d964f6fdb9f119c66b2087e475bbed2b04
4
+ data.tar.gz: 02ae88623b5b4ddddebbf9b23a9bb02075516d6a
5
5
  SHA512:
6
- metadata.gz: 0d120b67393f006ced3d955ece7afcc9913d7d6c43c4421ee8f87e7f5028743c0b7bfc6add578ccb2ea59bc814a76e7e51a9e04294676c1ae427ff59c433fe4a
7
- data.tar.gz: d7230c98ea93cfc03de94a0496779e14ef3abb7fe70d4cca769a72a8708b08539222e4e369e0423b8ddcd870a995f0608dc83ccbd88b6cbaac95ea3e53944f0a
6
+ metadata.gz: 84d6f5f4287a586455900838a534760a1eec4755551ee920325336d6d513a0b59b0727acfadbcb84a82cc7a17b257bcce4c4ae94d4c8f1199948ce3a2b7be076
7
+ data.tar.gz: da22b6c0d1ddafeff3243f0e6b4aec4f4394451f22b307a8582c4a3581330f565f029d0db30907f1ef58f3c470a7a378e255527e2735998afae3af4f36039a56
@@ -10,3 +10,6 @@ Style/ClassAndModuleChildren:
10
10
  Metrics/BlockLength:
11
11
  Exclude:
12
12
  - spec/**/*
13
+
14
+ Metrics/LineLength:
15
+ Max: 120
@@ -8,16 +8,20 @@ class GoogleTranslateDiff::Tokenizer < ::Ox::Sax
8
8
  @indicies = []
9
9
  end
10
10
 
11
+ def instruct(target)
12
+ start_markup(target)
13
+ end
14
+
15
+ def end_instruct(target)
16
+ end_markup(target)
17
+ end
18
+
11
19
  def start_element(name)
12
- @context << name
13
- @sequence << :markup
14
- @indicies << @pos - 1
20
+ start_markup(name)
15
21
  end
16
22
 
17
23
  def end_element(name)
18
- @context.pop
19
- @sequence << (nontranslate?(name) ? :notranslate : :markup)
20
- @indicies << @pos - 1 unless @pos == @source.bytesize
24
+ end_markup(name)
21
25
  end
22
26
 
23
27
  def attr(name, value)
@@ -33,22 +37,29 @@ class GoogleTranslateDiff::Tokenizer < ::Ox::Sax
33
37
  @indicies << @pos - 1
34
38
  end
35
39
 
36
- # rubocop:disable Metrics/AbcSize
37
40
  def tokens
38
- @tokens ||= raw_tokens.each_with_object([]) do |token, tokens|
39
- if tokens.empty?
41
+ @tokens ||= token_sequences_joined.tap { |tokens| make_sentences_from_last_token(tokens) }
42
+ end
43
+
44
+ private
45
+
46
+ def token_sequences_joined
47
+ raw_tokens.each_with_object([]) do |token, tokens|
48
+ if tokens.empty? # Initial state
40
49
  tokens << token
41
- elsif tokens.last[1] == token[1]
50
+ elsif tokens.last[1] == token[1] # Join series of tokens of the same type into one
42
51
  tokens.last[0].concat(token[0])
43
- else
44
- tokens.concat(sentences(tokens.pop[0])) if tokens.last[1] == :text
52
+ else # If token before :markup is :text we need to split it into sentences
53
+ make_sentences_from_last_token(tokens)
45
54
  tokens << token
46
55
  end
47
56
  end
48
57
  end
49
- # rubocop:enable Metrics/AbcSize
50
58
 
51
- private
59
+ def make_sentences_from_last_token(tokens)
60
+ return if tokens.empty?
61
+ tokens.concat(sentences(tokens.pop[0])) if tokens.last[1] == :text
62
+ end
52
63
 
53
64
  # rubocop: disable Metrics/MethodLength
54
65
  def sentences(value)
@@ -91,13 +102,24 @@ class GoogleTranslateDiff::Tokenizer < ::Ox::Sax
91
102
  @sequence[-2] == :notranslate && name == :span
92
103
  end
93
104
 
105
+ def start_markup(name)
106
+ @context << name
107
+ @sequence << :markup
108
+ @indicies << @pos - 1
109
+ end
110
+
111
+ def end_markup(name)
112
+ @context.pop
113
+ @sequence << (nontranslate?(name) ? :notranslate : :markup)
114
+ @indicies << @pos - 1 unless @pos == @source.bytesize
115
+ end
116
+
94
117
  class << self
95
118
  def tokenize(value)
96
119
  return [] if value.nil?
97
120
  tokenizer = new(value).tap do |h|
98
121
  Ox.sax_parse(h, StringIO.new(value), HTML_OPTIONS)
99
122
  end
100
- puts tokenizer.tokens.inspect
101
123
  tokenizer.tokens
102
124
  end
103
125
  end
@@ -1,3 +1,3 @@
1
1
  module GoogleTranslateDiff
2
- VERSION = "1.0.9".freeze
2
+ VERSION = "1.0.10".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: google_translate_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.9
4
+ version: 1.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Sokolov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-10-17 00:00:00.000000000 Z
11
+ date: 2018-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler