llt-tokenizer 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0a66574ca8827b73d99ceb5c8dd59a7e1b12040
4
- data.tar.gz: 998792856ca6096a28eb2def54349bd973f30176
3
+ metadata.gz: 75d2abc5e72328a1b4ef2f224931c0656572e71d
4
+ data.tar.gz: b1b155c05e45b87cfec7f0eb4e1535d9f72783b5
5
5
  SHA512:
6
- metadata.gz: 26cc6ceb5702552ba927e4a3c2c38548a083915598998dc9bc77beab412b9c65321fc6da88553d8ade4ac409024aecae55f901024776d6aea65b1613f340200e
7
- data.tar.gz: e6ed6a22cc74fd58305e043109febe235f948a1b2c769415fd1fa471655b87bab924b5458541500e68957a3b7c5c200377ecd6d89831c19d346ce887806e7b6f
6
+ metadata.gz: 1a7f8fbd9be93c7053fe601243d6ea4c3a603b0c1643a5f102727844ae06c6083acbeb805bdf74e870c5ceb9c92e9dfdd796b8b201a434f50ef50d532fb65a93
7
+ data.tar.gz: 37d6ab4e7e39a30b6165e61e0a4d341e4620de0a90521bb17ac8ce141356dbe2dc3a952d39d57dd1e4c90eba224f9077b2bc12ecd5d818c8c9372734c91593fa
data/Gemfile CHANGED
@@ -12,7 +12,6 @@ gem 'llt-db_handler', git: 'git@github.com:latin-language-toolkit/llt-db_handler
12
12
  gem 'llt-helpers', git: 'git@github.com:latin-language-toolkit/llt-helpers.git'
13
13
 
14
14
  # Dependencies of db_handler
15
- gem 'llt-core_extensions', git: 'git@github.com:latin-language-toolkit/llt-core_extensions.git'
16
15
  gem 'llt-form_builder', git: 'git@github.com:latin-language-toolkit/llt-form_builder.git'
17
16
 
18
17
  platform :ruby do
data/README.md CHANGED
@@ -51,6 +51,11 @@ The Tokenizer takes several options upon creation or a call to #tokenize:
51
51
  tokens.map(&:to_s)
52
52
  # => ["Arma", "virum", "--que", "cano", "."]
53
53
 
54
+ # splitting of enclitics can be disabled altogether
55
+ tokens = t.tokenize('Arma virumque cano.', splitting: false)
56
+ tokens.map(&:to_s)
57
+ # => ["Arma", "virumque", "cano", "."]
58
+
54
59
  # indexing determines if each token shall receive a consecutive id
55
60
  tokens = t.tokenize('Arma virumque cano.', indexing: true)
56
61
  tokens.first.id # => 1
data/lib/llt/token/punctuation.rb CHANGED
@@ -1,14 +1,19 @@
1
+ require 'xml_escape'
2
+
1
3
  module LLT
2
4
  class Token
3
5
  class Punctuation < Token
4
6
  xml_tag 'pc'
5
7
 
8
+ include XmlEscape
9
+
6
10
  attr_accessor :opening, :closing, :other
7
11
 
8
12
  def initialize(string, id = nil)
9
13
  super
10
14
  # this is part of an old interface that is mostly unused
11
15
  # some parts remain - find and delete em
16
+ @string = xml_decode(string)
12
17
  @opening = false
13
18
  @closing = false
14
19
  @other = false
@@ -31,6 +36,10 @@ module LLT
31
36
  def inspect
32
37
  "#{"Punctuation token:".yellow} #{@string}"
33
38
  end
39
+
40
+ def as_xml
41
+ xml_encode(@string)
42
+ end
34
43
  end
35
44
  end
36
45
  end
data/lib/llt/tokenizer.rb CHANGED
@@ -24,6 +24,8 @@ module LLT
24
24
  enclitics_marker: '-',
25
25
  merging: true,
26
26
  indexing: true,
27
+ splitting: true,
28
+ xml: false,
27
29
  }
28
30
  end
29
31
 
@@ -34,7 +36,7 @@ module LLT
34
36
  setup(text, options)
35
37
 
36
38
  find_abbreviations_and_join_strings
37
- split_enklitika_and_change_their_position
39
+ split_enklitika_and_change_their_position if @splitting
38
40
  merge_what_needs_merging if @merging # quam diu => quamdiu
39
41
  tokens = create_tokens
40
42
 
@@ -43,17 +45,20 @@ module LLT
43
45
  end
44
46
 
45
47
  def setup(text, options = {}, worker = [])
46
- @text = text
48
+ @text = text
47
49
  evaluate_metrical_presence(@text)
48
50
  @enclitics_marker = parse_option(:enclitics_marker, options)
49
51
  @merging = parse_option(:merging, options)
50
52
  @shifting = parse_option(:shifting, options)
53
+ @splitting = parse_option(:splitting, options)
51
54
  @indexing = parse_option(:indexing, options)
55
+ @xml = parse_option(:xml, options)
52
56
  @worker = setup_worker(worker)
53
57
  @shift_range = shift_range(@shifting)
54
58
  end
55
59
 
56
- PUNCTUATION = /([\.\?,!;\-:"'”\(\)\[\]]|<\/?.+?>)\1*/
60
+ PUNCTUATION = /&(?:amp|quot|apos|lt|gt);|([\.\?,!;\-:"'”&\(\)\[\]†<>])\1*/
61
+ XML_TAG = /<\/?.+?>/
57
62
 
58
63
  # This is here for two reasons:
59
64
  # 1) easier test setup, when a preliminary result shall be further evaluated
@@ -64,16 +69,15 @@ module LLT
64
69
  # if it's needed - which should perform better, when there
65
70
  # are no metrics involved (the default case)
66
71
  def setup_worker(worker)
67
- if worker.any?
68
- worker
72
+ return worker if worker.any?
73
+
74
+ elements = split_and_space_text
75
+ put_xml_attributes_back_together(elements) if @xml
76
+
77
+ if metrical?
78
+ Worker.new(elements, @enclitics_marker)
69
79
  else
70
- elements = @text.gsub(PUNCTUATION, ' \0 ').split
71
- put_xml_attributes_back_together(elements)
72
- if metrical?
73
- Worker.new(elements, @enclitics_marker)
74
- else
75
- elements
76
- end
80
+ elements
77
81
  end
78
82
  end
79
83
 
@@ -81,19 +85,23 @@ module LLT
81
85
  shifting_enabled ? 0 : 1
82
86
  end
83
87
 
88
+ def split_and_space_text
89
+ regex = @xml ? Regexp.union(XML_TAG, PUNCTUATION) : PUNCTUATION
90
+ @text.gsub(regex, ' \0 ').split
91
+ end
92
+
84
93
  def put_xml_attributes_back_together(elements)
85
- # elements could be like this
86
- # ['<tag', 'attr1="val"', 'attr1="val>']
87
- # and we want the complete xml tag back together
88
94
  as = ArrayScanner.new(elements)
89
95
  loop do
90
- last = as.look_behind
91
- if last && last.start_with?('<') &! last.end_with?('>')
92
- if as.current.match(/\w+=".*"$|>/)
96
+ last = as.look_behind.to_s # catch nil
97
+ if open_xml_tag?(last)
98
+ number_of_xml_elements = as.peek_until do |el|
99
+ el.end_with?('>')
100
+ end.size + 1
101
+
102
+ number_of_xml_elements.times do
93
103
  last << ' ' << as.current
94
104
  elements.delete_at(as.pos)
95
- # we don't need to forward, as we delete an element anyway
96
- next
97
105
  end
98
106
  else
99
107
  as.forward(1)
@@ -102,12 +110,18 @@ module LLT
102
110
  end
103
111
  end
104
112
 
113
+ def open_xml_tag?(str)
114
+ str.start_with?('<') &! str.end_with?('>')
115
+ end
116
+
105
117
 
106
118
  ######################
107
119
 
108
120
  # covers abbreviated Roman praenomen like Ti. in Ti. Claudius Nero
109
121
  # covers Roman date expression like a. d. V. Kal. Apr.
110
122
  ABBREVIATIONS = /^(#{ALL_ABBRS_PIPED})$/
123
+ # covers a list of words which are abbreviated with a ' like satin' for satisne
124
+ APOSTROPHE_WORDS = /^(#{APOSTROPHES_PIPED})$/
111
125
 
112
126
  # %w{ Atque M . Cicero mittit } to %w{ Atque M. Cicero mittit }
113
127
 
@@ -115,7 +129,7 @@ module LLT
115
129
  arr = []
116
130
  @worker.each_with_index do |e, i|
117
131
  n = @worker[i + 1]
118
- if e =~ ABBREVIATIONS && n == "."
132
+ if (n == '.' && e =~ ABBREVIATIONS) || (n == "'" && e =~ APOSTROPHE_WORDS)
119
133
  @worker[i + 1] = n.prepend(e)
120
134
  arr << (i - arr.size)
121
135
  end
@@ -324,7 +338,7 @@ module LLT
324
338
 
325
339
  ABBR_NAME_WITH_DOT = /^(#{NAMES_PIPED})\.$/
326
340
  ROMAN_DATE_EXPR_WITH_DOT = /^(#{DATES_PIPED})\.$/
327
- PUNCT_ITSELF = Regexp.new(PUNCTUATION.source + '$')
341
+ PUNCT_ITSELF = Regexp.new("^(?:#{PUNCTUATION.source})$")
328
342
  XML_TAG = /<\/?.+?>/
329
343
 
330
344
  def create_tokens
data/lib/llt/tokenizer/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module LLT
2
2
  class Tokenizer
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.3"
4
4
  end
5
5
  end
data/spec/lib/llt/token/punctuation_spec.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ describe LLT::Token::Punctuation do
4
+ describe "#initialize" do
5
+ it "normalizes escaped xml characters" do
6
+ punct = LLT::Token::Punctuation.new('&amp;')
7
+ punct.to_s.should == '&'
8
+ end
9
+ end
10
+
11
+ describe "#as_xml" do
12
+ it "overrides LLT::Core::Containable#as_xml to use xml encodings" do
13
+ punct = LLT::Token::Punctuation.new('&')
14
+ punct.as_xml.should == "&amp;"
15
+ end
16
+ end
17
+ end
data/spec/lib/llt/tokenizer_spec.rb CHANGED
@@ -18,10 +18,16 @@ describe LLT::Tokenizer do
18
18
  end
19
19
 
20
20
  it "handles all kinds of parens as well as cruces" do
21
- txt = "Marcus (et Claudius) †amici† [sunt]."
21
+ txt = "<Marcus> (et Claudius) †amici† [sunt]."
22
22
  tokens = tokenizer.tokenize(txt)
23
- tokens.should have(12).items
24
- tokens.map(&:to_s).should == %w{ Marcus ( et Claudius ) † amici † [ sunt ] . }
23
+ tokens.should have(14).items
24
+ tokens.map(&:to_s).should == %w{ < Marcus > ( et Claudius ) † amici † [ sunt ] . }
25
+ end
26
+
27
+ it "handles escaped xml characters" do
28
+ txt = "&amp; &quot; &apos; &gt; &lt; ;"
29
+ tokens = tokenizer.tokenize(txt)
30
+ tokens.should have(6).items
25
31
  end
26
32
 
27
33
  describe "takes an optional keyword argument add_to" do
@@ -110,19 +116,11 @@ describe LLT::Tokenizer do
110
116
  end
111
117
  end
112
118
  end
113
-
114
- context "with embedded xml tags" do
115
- it "doesn't break" do
116
- txt = '<grc>text text</grc>'
117
- tokens = tokenizer.tokenize(txt)
118
- tokens.should have(4).items
119
- end
120
- end
121
119
  end
122
120
  end
123
121
 
124
122
  describe "#find_abbreviations_and_join_strings" do
125
- describe "should bring back abbreviation dots" do
123
+ describe "should bring back abbreviation dots and apostrophes" do
126
124
  it "with names" do
127
125
  tokenizer.setup("", {}, %w{ Atque Sex . et M . Cicero . })
128
126
  tokenizer.find_abbreviations_and_join_strings
@@ -134,6 +132,12 @@ describe LLT::Tokenizer do
134
132
  tokenizer.find_abbreviations_and_join_strings
135
133
  tokenizer.preliminary.should == %w{ a. d. V Kal. Apr. }
136
134
  end
135
+
136
+ it "with apostrophe" do
137
+ tokenizer.setup("", {}, %w{ ' Apostrophi ' sunt : po ' min ' vin ' tun' scin ' potin ' satin ' })
138
+ tokenizer.find_abbreviations_and_join_strings
139
+ tokenizer.preliminary.should == %w{ ' Apostrophi ' sunt : po' min' vin' tun' scin' potin' satin' }
140
+ end
137
141
  end
138
142
  end
139
143
 
@@ -252,10 +256,10 @@ describe LLT::Tokenizer do
252
256
  end
253
257
 
254
258
  examples = {
255
- "Word" => %w{ ita Marcus quoque -que },
259
+ "Word" => %w{ ita Marcus quoque -que po' },
256
260
  "Filler" => %w{ M. Sex. App. Ap. Tib. Ti. C. a. d. Kal. Ian. }, #I XI MMC }
257
261
  "XmlTag" => %w{ <grc> </grc> },
258
- "Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' }
262
+ "Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' & < > &amp; &lt; &gt; &apos; &quot; }
259
263
  }
260
264
 
261
265
  examples.each do |klass, elements|
@@ -278,37 +282,11 @@ describe LLT::Tokenizer do
278
282
  tokens.map(&:id).should == [1, 2]
279
283
  end
280
284
 
281
- it "can be disabled" do
285
+ it "id's can be disabled" do
282
286
  txt = 'Cano.'
283
287
  tokens = tokenizer.tokenize(txt, indexing: false)
284
288
  tokens.map(&:id).should == [nil, nil]
285
289
  end
286
-
287
- it "doesn't count plain xml tags" do
288
- txt = '<grc>text text</grc>'
289
- tokens = tokenizer.tokenize(txt)
290
- tokens.map(&:id).should == [nil, 1, 2, nil]
291
- end
292
-
293
- it "doesn't count xml tags when they come with attributes" do
294
- txt = '<foreign lang="lat">Graeca</foreign> lingua est.'
295
- tokens = tokenizer.tokenize(txt).map(&:to_s)
296
- res = ['<foreign lang="lat">', 'Graeca', '</foreign>', 'lingua', 'est', '.']
297
- tokens.should == res
298
- end
299
-
300
- it "handles nested xml as well" do
301
- txt = '<l n="70"><foreign lang="lat">Graeca lingua est.</foreign></l>'
302
- tokens = tokenizer.tokenize(txt).map(&:to_s)
303
- res = ['<l n="70">', '<foreign lang="lat">', 'Graeca', 'lingua', 'est', '.', '</foreign>', '</l>']
304
- tokens.should == res
305
- end
306
-
307
- it "handles text with broken off xml tags (the rest will e.g. be in another sentence)" do
308
- txt = "<lg org=\"uniform\" sample=\"complete\"><l>quem vocet divum populus ruentis</l><l>imperi rebus?"
309
- tokens = tokenizer.tokenize(txt)
310
- tokens.should have(12).items
311
- end
312
290
  end
313
291
 
314
292
  context "with options" do
@@ -346,11 +324,68 @@ describe LLT::Tokenizer do
346
324
  tokens.should == %w{ quam diu cano ? }
347
325
  end
348
326
  end
327
+
328
+ context "with disabled splitting" do
329
+ it "doesn't split enclitics" do
330
+ txt = 'arma virumque cano.'
331
+ opts = { splitting: false }
332
+ tokens = tokenizer.tokenize(txt, opts).map(&:to_s)
333
+ tokens.should == %w{ arma virumque cano . }
334
+ end
335
+ end
336
+
337
+ context "with xml handling enabled" do
338
+ let(:xml_tokenizer) { LLT::Tokenizer.new(db: stub_db, xml: true) }
339
+
340
+ it "doesn't break when xml is embedded" do
341
+ txt = '<grc>text text</grc>'
342
+ tokens = xml_tokenizer.tokenize(txt)
343
+ tokens.should have(4).items
344
+ end
345
+
346
+ it "doesn't count plain xml tags" do
347
+ txt = '<grc>text text</grc>'
348
+ tokens = xml_tokenizer.tokenize(txt)
349
+ tokens.map(&:id).should == [nil, 1, 2, nil]
350
+ end
351
+
352
+ it "doesn't count xml tags when they come with attributes" do
353
+ txt = '<foreign lang="lat">Graeca</foreign> lingua est.'
354
+ tokens = xml_tokenizer.tokenize(txt).map(&:to_s)
355
+ res = ['<foreign lang="lat">', 'Graeca', '</foreign>', 'lingua', 'est', '.']
356
+ tokens.should == res
357
+ end
358
+
359
+ it "handles nested xml as well" do
360
+ txt = '<l n="70"><foreign lang="lat">Graeca lingua est.</foreign></l>'
361
+ tokens = xml_tokenizer.tokenize(txt).map(&:to_s)
362
+ res = ['<l n="70">', '<foreign lang="lat">', 'Graeca', 'lingua', 'est', '.', '</foreign>', '</l>']
363
+ tokens.should == res
364
+ end
365
+
366
+ it "handles text with broken off xml tags (the rest will e.g. be in another sentence)" do
367
+ txt = "<lg org=\"uniform\" sample=\"complete\"><l>quem vocet divum populus ruentis</l><l>imperi rebus?"
368
+ tokens = xml_tokenizer.tokenize(txt)
369
+ tokens.should have(12).items
370
+ end
371
+
372
+ it "doesn't fall with spaces inside of xml attributes" do
373
+ txt = '<test>veni vidi <bad att="a a a">vici</bad></test>'
374
+ tokens = xml_tokenizer.tokenize(txt)
375
+ tokens.should have(7).items
376
+ end
377
+
378
+ it "expects all text chevrons to be escaped, otherwise they are xml tags!" do
379
+ txt = '<test>&lt;veni&gt;</test>'
380
+ tokens = xml_tokenizer.tokenize(txt)
381
+ tokens.should have(5).item
382
+ end
383
+ end
349
384
  end
350
385
  end
351
386
 
352
387
  context "with options on instance creation" do
353
- it "a new instance can receive options, which it will use as it's defaults" do
388
+ it "a new instance can receive options, which it will use as its defaults" do
354
389
  custom_tok = LLT::Tokenizer.new(db: stub_db,
355
390
  shifting: false,
356
391
  enclitics_marker: '')
metadata CHANGED
@@ -1,141 +1,141 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llt-tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - LFDM
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-09 00:00:00.000000000 Z
11
+ date: 2014-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.3'
20
15
  requirement: !ruby/object:Gem::Requirement
21
16
  requirements:
22
- - - ~>
17
+ - - "~>"
23
18
  - !ruby/object:Gem::Version
24
19
  version: '1.3'
25
- prerelease: false
26
20
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
21
+ prerelease: false
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - "~>"
32
25
  - !ruby/object:Gem::Version
33
- version: '0'
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
34
29
  requirement: !ruby/object:Gem::Requirement
35
30
  requirements:
36
- - - '>='
31
+ - - ">="
37
32
  - !ruby/object:Gem::Version
38
33
  version: '0'
39
- prerelease: false
40
34
  type: :development
41
- - !ruby/object:Gem::Dependency
42
- name: rspec
35
+ prerelease: false
43
36
  version_requirements: !ruby/object:Gem::Requirement
44
37
  requirements:
45
- - - '>='
38
+ - - ">="
46
39
  - !ruby/object:Gem::Version
47
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
48
43
  requirement: !ruby/object:Gem::Requirement
49
44
  requirements:
50
- - - '>='
45
+ - - ">="
51
46
  - !ruby/object:Gem::Version
52
47
  version: '0'
53
- prerelease: false
54
48
  type: :development
55
- - !ruby/object:Gem::Dependency
56
- name: simplecov
49
+ prerelease: false
57
50
  version_requirements: !ruby/object:Gem::Requirement
58
51
  requirements:
59
- - - ~>
52
+ - - ">="
60
53
  - !ruby/object:Gem::Version
61
- version: '0.7'
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: simplecov
62
57
  requirement: !ruby/object:Gem::Requirement
63
58
  requirements:
64
- - - ~>
59
+ - - "~>"
65
60
  - !ruby/object:Gem::Version
66
61
  version: '0.7'
67
- prerelease: false
68
62
  type: :development
69
- - !ruby/object:Gem::Dependency
70
- name: array_scanner
63
+ prerelease: false
71
64
  version_requirements: !ruby/object:Gem::Requirement
72
65
  requirements:
73
- - - '>='
66
+ - - "~>"
74
67
  - !ruby/object:Gem::Version
75
- version: '0'
68
+ version: '0.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: array_scanner
76
71
  requirement: !ruby/object:Gem::Requirement
77
72
  requirements:
78
- - - '>='
73
+ - - ">="
79
74
  - !ruby/object:Gem::Version
80
75
  version: '0'
81
- prerelease: false
82
76
  type: :runtime
83
- - !ruby/object:Gem::Dependency
84
- name: llt-core
77
+ prerelease: false
85
78
  version_requirements: !ruby/object:Gem::Requirement
86
79
  requirements:
87
- - - '>='
80
+ - - ">="
88
81
  - !ruby/object:Gem::Version
89
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: llt-core
90
85
  requirement: !ruby/object:Gem::Requirement
91
86
  requirements:
92
- - - '>='
87
+ - - ">="
93
88
  - !ruby/object:Gem::Version
94
89
  version: '0'
95
- prerelease: false
96
90
  type: :runtime
97
- - !ruby/object:Gem::Dependency
98
- name: llt-core_extensions
91
+ prerelease: false
99
92
  version_requirements: !ruby/object:Gem::Requirement
100
93
  requirements:
101
- - - '>='
94
+ - - ">="
102
95
  - !ruby/object:Gem::Version
103
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: llt-core_extensions
104
99
  requirement: !ruby/object:Gem::Requirement
105
100
  requirements:
106
- - - '>='
101
+ - - ">="
107
102
  - !ruby/object:Gem::Version
108
103
  version: '0'
109
- prerelease: false
110
104
  type: :runtime
111
- - !ruby/object:Gem::Dependency
112
- name: llt-db_handler
105
+ prerelease: false
113
106
  version_requirements: !ruby/object:Gem::Requirement
114
107
  requirements:
115
- - - '>='
108
+ - - ">="
116
109
  - !ruby/object:Gem::Version
117
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: llt-db_handler
118
113
  requirement: !ruby/object:Gem::Requirement
119
114
  requirements:
120
- - - '>='
115
+ - - ">="
121
116
  - !ruby/object:Gem::Version
122
117
  version: '0'
123
- prerelease: false
124
118
  type: :runtime
125
- - !ruby/object:Gem::Dependency
126
- name: llt-helpers
119
+ prerelease: false
127
120
  version_requirements: !ruby/object:Gem::Requirement
128
121
  requirements:
129
- - - '>='
122
+ - - ">="
130
123
  - !ruby/object:Gem::Version
131
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: llt-helpers
132
127
  requirement: !ruby/object:Gem::Requirement
133
128
  requirements:
134
- - - '>='
129
+ - - ">="
135
130
  - !ruby/object:Gem::Version
136
131
  version: '0'
137
- prerelease: false
138
132
  type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
139
  description: LLT's Tokenizer
140
140
  email:
141
141
  - 1986gh@gmail.com
@@ -143,9 +143,9 @@ executables: []
143
143
  extensions: []
144
144
  extra_rdoc_files: []
145
145
  files:
146
- - .gitignore
147
- - .rspec
148
- - .travis.yml
146
+ - ".gitignore"
147
+ - ".rspec"
148
+ - ".travis.yml"
149
149
  - Gemfile
150
150
  - LICENSE.txt
151
151
  - README.md
@@ -160,6 +160,7 @@ files:
160
160
  - lib/llt/tokenizer/version.rb
161
161
  - lib/llt/tokenizer/worker.rb
162
162
  - llt-tokenizer.gemspec
163
+ - spec/lib/llt/token/punctuation_spec.rb
163
164
  - spec/lib/llt/tokenizer/api_spec.rb
164
165
  - spec/lib/llt/tokenizer_spec.rb
165
166
  - spec/spec_helper.rb
@@ -168,27 +169,28 @@ homepage: ''
168
169
  licenses:
169
170
  - MIT
170
171
  metadata: {}
171
- post_install_message:
172
+ post_install_message:
172
173
  rdoc_options: []
173
174
  require_paths:
174
175
  - lib
175
176
  required_ruby_version: !ruby/object:Gem::Requirement
176
177
  requirements:
177
- - - '>='
178
+ - - ">="
178
179
  - !ruby/object:Gem::Version
179
180
  version: '0'
180
181
  required_rubygems_version: !ruby/object:Gem::Requirement
181
182
  requirements:
182
- - - '>='
183
+ - - ">="
183
184
  - !ruby/object:Gem::Version
184
185
  version: '0'
185
186
  requirements: []
186
- rubyforge_project:
187
- rubygems_version: 2.1.9
188
- signing_key:
187
+ rubyforge_project:
188
+ rubygems_version: 2.2.0
189
+ signing_key:
189
190
  specification_version: 4
190
191
  summary: Breaks latin sentences into tokens
191
192
  test_files:
193
+ - spec/lib/llt/token/punctuation_spec.rb
192
194
  - spec/lib/llt/tokenizer/api_spec.rb
193
195
  - spec/lib/llt/tokenizer_spec.rb
194
196
  - spec/spec_helper.rb