llt-tokenizer 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0a66574ca8827b73d99ceb5c8dd59a7e1b12040
4
- data.tar.gz: 998792856ca6096a28eb2def54349bd973f30176
3
+ metadata.gz: 75d2abc5e72328a1b4ef2f224931c0656572e71d
4
+ data.tar.gz: b1b155c05e45b87cfec7f0eb4e1535d9f72783b5
5
5
  SHA512:
6
- metadata.gz: 26cc6ceb5702552ba927e4a3c2c38548a083915598998dc9bc77beab412b9c65321fc6da88553d8ade4ac409024aecae55f901024776d6aea65b1613f340200e
7
- data.tar.gz: e6ed6a22cc74fd58305e043109febe235f948a1b2c769415fd1fa471655b87bab924b5458541500e68957a3b7c5c200377ecd6d89831c19d346ce887806e7b6f
6
+ metadata.gz: 1a7f8fbd9be93c7053fe601243d6ea4c3a603b0c1643a5f102727844ae06c6083acbeb805bdf74e870c5ceb9c92e9dfdd796b8b201a434f50ef50d532fb65a93
7
+ data.tar.gz: 37d6ab4e7e39a30b6165e61e0a4d341e4620de0a90521bb17ac8ce141356dbe2dc3a952d39d57dd1e4c90eba224f9077b2bc12ecd5d818c8c9372734c91593fa
data/Gemfile CHANGED
@@ -12,7 +12,6 @@ gem 'llt-db_handler', git: 'git@github.com:latin-language-toolkit/llt-db_handler
12
12
  gem 'llt-helpers', git: 'git@github.com:latin-language-toolkit/llt-helpers.git'
13
13
 
14
14
  # Dependencies of db_handler
15
- gem 'llt-core_extensions', git: 'git@github.com:latin-language-toolkit/llt-core_extensions.git'
16
15
  gem 'llt-form_builder', git: 'git@github.com:latin-language-toolkit/llt-form_builder.git'
17
16
 
18
17
  platform :ruby do
data/README.md CHANGED
@@ -51,6 +51,11 @@ The Tokenizer takes several options upon creation or a call to #tokenize:
51
51
  tokens.map(&:to_s)
52
52
  # => ["Arma", "virum", "--que", "cano", "."]
53
53
 
54
+ # splitting of enclitics can be disabled altogether
55
+ tokens = t.tokenize('Arma virumque cano.', splitting: false)
56
+ tokens.map(&:to_s)
57
+ # => ["Arma", "virumque", "cano", "."]
58
+
54
59
  # indexing determines if each token shall receive a consecutive id
55
60
  tokens = t.tokenize('Arma virumque cano.', indexing: true)
56
61
  tokens.first.id # => 1
@@ -1,14 +1,19 @@
1
+ require 'xml_escape'
2
+
1
3
  module LLT
2
4
  class Token
3
5
  class Punctuation < Token
4
6
  xml_tag 'pc'
5
7
 
8
+ include XmlEscape
9
+
6
10
  attr_accessor :opening, :closing, :other
7
11
 
8
12
  def initialize(string, id = nil)
9
13
  super
10
14
  # this is part of an old interface that is mostly unused
11
15
  # some parts remain - find and delete em
16
+ @string = xml_decode(string)
12
17
  @opening = false
13
18
  @closing = false
14
19
  @other = false
@@ -31,6 +36,10 @@ module LLT
31
36
  def inspect
32
37
  "#{"Punctuation token:".yellow} #{@string}"
33
38
  end
39
+
40
+ def as_xml
41
+ xml_encode(@string)
42
+ end
34
43
  end
35
44
  end
36
45
  end
data/lib/llt/tokenizer.rb CHANGED
@@ -24,6 +24,8 @@ module LLT
24
24
  enclitics_marker: '-',
25
25
  merging: true,
26
26
  indexing: true,
27
+ splitting: true,
28
+ xml: false,
27
29
  }
28
30
  end
29
31
 
@@ -34,7 +36,7 @@ module LLT
34
36
  setup(text, options)
35
37
 
36
38
  find_abbreviations_and_join_strings
37
- split_enklitika_and_change_their_position
39
+ split_enklitika_and_change_their_position if @splitting
38
40
  merge_what_needs_merging if @merging # quam diu => quamdiu
39
41
  tokens = create_tokens
40
42
 
@@ -43,17 +45,20 @@ module LLT
43
45
  end
44
46
 
45
47
  def setup(text, options = {}, worker = [])
46
- @text = text
48
+ @text = text
47
49
  evaluate_metrical_presence(@text)
48
50
  @enclitics_marker = parse_option(:enclitics_marker, options)
49
51
  @merging = parse_option(:merging, options)
50
52
  @shifting = parse_option(:shifting, options)
53
+ @splitting = parse_option(:splitting, options)
51
54
  @indexing = parse_option(:indexing, options)
55
+ @xml = parse_option(:xml, options)
52
56
  @worker = setup_worker(worker)
53
57
  @shift_range = shift_range(@shifting)
54
58
  end
55
59
 
56
- PUNCTUATION = /([\.\?,!;\-:"'”\(\)\[\]]|<\/?.+?>)\1*/
60
+ PUNCTUATION = /&(?:amp|quot|apos|lt|gt);|([\.\?,!;\-:"'”&\(\)\[\]†<>])\1*/
61
+ XML_TAG = /<\/?.+?>/
57
62
 
58
63
  # This is here for two reasons:
59
64
  # 1) easier test setup, when a preliminary result shall be further evaluated
@@ -64,16 +69,15 @@ module LLT
64
69
  # if it's needed - which should perform better, when there
65
70
  # are no metrics involved (the default case)
66
71
  def setup_worker(worker)
67
- if worker.any?
68
- worker
72
+ return worker if worker.any?
73
+
74
+ elements = split_and_space_text
75
+ put_xml_attributes_back_together(elements) if @xml
76
+
77
+ if metrical?
78
+ Worker.new(elements, @enclitics_marker)
69
79
  else
70
- elements = @text.gsub(PUNCTUATION, ' \0 ').split
71
- put_xml_attributes_back_together(elements)
72
- if metrical?
73
- Worker.new(elements, @enclitics_marker)
74
- else
75
- elements
76
- end
80
+ elements
77
81
  end
78
82
  end
79
83
 
@@ -81,19 +85,23 @@ module LLT
81
85
  shifting_enabled ? 0 : 1
82
86
  end
83
87
 
88
+ def split_and_space_text
89
+ regex = @xml ? Regexp.union(XML_TAG, PUNCTUATION) : PUNCTUATION
90
+ @text.gsub(regex, ' \0 ').split
91
+ end
92
+
84
93
  def put_xml_attributes_back_together(elements)
85
- # elements could be like this
86
- # ['<tag', 'attr1="val"', 'attr1="val>']
87
- # and we want the complete xml tag back together
88
94
  as = ArrayScanner.new(elements)
89
95
  loop do
90
- last = as.look_behind
91
- if last && last.start_with?('<') &! last.end_with?('>')
92
- if as.current.match(/\w+=".*"$|>/)
96
+ last = as.look_behind.to_s # catch nil
97
+ if open_xml_tag?(last)
98
+ number_of_xml_elements = as.peek_until do |el|
99
+ el.end_with?('>')
100
+ end.size + 1
101
+
102
+ number_of_xml_elements.times do
93
103
  last << ' ' << as.current
94
104
  elements.delete_at(as.pos)
95
- # we don't need to forward, as we delete an element anyway
96
- next
97
105
  end
98
106
  else
99
107
  as.forward(1)
@@ -102,12 +110,18 @@ module LLT
102
110
  end
103
111
  end
104
112
 
113
+ def open_xml_tag?(str)
114
+ str.start_with?('<') &! str.end_with?('>')
115
+ end
116
+
105
117
 
106
118
  ######################
107
119
 
108
120
  # covers abbreviated Roman praenomen like Ti. in Ti. Claudius Nero
109
121
  # covers Roman date expression like a. d. V. Kal. Apr.
110
122
  ABBREVIATIONS = /^(#{ALL_ABBRS_PIPED})$/
123
+ # covers a list of words which are abbreviated with a ' like satin' for satisne
124
+ APOSTROPHE_WORDS = /^(#{APOSTROPHES_PIPED})$/
111
125
 
112
126
  # %w{ Atque M . Cicero mittit } to %w{ Atque M. Cicero mittit }
113
127
 
@@ -115,7 +129,7 @@ module LLT
115
129
  arr = []
116
130
  @worker.each_with_index do |e, i|
117
131
  n = @worker[i + 1]
118
- if e =~ ABBREVIATIONS && n == "."
132
+ if (n == '.' && e =~ ABBREVIATIONS) || (n == "'" && e =~ APOSTROPHE_WORDS)
119
133
  @worker[i + 1] = n.prepend(e)
120
134
  arr << (i - arr.size)
121
135
  end
@@ -324,7 +338,7 @@ module LLT
324
338
 
325
339
  ABBR_NAME_WITH_DOT = /^(#{NAMES_PIPED})\.$/
326
340
  ROMAN_DATE_EXPR_WITH_DOT = /^(#{DATES_PIPED})\.$/
327
- PUNCT_ITSELF = Regexp.new(PUNCTUATION.source + '$')
341
+ PUNCT_ITSELF = Regexp.new("^(?:#{PUNCTUATION.source})$")
328
342
  XML_TAG = /<\/?.+?>/
329
343
 
330
344
  def create_tokens
@@ -1,5 +1,5 @@
1
1
  module LLT
2
2
  class Tokenizer
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.3"
4
4
  end
5
5
  end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ describe LLT::Token::Punctuation do
4
+ describe "#initialize" do
5
+ it "normalizes escaped xml characters" do
6
+ punct = LLT::Token::Punctuation.new('&amp;')
7
+ punct.to_s.should == '&'
8
+ end
9
+ end
10
+
11
+ describe "#as_xml" do
12
+ it "overrides LLT::Core::Containable#as_xml to use xml encodings" do
13
+ punct = LLT::Token::Punctuation.new('&')
14
+ punct.as_xml.should == "&amp;"
15
+ end
16
+ end
17
+ end
@@ -18,10 +18,16 @@ describe LLT::Tokenizer do
18
18
  end
19
19
 
20
20
  it "handles all kinds of parens as well as cruces" do
21
- txt = "Marcus (et Claudius) †amici† [sunt]."
21
+ txt = "<Marcus> (et Claudius) †amici† [sunt]."
22
22
  tokens = tokenizer.tokenize(txt)
23
- tokens.should have(12).items
24
- tokens.map(&:to_s).should == %w{ Marcus ( et Claudius ) † amici † [ sunt ] . }
23
+ tokens.should have(14).items
24
+ tokens.map(&:to_s).should == %w{ < Marcus > ( et Claudius ) † amici † [ sunt ] . }
25
+ end
26
+
27
+ it "handles escaped xml characters" do
28
+ txt = "&amp; &quot; &apos; &gt; &lt; ;"
29
+ tokens = tokenizer.tokenize(txt)
30
+ tokens.should have(6).items
25
31
  end
26
32
 
27
33
  describe "takes an optional keyword argument add_to" do
@@ -110,19 +116,11 @@ describe LLT::Tokenizer do
110
116
  end
111
117
  end
112
118
  end
113
-
114
- context "with embedded xml tags" do
115
- it "doesn't break" do
116
- txt = '<grc>text text</grc>'
117
- tokens = tokenizer.tokenize(txt)
118
- tokens.should have(4).items
119
- end
120
- end
121
119
  end
122
120
  end
123
121
 
124
122
  describe "#find_abbreviations_and_join_strings" do
125
- describe "should bring back abbreviation dots" do
123
+ describe "should bring back abbreviation dots and apostrophes" do
126
124
  it "with names" do
127
125
  tokenizer.setup("", {}, %w{ Atque Sex . et M . Cicero . })
128
126
  tokenizer.find_abbreviations_and_join_strings
@@ -134,6 +132,12 @@ describe LLT::Tokenizer do
134
132
  tokenizer.find_abbreviations_and_join_strings
135
133
  tokenizer.preliminary.should == %w{ a. d. V Kal. Apr. }
136
134
  end
135
+
136
+ it "with apostrophe" do
137
+ tokenizer.setup("", {}, %w{ ' Apostrophi ' sunt : po ' min ' vin ' tun' scin ' potin ' satin ' })
138
+ tokenizer.find_abbreviations_and_join_strings
139
+ tokenizer.preliminary.should == %w{ ' Apostrophi ' sunt : po' min' vin' tun' scin' potin' satin' }
140
+ end
137
141
  end
138
142
  end
139
143
 
@@ -252,10 +256,10 @@ describe LLT::Tokenizer do
252
256
  end
253
257
 
254
258
  examples = {
255
- "Word" => %w{ ita Marcus quoque -que },
259
+ "Word" => %w{ ita Marcus quoque -que po' },
256
260
  "Filler" => %w{ M. Sex. App. Ap. Tib. Ti. C. a. d. Kal. Ian. }, #I XI MMC }
257
261
  "XmlTag" => %w{ <grc> </grc> },
258
- "Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' }
262
+ "Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' & < > &amp; &lt; &gt; &apos; &quot; }
259
263
  }
260
264
 
261
265
  examples.each do |klass, elements|
@@ -278,37 +282,11 @@ describe LLT::Tokenizer do
278
282
  tokens.map(&:id).should == [1, 2]
279
283
  end
280
284
 
281
- it "can be disabled" do
285
+ it "id's can be disabled" do
282
286
  txt = 'Cano.'
283
287
  tokens = tokenizer.tokenize(txt, indexing: false)
284
288
  tokens.map(&:id).should == [nil, nil]
285
289
  end
286
-
287
- it "doesn't count plain xml tags" do
288
- txt = '<grc>text text</grc>'
289
- tokens = tokenizer.tokenize(txt)
290
- tokens.map(&:id).should == [nil, 1, 2, nil]
291
- end
292
-
293
- it "doesn't count xml tags when they come with attributes" do
294
- txt = '<foreign lang="lat">Graeca</foreign> lingua est.'
295
- tokens = tokenizer.tokenize(txt).map(&:to_s)
296
- res = ['<foreign lang="lat">', 'Graeca', '</foreign>', 'lingua', 'est', '.']
297
- tokens.should == res
298
- end
299
-
300
- it "handles nested xml as well" do
301
- txt = '<l n="70"><foreign lang="lat">Graeca lingua est.</foreign></l>'
302
- tokens = tokenizer.tokenize(txt).map(&:to_s)
303
- res = ['<l n="70">', '<foreign lang="lat">', 'Graeca', 'lingua', 'est', '.', '</foreign>', '</l>']
304
- tokens.should == res
305
- end
306
-
307
- it "handles text with broken off xml tags (the rest will e.g. be in another sentence)" do
308
- txt = "<lg org=\"uniform\" sample=\"complete\"><l>quem vocet divum populus ruentis</l><l>imperi rebus?"
309
- tokens = tokenizer.tokenize(txt)
310
- tokens.should have(12).items
311
- end
312
290
  end
313
291
 
314
292
  context "with options" do
@@ -346,11 +324,68 @@ describe LLT::Tokenizer do
346
324
  tokens.should == %w{ quam diu cano ? }
347
325
  end
348
326
  end
327
+
328
+ context "with disabled splitting" do
329
+ it "doesn't split enclitics" do
330
+ txt = 'arma virumque cano.'
331
+ opts = { splitting: false }
332
+ tokens = tokenizer.tokenize(txt, opts).map(&:to_s)
333
+ tokens.should == %w{ arma virumque cano . }
334
+ end
335
+ end
336
+
337
+ context "with xml handling enabled" do
338
+ let(:xml_tokenizer) { LLT::Tokenizer.new(db: stub_db, xml: true) }
339
+
340
+ it "doesn't break when xml is embedded" do
341
+ txt = '<grc>text text</grc>'
342
+ tokens = xml_tokenizer.tokenize(txt)
343
+ tokens.should have(4).items
344
+ end
345
+
346
+ it "doesn't count plain xml tags" do
347
+ txt = '<grc>text text</grc>'
348
+ tokens = xml_tokenizer.tokenize(txt)
349
+ tokens.map(&:id).should == [nil, 1, 2, nil]
350
+ end
351
+
352
+ it "doesn't count xml tags when they come with attributes" do
353
+ txt = '<foreign lang="lat">Graeca</foreign> lingua est.'
354
+ tokens = xml_tokenizer.tokenize(txt).map(&:to_s)
355
+ res = ['<foreign lang="lat">', 'Graeca', '</foreign>', 'lingua', 'est', '.']
356
+ tokens.should == res
357
+ end
358
+
359
+ it "handles nested xml as well" do
360
+ txt = '<l n="70"><foreign lang="lat">Graeca lingua est.</foreign></l>'
361
+ tokens = xml_tokenizer.tokenize(txt).map(&:to_s)
362
+ res = ['<l n="70">', '<foreign lang="lat">', 'Graeca', 'lingua', 'est', '.', '</foreign>', '</l>']
363
+ tokens.should == res
364
+ end
365
+
366
+ it "handles text with broken off xml tags (the rest will e.g. be in another sentence)" do
367
+ txt = "<lg org=\"uniform\" sample=\"complete\"><l>quem vocet divum populus ruentis</l><l>imperi rebus?"
368
+ tokens = xml_tokenizer.tokenize(txt)
369
+ tokens.should have(12).items
370
+ end
371
+
372
+ it "doesn't fall with spaces inside of xml attributes" do
373
+ txt = '<test>veni vidi <bad att="a a a">vici</bad></test>'
374
+ tokens = xml_tokenizer.tokenize(txt)
375
+ tokens.should have(7).items
376
+ end
377
+
378
+ it "expects all text chevrons to be escaped, otherwise they are xml tags!" do
379
+ txt = '<test>&lt;veni&gt;</test>'
380
+ tokens = xml_tokenizer.tokenize(txt)
381
+ tokens.should have(5).item
382
+ end
383
+ end
349
384
  end
350
385
  end
351
386
 
352
387
  context "with options on instance creation" do
353
- it "a new instance can receive options, which it will use as it's defaults" do
388
+ it "a new instance can receive options, which it will use as its defaults" do
354
389
  custom_tok = LLT::Tokenizer.new(db: stub_db,
355
390
  shifting: false,
356
391
  enclitics_marker: '')
metadata CHANGED
@@ -1,141 +1,141 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llt-tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - LFDM
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-09 00:00:00.000000000 Z
11
+ date: 2014-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.3'
20
15
  requirement: !ruby/object:Gem::Requirement
21
16
  requirements:
22
- - - ~>
17
+ - - "~>"
23
18
  - !ruby/object:Gem::Version
24
19
  version: '1.3'
25
- prerelease: false
26
20
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
21
+ prerelease: false
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - "~>"
32
25
  - !ruby/object:Gem::Version
33
- version: '0'
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
34
29
  requirement: !ruby/object:Gem::Requirement
35
30
  requirements:
36
- - - '>='
31
+ - - ">="
37
32
  - !ruby/object:Gem::Version
38
33
  version: '0'
39
- prerelease: false
40
34
  type: :development
41
- - !ruby/object:Gem::Dependency
42
- name: rspec
35
+ prerelease: false
43
36
  version_requirements: !ruby/object:Gem::Requirement
44
37
  requirements:
45
- - - '>='
38
+ - - ">="
46
39
  - !ruby/object:Gem::Version
47
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
48
43
  requirement: !ruby/object:Gem::Requirement
49
44
  requirements:
50
- - - '>='
45
+ - - ">="
51
46
  - !ruby/object:Gem::Version
52
47
  version: '0'
53
- prerelease: false
54
48
  type: :development
55
- - !ruby/object:Gem::Dependency
56
- name: simplecov
49
+ prerelease: false
57
50
  version_requirements: !ruby/object:Gem::Requirement
58
51
  requirements:
59
- - - ~>
52
+ - - ">="
60
53
  - !ruby/object:Gem::Version
61
- version: '0.7'
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: simplecov
62
57
  requirement: !ruby/object:Gem::Requirement
63
58
  requirements:
64
- - - ~>
59
+ - - "~>"
65
60
  - !ruby/object:Gem::Version
66
61
  version: '0.7'
67
- prerelease: false
68
62
  type: :development
69
- - !ruby/object:Gem::Dependency
70
- name: array_scanner
63
+ prerelease: false
71
64
  version_requirements: !ruby/object:Gem::Requirement
72
65
  requirements:
73
- - - '>='
66
+ - - "~>"
74
67
  - !ruby/object:Gem::Version
75
- version: '0'
68
+ version: '0.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: array_scanner
76
71
  requirement: !ruby/object:Gem::Requirement
77
72
  requirements:
78
- - - '>='
73
+ - - ">="
79
74
  - !ruby/object:Gem::Version
80
75
  version: '0'
81
- prerelease: false
82
76
  type: :runtime
83
- - !ruby/object:Gem::Dependency
84
- name: llt-core
77
+ prerelease: false
85
78
  version_requirements: !ruby/object:Gem::Requirement
86
79
  requirements:
87
- - - '>='
80
+ - - ">="
88
81
  - !ruby/object:Gem::Version
89
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: llt-core
90
85
  requirement: !ruby/object:Gem::Requirement
91
86
  requirements:
92
- - - '>='
87
+ - - ">="
93
88
  - !ruby/object:Gem::Version
94
89
  version: '0'
95
- prerelease: false
96
90
  type: :runtime
97
- - !ruby/object:Gem::Dependency
98
- name: llt-core_extensions
91
+ prerelease: false
99
92
  version_requirements: !ruby/object:Gem::Requirement
100
93
  requirements:
101
- - - '>='
94
+ - - ">="
102
95
  - !ruby/object:Gem::Version
103
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: llt-core_extensions
104
99
  requirement: !ruby/object:Gem::Requirement
105
100
  requirements:
106
- - - '>='
101
+ - - ">="
107
102
  - !ruby/object:Gem::Version
108
103
  version: '0'
109
- prerelease: false
110
104
  type: :runtime
111
- - !ruby/object:Gem::Dependency
112
- name: llt-db_handler
105
+ prerelease: false
113
106
  version_requirements: !ruby/object:Gem::Requirement
114
107
  requirements:
115
- - - '>='
108
+ - - ">="
116
109
  - !ruby/object:Gem::Version
117
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: llt-db_handler
118
113
  requirement: !ruby/object:Gem::Requirement
119
114
  requirements:
120
- - - '>='
115
+ - - ">="
121
116
  - !ruby/object:Gem::Version
122
117
  version: '0'
123
- prerelease: false
124
118
  type: :runtime
125
- - !ruby/object:Gem::Dependency
126
- name: llt-helpers
119
+ prerelease: false
127
120
  version_requirements: !ruby/object:Gem::Requirement
128
121
  requirements:
129
- - - '>='
122
+ - - ">="
130
123
  - !ruby/object:Gem::Version
131
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: llt-helpers
132
127
  requirement: !ruby/object:Gem::Requirement
133
128
  requirements:
134
- - - '>='
129
+ - - ">="
135
130
  - !ruby/object:Gem::Version
136
131
  version: '0'
137
- prerelease: false
138
132
  type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
139
  description: LLT's Tokenizer
140
140
  email:
141
141
  - 1986gh@gmail.com
@@ -143,9 +143,9 @@ executables: []
143
143
  extensions: []
144
144
  extra_rdoc_files: []
145
145
  files:
146
- - .gitignore
147
- - .rspec
148
- - .travis.yml
146
+ - ".gitignore"
147
+ - ".rspec"
148
+ - ".travis.yml"
149
149
  - Gemfile
150
150
  - LICENSE.txt
151
151
  - README.md
@@ -160,6 +160,7 @@ files:
160
160
  - lib/llt/tokenizer/version.rb
161
161
  - lib/llt/tokenizer/worker.rb
162
162
  - llt-tokenizer.gemspec
163
+ - spec/lib/llt/token/punctuation_spec.rb
163
164
  - spec/lib/llt/tokenizer/api_spec.rb
164
165
  - spec/lib/llt/tokenizer_spec.rb
165
166
  - spec/spec_helper.rb
@@ -168,27 +169,28 @@ homepage: ''
168
169
  licenses:
169
170
  - MIT
170
171
  metadata: {}
171
- post_install_message:
172
+ post_install_message:
172
173
  rdoc_options: []
173
174
  require_paths:
174
175
  - lib
175
176
  required_ruby_version: !ruby/object:Gem::Requirement
176
177
  requirements:
177
- - - '>='
178
+ - - ">="
178
179
  - !ruby/object:Gem::Version
179
180
  version: '0'
180
181
  required_rubygems_version: !ruby/object:Gem::Requirement
181
182
  requirements:
182
- - - '>='
183
+ - - ">="
183
184
  - !ruby/object:Gem::Version
184
185
  version: '0'
185
186
  requirements: []
186
- rubyforge_project:
187
- rubygems_version: 2.1.9
188
- signing_key:
187
+ rubyforge_project:
188
+ rubygems_version: 2.2.0
189
+ signing_key:
189
190
  specification_version: 4
190
191
  summary: Breaks latin sentences into tokens
191
192
  test_files:
193
+ - spec/lib/llt/token/punctuation_spec.rb
192
194
  - spec/lib/llt/tokenizer/api_spec.rb
193
195
  - spec/lib/llt/tokenizer_spec.rb
194
196
  - spec/spec_helper.rb