llt-tokenizer 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/Gemfile +7 -9
- data/README.md +6 -0
- data/lib/llt/token.rb +8 -0
- data/lib/llt/tokenizer.rb +15 -6
- data/lib/llt/tokenizer/api.rb +3 -0
- data/lib/llt/tokenizer/greek.rb +69 -0
- data/lib/llt/tokenizer/version.rb +1 -1
- data/lib/llt/tokenizer/version_info.rb +7 -0
- data/llt-tokenizer.gemspec +1 -1
- data/spec/lib/llt/token_spec.rb +25 -0
- data/spec/lib/llt/tokenizer/greek_spec.rb +66 -0
- data/spec/lib/llt/tokenizer_spec.rb +4 -1
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a9abfc5e79b148f497749053c8ccfa7ac9653af
|
4
|
+
data.tar.gz: 1c9fe20eb2824eccc1840602beae6552415eb5d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3cd367d754d75f895240c709aed9697140c8359490bc634e56f118b77cc015c2a08c80d7fa4fa74448084844beec4749a7b01b1789c0805a3a5a8fa8d465d5e9
|
7
|
+
data.tar.gz: 21c50a75955cab805fb81bc1435963e047171936c015981121de1405378fb4af9c21a69153c0c043d3a504986e1022437690cedb60b88e0b8246ca6fce20565b
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -5,15 +5,15 @@ gemspec
|
|
5
5
|
|
6
6
|
gem 'coveralls', require: false
|
7
7
|
|
8
|
-
gem 'llt-core', git: 'git
|
9
|
-
gem 'llt-core_extensions', git: 'git
|
10
|
-
gem 'llt-constants', git: 'git
|
11
|
-
gem 'llt-db_handler', git: 'git
|
12
|
-
gem 'llt-db_handler-stub', git: 'git
|
13
|
-
gem 'llt-helpers', git: 'git
|
8
|
+
gem 'llt-core', git: 'git://github.com/latin-language-toolkit/llt-core.git'
|
9
|
+
gem 'llt-core_extensions', git: 'git://github.com/latin-language-toolkit/llt-core_extensions.git'
|
10
|
+
gem 'llt-constants', git: 'git://github.com/latin-language-toolkit/llt-constants.git'
|
11
|
+
gem 'llt-db_handler', git: 'git://github.com/latin-language-toolkit/llt-db_handler.git'
|
12
|
+
gem 'llt-db_handler-stub', git: 'git://github.com/latin-language-toolkit/llt-db_handler-stub.git'
|
13
|
+
gem 'llt-helpers', git: 'git://github.com/latin-language-toolkit/llt-helpers.git'
|
14
14
|
|
15
15
|
# Dependencies of db_handler
|
16
|
-
gem 'llt-form_builder', git: 'git
|
16
|
+
gem 'llt-form_builder', git: 'git://github.com/latin-language-toolkit/llt-form_builder.git'
|
17
17
|
|
18
18
|
platform :ruby do
|
19
19
|
gem 'pg'
|
@@ -23,5 +23,3 @@ platform :jruby do
|
|
23
23
|
gem 'activerecord-jdbcpostgresql-adapter'
|
24
24
|
gem 'jruby-httpclient'
|
25
25
|
end
|
26
|
-
|
27
|
-
gem 'pry'
|
data/README.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# LLT::Tokenizer
|
2
2
|
|
3
|
+
[![Version](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/badge_fury.png)](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/badge_fury)
|
4
|
+
[![Dependencies](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/gemnasium.png)](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/gemnasium)
|
5
|
+
[![Build Status](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/travis.png)](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/travis)
|
6
|
+
[![Coverage](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/coveralls.png)](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/coveralls)
|
7
|
+
[![Code Climate](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/code_climate.png)](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/code_climate)
|
8
|
+
|
3
9
|
Flexible service to tokenize Latin texts.
|
4
10
|
|
5
11
|
## Installation
|
data/lib/llt/token.rb
CHANGED
@@ -34,6 +34,10 @@ module LLT
|
|
34
34
|
@special_roles += roles
|
35
35
|
end
|
36
36
|
|
37
|
+
def ==(other)
|
38
|
+
to_s.downcase == other.to_s.downcase
|
39
|
+
end
|
40
|
+
|
37
41
|
# deprecated
|
38
42
|
def add_form(form)
|
39
43
|
@forms << form
|
@@ -47,5 +51,9 @@ module LLT
|
|
47
51
|
def use(*args)
|
48
52
|
# hook method, overwritten by Word
|
49
53
|
end
|
54
|
+
|
55
|
+
def set_functions(*args)
|
56
|
+
# hook method
|
57
|
+
end
|
50
58
|
end
|
51
59
|
end
|
data/lib/llt/tokenizer.rb
CHANGED
@@ -4,15 +4,19 @@ require 'llt/constants/abbreviations'
|
|
4
4
|
require 'llt/core_extensions/array'
|
5
5
|
require 'llt/db_handler/prometheus'
|
6
6
|
require 'llt/helpers/metrical'
|
7
|
+
require 'llt/tokenizer/version'
|
8
|
+
require 'llt/tokenizer/version_info'
|
7
9
|
|
8
10
|
module LLT
|
9
11
|
class Tokenizer
|
10
12
|
require 'llt/token'
|
11
13
|
require 'llt/tokenizer/worker'
|
14
|
+
require 'llt/tokenizer/greek'
|
12
15
|
|
13
16
|
include Core::Serviceable
|
14
17
|
include Constants::Abbreviations
|
15
18
|
include Helpers::Metrical
|
19
|
+
include Greek
|
16
20
|
|
17
21
|
uses_db { DbHandler::Prometheus.new }
|
18
22
|
|
@@ -26,6 +30,8 @@ module LLT
|
|
26
30
|
indexing: true,
|
27
31
|
splitting: true,
|
28
32
|
xml: false,
|
33
|
+
#for Greek
|
34
|
+
krasis_marker: '-'
|
29
35
|
}
|
30
36
|
end
|
31
37
|
|
@@ -36,6 +42,8 @@ module LLT
|
|
36
42
|
setup(text, options)
|
37
43
|
|
38
44
|
find_abbreviations_and_join_strings
|
45
|
+
#for Greek
|
46
|
+
split_krasis if @splitting
|
39
47
|
split_enklitika_and_change_their_position if @splitting
|
40
48
|
merge_what_needs_merging if @merging # quam diu => quamdiu
|
41
49
|
tokens = create_tokens
|
@@ -53,11 +61,13 @@ module LLT
|
|
53
61
|
@splitting = parse_option(:splitting, options)
|
54
62
|
@indexing = parse_option(:indexing, options)
|
55
63
|
@xml = parse_option(:xml, options)
|
64
|
+
#for Greek
|
65
|
+
@krasis_marker = parse_option(:krasis_marker, options)
|
56
66
|
@worker = setup_worker(worker)
|
57
67
|
@shift_range = shift_range(@shifting)
|
58
68
|
end
|
59
69
|
|
60
|
-
PUNCTUATION = /&(?:amp|quot|apos|lt|gt);|([\.\?,!;\-:"'”&\(\)\[\]
|
70
|
+
PUNCTUATION = /&(?:amp|quot|apos|lt|gt);|([\.\?,!;\-:"'”&\(\)\[\]†<>᾽·])\1*/
|
61
71
|
XML_TAG = /<\/?.+?>/
|
62
72
|
|
63
73
|
# This is here for two reasons:
|
@@ -129,7 +139,7 @@ module LLT
|
|
129
139
|
arr = []
|
130
140
|
@worker.each_with_index do |e, i|
|
131
141
|
n = @worker[i + 1]
|
132
|
-
if (n == '.' && e =~ ABBREVIATIONS) || (n == "'" && e =~ APOSTROPHE_WORDS)
|
142
|
+
if (n == '.' && e =~ ABBREVIATIONS) || (n == "'" && e =~ APOSTROPHE_WORDS) || greek_apostrophe(n,e)
|
133
143
|
@worker[i + 1] = n.prepend(e)
|
134
144
|
arr << (i - arr.size)
|
135
145
|
end
|
@@ -141,7 +151,7 @@ module LLT
|
|
141
151
|
######################
|
142
152
|
|
143
153
|
WORDS_ENDING_WITH_QUE = /^((un.{1,3})?[qc]u[aei].*que|qu[ao]que|itaque|atque|ut[er].*que|.*cumque|pler(.{1,2}|[oa]rum)que|denique|undique|usque)$/i # neque taken out!
|
144
|
-
WORDS_ENDING_WITH_NE = /^(omne|sine|bene|paene|iuvene)$/i
|
154
|
+
WORDS_ENDING_WITH_NE = /^(omne|sine|bene|paene|iuvene|siccine)$/i # TODO: generalize these words and start looking them up in the db, especially adverbs
|
145
155
|
WORDS_ENDING_WITH_VE = /^(sive|neve)$/i
|
146
156
|
|
147
157
|
# laetusque to -que laetus
|
@@ -195,7 +205,7 @@ module LLT
|
|
195
205
|
def split_nec
|
196
206
|
indices = []
|
197
207
|
@worker.each_with_index do |token, i|
|
198
|
-
if token
|
208
|
+
if token =~ /^nec$/i
|
199
209
|
token.slice!(-1)
|
200
210
|
indices << (i + indices.size + @shift_range)
|
201
211
|
end
|
@@ -247,7 +257,7 @@ module LLT
|
|
247
257
|
entries = []
|
248
258
|
entries += lookup(orig_el, :noun, :nom) if orig_el =~ /io$/ # actio-ne ratio-ne
|
249
259
|
entries += lookup(orig_el + "n", :persona, :stem) if orig_el =~ /o$/ # Plato-ne Cicero-ne Solo-ne
|
250
|
-
entries += lookup(orig_el + "n", :noun, :stem, [3, 33])
|
260
|
+
entries += lookup(orig_el + "n", :noun, :stem, [3, 33]) # fortitudi-ne ratio-ne libidi-ne homi-ne fi-ne agmi-ne iuve-ne ig-ne
|
251
261
|
entries += lookup(orig_el + "n", :noun, :stem, 2) # domi-ne
|
252
262
|
entries += lookup(orig_el + "n", :adjective, :stem, [1,3]) # communis commune, or bonus
|
253
263
|
|
@@ -319,7 +329,6 @@ module LLT
|
|
319
329
|
end
|
320
330
|
end
|
321
331
|
|
322
|
-
|
323
332
|
######################
|
324
333
|
|
325
334
|
MERGE_WORDS = [ %w{ quam diu }, ['non', /null.{1,4}$/] ]
|
data/lib/llt/tokenizer/api.rb
CHANGED
@@ -5,6 +5,7 @@ require 'llt/core/api'
|
|
5
5
|
|
6
6
|
class Api < Sinatra::Base
|
7
7
|
register Sinatra::RespondWith
|
8
|
+
register LLT::Core::Api::VersionRoutes
|
8
9
|
helpers LLT::Core::Api::Helpers
|
9
10
|
|
10
11
|
get '/tokenize' do
|
@@ -17,4 +18,6 @@ class Api < Sinatra::Base
|
|
17
18
|
f.xml { to_xml(tokens, params) }
|
18
19
|
end
|
19
20
|
end
|
21
|
+
|
22
|
+
add_version_route_for('/tokenize', dependencies: %i{ Core Tokenizer })
|
20
23
|
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module LLT
  class Tokenizer
    # Greek-specific helpers that are mixed into LLT::Tokenizer.
    #
    # The methods read two instance variables of the including object:
    #   @worker        - the token collection; must respond to
    #                    #each_with_index, #[]= and #flatten!
    #   @krasis_marker - string appended to the first half of a split krasis
    module Greek
      PLAIN_VOWELS = %w(α ε ι η ο υ ω)
      # NOTE: these lists must contain Greek code points only. Earlier versions
      # carried the Latin look-alikes "ó", "ì" and "ò", so genuine Greek tokens
      # containing omicron with acute/grave or iota with grave never matched.
      VOWELS_WITH_ACUTE = %w(ά έ ή ί ό ύ ώ)
      VOWELS_WITH_GRAVE = %w(ὰ ὲ ὴ ὶ ὸ ὺ ὼ)
      VOWELS_WITH_CIRCUMFLEX = %w(ᾶ ῆ ῖ ῦ ῶ)
      VOWELS_WITH_IOTA = %w(ᾲ ᾳ ᾴ ᾷ ῂ ῃ ῄ ῇ ῲ ῳ ῴ ῷ)
      CONSONANTS = %w(β γ δ ζ θ κ λ μ ν ξ π ρ ῥ ῤ σ ς τ φ χ ψ)
      VOWELS = [PLAIN_VOWELS,
                VOWELS_WITH_ACUTE,
                VOWELS_WITH_GRAVE,
                VOWELS_WITH_CIRCUMFLEX,
                VOWELS_WITH_IOTA
               ].flatten

      SPIRITUS_LENIS = %w(ἀ ἐ ἠ ἰ ὀ ὐ ὠ)
      SPIRITUS_LENIS_WITH_GRAVE = %w(ἂ ἒ ἲ ἢ ὂ ὒ ὢ)
      SPIRITUS_LENIS_WITH_ACUTE = %w(ἄ ἔ ἴ ἤ ὄ ὔ ὤ)
      SPIRITUS_LENIS_WITH_CIRCUMFLEX = %w(ἆ ἶ ἦ ὖ ὦ)

      SPIRITUS_ASPER = %w(ἁ ἑ ἡ ἱ ὁ ὑ ὡ)
      SPIRITUS_ASPER_WITH_GRAVE = %w(ἃ ἣ ἓ ἳ ὃ ὓ ὣ)
      SPIRITUS_ASPER_WITH_ACUTE = %w(ἅ ἥ ἕ ἵ ὅ ὕ ὥ)
      SPIRITUS_ASPER_WITH_CIRCUMFLEX = %w(ἇ ἷ ἧ ὗ ὧ)

      SPIRITUS_WITH_IOTA = %w(ᾀ ᾁ ᾂ ᾃ ᾄ ᾅ ᾆ ᾇ ᾐ ᾑ ᾒ ᾓ ᾔ ᾕ ᾖ ᾗ ᾠ ᾡ ᾢ ᾣ ᾤ ᾥ ᾦ ᾧ)

      VOWELS_WITH_SPIRITUS = [
        SPIRITUS_LENIS,
        SPIRITUS_LENIS_WITH_ACUTE,
        SPIRITUS_LENIS_WITH_GRAVE,
        SPIRITUS_LENIS_WITH_CIRCUMFLEX,
        SPIRITUS_ASPER,
        SPIRITUS_ASPER_WITH_ACUTE,
        SPIRITUS_ASPER_WITH_GRAVE,
        SPIRITUS_ASPER_WITH_CIRCUMFLEX,
        SPIRITUS_WITH_IOTA
      ].flatten

      STARTING_VOWELS = Regexp.union(VOWELS_WITH_SPIRITUS)
      CONS = Regexp.union(CONSONANTS)
      ALL = Regexp.union([CONSONANTS, VOWELS].flatten)
      # Regexp form of PLAIN_VOWELS. Interpolating the Array itself into a
      # pattern goes through Array#to_s and yields a character class that also
      # accepts quotes, commas and spaces.
      PLAIN = Regexp.union(PLAIN_VOWELS)

      # One leading consonant, an optional plain vowel followed by a vowel that
      # carries a breathing mark (the word-internal breathing is what gives a
      # krasis away), then any run of plain letters up to the end of the token.
      # Built once here instead of on every call.
      KRASIS = /^(#{CONS})(#{PLAIN}?#{STARTING_VOWELS})(#{ALL}*)$/

      # Replaces every token in @worker that looks like a krasis with its two
      # parts (e.g. "κἄπειτα" => "κ-", "ἄπειτα" when the marker is "-").
      #
      # The pairs are written in place and the collection is flattened once
      # after the iteration, so @worker never changes size while it is being
      # walked. The result is the same as flattening after every hit, because
      # the second part starts with a vowel and can never match again.
      #
      # Returns @worker.
      def split_krasis
        found = false
        @worker.each_with_index do |token, i|
          if resolved_krasis = contains_krasis(token)
            @worker[i] = resolved_krasis
            found = true
          end
        end
        @worker.flatten! if found
        @worker
      end

      # Returns token with the configured krasis marker appended.
      def krasis(token)
        "#{token}#{@krasis_marker}"
      end

      # Checks whether token matches the krasis pattern.
      #
      # Returns a two element Array [marked_first_part, remainder] when it
      # does, nil otherwise.
      def contains_krasis(token)
        if m = token.match(KRASIS)
          [krasis(m[1]), m[2] + m[3]]
        end
      end

      # Truthy when n is the Greek apostrophe and e contains a Greek consonant,
      # which the tokenizer uses to attach the apostrophe to the preceding
      # token. n may be nil (no following token); the result is then false.
      def greek_apostrophe(n, e)
        (n == "᾽" && e =~ CONS)
      end
    end
  end
end
|
data/llt-tokenizer.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
|
-
spec.add_development_dependency "rspec"
|
23
|
+
spec.add_development_dependency "rspec", "2.14"
|
24
24
|
spec.add_development_dependency "simplecov", "~> 0.7"
|
25
25
|
spec.add_dependency "array_scanner"
|
26
26
|
spec.add_dependency "llt-core"
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for LLT::Token#==, which compares tokens by their string value and
# ignores case.
describe LLT::Token do
  let(:token) { LLT::Token }

  describe "#==" do
    it "equals when two tokens have the same string value" do
      t1 = token.new('bene')
      t2 = token.new('bene')
      t1.should == t2
    end

    it "doesn't equal when the strings are different" do
      t1 = token.new('bene')
      t2 = token.new('male')
      t1.should_not == t2
    end

    it "is case insensitive" do
      t1 = token.new('bene')
      t2 = token.new('Bene')
      t1.should == t2
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Greek extensions of the tokenizer: plain tokenization,
# apostrophe handling and krasis splitting.
describe LLT::Tokenizer::Greek do
  let(:tokenizer) { LLT::Tokenizer.new }
  let(:greek_txt) { "καὶ διὰ τῆς περὶ τὴν ἀρχαιολογίαν συγγραφῆς."}
  let(:krasis) { "κἄπειτα." }
  let(:double_krasis) { "κἄπειτα τῆς περὶ τὴν ἀρχαιολογίαν κἄπειτα." }
  let(:diphthong) { "τοὔνομα." }

  context "with greek tokens" do
    describe "#tokenize" do
      it "tokenizes a string" do
        res = tokenizer.tokenize(greek_txt)
        res.should == %w(καὶ διὰ τῆς περὶ τὴν ἀρχαιολογίαν συγγραφῆς .)
        res.should have(8).items
      end

      describe "with a string that contains an apostrophe" do
        it "returns one token to which the apostrophe is attached" do
          txt = "εὖ δ᾽ ἴστε."
          res = tokenizer.tokenize(txt)
          res.should == %w(εὖ δ᾽ ἴστε .)
          res.should have(4).items
        end

        it "splits two tokens combined by an apostrophe" do
          txt = "εὖ δ᾽ἴστε."
          res = tokenizer.tokenize(txt)
          res.should == %w(εὖ δ᾽ ἴστε .)
          res.should have(4).items
        end
      end
    end

    describe "handles krasis" do
      it "splits a krasis into two words" do
        res = tokenizer.tokenize(krasis)
        res.should have(3).items
        res.should == %w( κ- ἄπειτα . )
      end

      it "handles a diphthong krasis" do
        res = tokenizer.tokenize(diphthong)
        res.should have(3).items
        # pin the actual split, not just the count: the plain vowel of the
        # diphthong has to stay with the second word
        res.should == %w( τ- οὔνομα . )
      end

      it "splits two kraseis in a sentence" do
        res = tokenizer.tokenize(double_krasis)
        res.should have(9).items
        res[2].should == "τῆς"
        res[8].should == "."
      end

      context "with options" do
        context "with disabled splitting" do
          it "doesn't split krasis" do
            txt = 'κἄπειτα.'
            opts = { splitting: false }
            tokens = tokenizer.tokenize(txt, opts).map(&:to_s)
            tokens.should == %w{ κἄπειτα . }
          end
        end
      end
    end
  end
end
|
@@ -158,7 +158,9 @@ describe LLT::Tokenizer do
|
|
158
158
|
"ad eamque" => "-que ad eam",
|
159
159
|
"ob easque" => "-que ob eas",
|
160
160
|
"neque" => "-que ne",
|
161
|
+
"Neque" => "-que Ne",
|
161
162
|
"nec" => "-c ne",
|
163
|
+
"Nec" => "-c Ne",
|
162
164
|
"Atque" => "Atque",
|
163
165
|
"atque" => "atque",
|
164
166
|
"cuiusque" => "cuiusque",
|
@@ -208,6 +210,7 @@ describe LLT::Tokenizer do
|
|
208
210
|
"fine" => "fine",
|
209
211
|
"iuvene" => "iuvene",
|
210
212
|
"sanguine" => "sanguine",
|
213
|
+
"igne" => "igne",
|
211
214
|
|
212
215
|
# frequent patterns in third declension adjective
|
213
216
|
"commune" => "commune",
|
@@ -282,7 +285,7 @@ describe LLT::Tokenizer do
|
|
282
285
|
"Word" => %w{ ita Marcus quoque -que po' },
|
283
286
|
"Filler" => %w{ M. Sex. App. Ap. Tib. Ti. C. a. d. Kal. Ian. }, #I XI MMC }
|
284
287
|
"XmlTag" => %w{ <grc> </grc> },
|
285
|
-
"Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' & < > & < > ' " }
|
288
|
+
"Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' ᾽ · & < > & < > ' " }
|
286
289
|
}
|
287
290
|
|
288
291
|
examples.each do |klass, elements|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llt-tokenizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- LFDM
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -42,16 +42,16 @@ dependencies:
|
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - '='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '2.14'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - '='
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '2.14'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: simplecov
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -158,11 +158,15 @@ files:
|
|
158
158
|
- lib/llt/token/xml_tag.rb
|
159
159
|
- lib/llt/tokenizer.rb
|
160
160
|
- lib/llt/tokenizer/api.rb
|
161
|
+
- lib/llt/tokenizer/greek.rb
|
161
162
|
- lib/llt/tokenizer/version.rb
|
163
|
+
- lib/llt/tokenizer/version_info.rb
|
162
164
|
- lib/llt/tokenizer/worker.rb
|
163
165
|
- llt-tokenizer.gemspec
|
164
166
|
- spec/lib/llt/token/punctuation_spec.rb
|
167
|
+
- spec/lib/llt/token_spec.rb
|
165
168
|
- spec/lib/llt/tokenizer/api_spec.rb
|
169
|
+
- spec/lib/llt/tokenizer/greek_spec.rb
|
166
170
|
- spec/lib/llt/tokenizer_spec.rb
|
167
171
|
- spec/spec_helper.rb
|
168
172
|
- spec/support/matchers/tokenizer.rb
|
@@ -192,7 +196,9 @@ specification_version: 4
|
|
192
196
|
summary: Breaks latin sentences into tokens
|
193
197
|
test_files:
|
194
198
|
- spec/lib/llt/token/punctuation_spec.rb
|
199
|
+
- spec/lib/llt/token_spec.rb
|
195
200
|
- spec/lib/llt/tokenizer/api_spec.rb
|
201
|
+
- spec/lib/llt/tokenizer/greek_spec.rb
|
196
202
|
- spec/lib/llt/tokenizer_spec.rb
|
197
203
|
- spec/spec_helper.rb
|
198
204
|
- spec/support/matchers/tokenizer.rb
|