llt-tokenizer 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/Gemfile +7 -9
- data/README.md +6 -0
- data/lib/llt/token.rb +8 -0
- data/lib/llt/tokenizer.rb +15 -6
- data/lib/llt/tokenizer/api.rb +3 -0
- data/lib/llt/tokenizer/greek.rb +69 -0
- data/lib/llt/tokenizer/version.rb +1 -1
- data/lib/llt/tokenizer/version_info.rb +7 -0
- data/llt-tokenizer.gemspec +1 -1
- data/spec/lib/llt/token_spec.rb +25 -0
- data/spec/lib/llt/tokenizer/greek_spec.rb +66 -0
- data/spec/lib/llt/tokenizer_spec.rb +4 -1
- metadata +12 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a9abfc5e79b148f497749053c8ccfa7ac9653af
|
4
|
+
data.tar.gz: 1c9fe20eb2824eccc1840602beae6552415eb5d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3cd367d754d75f895240c709aed9697140c8359490bc634e56f118b77cc015c2a08c80d7fa4fa74448084844beec4749a7b01b1789c0805a3a5a8fa8d465d5e9
|
7
|
+
data.tar.gz: 21c50a75955cab805fb81bc1435963e047171936c015981121de1405378fb4af9c21a69153c0c043d3a504986e1022437690cedb60b88e0b8246ca6fce20565b
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -5,15 +5,15 @@ gemspec
|
|
5
5
|
|
6
6
|
gem 'coveralls', require: false
|
7
7
|
|
8
|
-
gem 'llt-core', git: 'git
|
9
|
-
gem 'llt-core_extensions', git: 'git
|
10
|
-
gem 'llt-constants', git: 'git
|
11
|
-
gem 'llt-db_handler', git: 'git
|
12
|
-
gem 'llt-db_handler-stub', git: 'git
|
13
|
-
gem 'llt-helpers', git: 'git
|
8
|
+
gem 'llt-core', git: 'git://github.com/latin-language-toolkit/llt-core.git'
|
9
|
+
gem 'llt-core_extensions', git: 'git://github.com/latin-language-toolkit/llt-core_extensions.git'
|
10
|
+
gem 'llt-constants', git: 'git://github.com/latin-language-toolkit/llt-constants.git'
|
11
|
+
gem 'llt-db_handler', git: 'git://github.com/latin-language-toolkit/llt-db_handler.git'
|
12
|
+
gem 'llt-db_handler-stub', git: 'git://github.com/latin-language-toolkit/llt-db_handler-stub.git'
|
13
|
+
gem 'llt-helpers', git: 'git://github.com/latin-language-toolkit/llt-helpers.git'
|
14
14
|
|
15
15
|
# Dependencies of db_handler
|
16
|
-
gem 'llt-form_builder', git: 'git
|
16
|
+
gem 'llt-form_builder', git: 'git://github.com/latin-language-toolkit/llt-form_builder.git'
|
17
17
|
|
18
18
|
platform :ruby do
|
19
19
|
gem 'pg'
|
@@ -23,5 +23,3 @@ platform :jruby do
|
|
23
23
|
gem 'activerecord-jdbcpostgresql-adapter'
|
24
24
|
gem 'jruby-httpclient'
|
25
25
|
end
|
26
|
-
|
27
|
-
gem 'pry'
|
data/README.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# LLT::Tokenizer
|
2
2
|
|
3
|
+
[](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/badge_fury)
|
4
|
+
[](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/gemnasium)
|
5
|
+
[](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/travis)
|
6
|
+
[](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/coveralls)
|
7
|
+
[](http://allthebadges.io/latin-language-toolkit/llt-tokenizer/code_climate)
|
8
|
+
|
3
9
|
Flexible service to tokenize Latin texts.
|
4
10
|
|
5
11
|
## Installation
|
data/lib/llt/token.rb
CHANGED
@@ -34,6 +34,10 @@ module LLT
|
|
34
34
|
@special_roles += roles
|
35
35
|
end
|
36
36
|
|
37
|
+
def ==(other)
|
38
|
+
to_s.downcase == other.to_s.downcase
|
39
|
+
end
|
40
|
+
|
37
41
|
# deprecated
|
38
42
|
def add_form(form)
|
39
43
|
@forms << form
|
@@ -47,5 +51,9 @@ module LLT
|
|
47
51
|
def use(*args)
|
48
52
|
# hook method, overwritten by Word
|
49
53
|
end
|
54
|
+
|
55
|
+
def set_functions(*args)
|
56
|
+
# hook method
|
57
|
+
end
|
50
58
|
end
|
51
59
|
end
|
data/lib/llt/tokenizer.rb
CHANGED
@@ -4,15 +4,19 @@ require 'llt/constants/abbreviations'
|
|
4
4
|
require 'llt/core_extensions/array'
|
5
5
|
require 'llt/db_handler/prometheus'
|
6
6
|
require 'llt/helpers/metrical'
|
7
|
+
require 'llt/tokenizer/version'
|
8
|
+
require 'llt/tokenizer/version_info'
|
7
9
|
|
8
10
|
module LLT
|
9
11
|
class Tokenizer
|
10
12
|
require 'llt/token'
|
11
13
|
require 'llt/tokenizer/worker'
|
14
|
+
require 'llt/tokenizer/greek'
|
12
15
|
|
13
16
|
include Core::Serviceable
|
14
17
|
include Constants::Abbreviations
|
15
18
|
include Helpers::Metrical
|
19
|
+
include Greek
|
16
20
|
|
17
21
|
uses_db { DbHandler::Prometheus.new }
|
18
22
|
|
@@ -26,6 +30,8 @@ module LLT
|
|
26
30
|
indexing: true,
|
27
31
|
splitting: true,
|
28
32
|
xml: false,
|
33
|
+
#for Greek
|
34
|
+
krasis_marker: '-'
|
29
35
|
}
|
30
36
|
end
|
31
37
|
|
@@ -36,6 +42,8 @@ module LLT
|
|
36
42
|
setup(text, options)
|
37
43
|
|
38
44
|
find_abbreviations_and_join_strings
|
45
|
+
#for Greek
|
46
|
+
split_krasis if @splitting
|
39
47
|
split_enklitika_and_change_their_position if @splitting
|
40
48
|
merge_what_needs_merging if @merging # quam diu => quamdiu
|
41
49
|
tokens = create_tokens
|
@@ -53,11 +61,13 @@ module LLT
|
|
53
61
|
@splitting = parse_option(:splitting, options)
|
54
62
|
@indexing = parse_option(:indexing, options)
|
55
63
|
@xml = parse_option(:xml, options)
|
64
|
+
#for Greek
|
65
|
+
@krasis_marker = parse_option(:krasis_marker, options)
|
56
66
|
@worker = setup_worker(worker)
|
57
67
|
@shift_range = shift_range(@shifting)
|
58
68
|
end
|
59
69
|
|
60
|
-
PUNCTUATION = /&(?:amp|quot|apos|lt|gt);|([\.\?,!;\-:"'”&\(\)\[\]
|
70
|
+
PUNCTUATION = /&(?:amp|quot|apos|lt|gt);|([\.\?,!;\-:"'”&\(\)\[\]†<>᾽·])\1*/
|
61
71
|
XML_TAG = /<\/?.+?>/
|
62
72
|
|
63
73
|
# This is here for two reasons:
|
@@ -129,7 +139,7 @@ module LLT
|
|
129
139
|
arr = []
|
130
140
|
@worker.each_with_index do |e, i|
|
131
141
|
n = @worker[i + 1]
|
132
|
-
if (n == '.' && e =~ ABBREVIATIONS) || (n == "'" && e =~ APOSTROPHE_WORDS)
|
142
|
+
if (n == '.' && e =~ ABBREVIATIONS) || (n == "'" && e =~ APOSTROPHE_WORDS) || greek_apostrophe(n,e)
|
133
143
|
@worker[i + 1] = n.prepend(e)
|
134
144
|
arr << (i - arr.size)
|
135
145
|
end
|
@@ -141,7 +151,7 @@ module LLT
|
|
141
151
|
######################
|
142
152
|
|
143
153
|
WORDS_ENDING_WITH_QUE = /^((un.{1,3})?[qc]u[aei].*que|qu[ao]que|itaque|atque|ut[er].*que|.*cumque|pler(.{1,2}|[oa]rum)que|denique|undique|usque)$/i # neque taken out!
|
144
|
-
WORDS_ENDING_WITH_NE = /^(omne|sine|bene|paene|iuvene)$/i
|
154
|
+
WORDS_ENDING_WITH_NE = /^(omne|sine|bene|paene|iuvene|siccine)$/i # generalize these words and start to look for them in the db, especially for adverbs
|
145
155
|
WORDS_ENDING_WITH_VE = /^(sive|neve)$/i
|
146
156
|
|
147
157
|
# laetusque to -que laetus
|
@@ -195,7 +205,7 @@ module LLT
|
|
195
205
|
def split_nec
|
196
206
|
indices = []
|
197
207
|
@worker.each_with_index do |token, i|
|
198
|
-
if token
|
208
|
+
if token =~ /^nec$/i
|
199
209
|
token.slice!(-1)
|
200
210
|
indices << (i + indices.size + @shift_range)
|
201
211
|
end
|
@@ -247,7 +257,7 @@ module LLT
|
|
247
257
|
entries = []
|
248
258
|
entries += lookup(orig_el, :noun, :nom) if orig_el =~ /io$/ # actio-ne ratio-ne
|
249
259
|
entries += lookup(orig_el + "n", :persona, :stem) if orig_el =~ /o$/ # Plato-ne Cicero-ne Solo-ne
|
250
|
-
entries += lookup(orig_el + "n", :noun, :stem, [3, 33])
|
260
|
+
entries += lookup(orig_el + "n", :noun, :stem, [3, 33]) # fortitudi-ne ratio-ne libidi-ne homi-ne fi-ne agmi-ne iuve-ne ig-ne
|
251
261
|
entries += lookup(orig_el + "n", :noun, :stem, 2) # domi-ne
|
252
262
|
entries += lookup(orig_el + "n", :adjective, :stem, [1,3]) # communis commune, or bonus
|
253
263
|
|
@@ -319,7 +329,6 @@ module LLT
|
|
319
329
|
end
|
320
330
|
end
|
321
331
|
|
322
|
-
|
323
332
|
######################
|
324
333
|
|
325
334
|
MERGE_WORDS = [ %w{ quam diu }, ['non', /null.{1,4}$/] ]
|
data/lib/llt/tokenizer/api.rb
CHANGED
@@ -5,6 +5,7 @@ require 'llt/core/api'
|
|
5
5
|
|
6
6
|
class Api < Sinatra::Base
|
7
7
|
register Sinatra::RespondWith
|
8
|
+
register LLT::Core::Api::VersionRoutes
|
8
9
|
helpers LLT::Core::Api::Helpers
|
9
10
|
|
10
11
|
get '/tokenize' do
|
@@ -17,4 +18,6 @@ class Api < Sinatra::Base
|
|
17
18
|
f.xml { to_xml(tokens, params) }
|
18
19
|
end
|
19
20
|
end
|
21
|
+
|
22
|
+
add_version_route_for('/tokenize', dependencies: %i{ Core Tokenizer })
|
20
23
|
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module LLT
|
2
|
+
class Tokenizer
|
3
|
+
module Greek
|
4
|
+
PLAIN_VOWELS = %w(α ε ι η ο υ ω)
|
5
|
+
VOWELS_WITH_ACUTE = %w(ά έ ή ί ó ύ ώ)
|
6
|
+
VOWELS_WITH_GRAVE = %w(ὰ ὲ ὴ ì ò ὺ ὼ)
|
7
|
+
VOWELS_WITH_CIRCUMFLEX = %w(ᾶ ῆ ῖ ῦ ῶ)
|
8
|
+
VOWELS_WITH_IOTA = %w(ᾲ ᾳ ᾴ ᾷ ῂ ῃ ῄ ῇ ῲ ῳ ῴ ῷ)
|
9
|
+
CONSONANTS = %w(β γ δ ζ θ κ λ μ ν ξ π ρ ῥ ῤ σ ς τ φ χ ψ)
|
10
|
+
VOWELS = [PLAIN_VOWELS,
|
11
|
+
VOWELS_WITH_ACUTE,
|
12
|
+
VOWELS_WITH_GRAVE,
|
13
|
+
VOWELS_WITH_CIRCUMFLEX,
|
14
|
+
VOWELS_WITH_IOTA
|
15
|
+
].flatten
|
16
|
+
|
17
|
+
SPIRITUS_LENIS = %w(ἀ ἐ ἠ ἰ ὀ ὐ ὠ)
|
18
|
+
SPIRITUS_LENIS_WITH_GRAVE = %w(ἂ ἒ ἲ ἢ ὂ ὒ ὢ)
|
19
|
+
SPIRITUS_LENIS_WITH_ACUTE = %w(ἄ ἔ ἴ ἤ ὄ ὔ ὤ)
|
20
|
+
SPIRITUS_LENIS_WITH_CIRCUMFLEX = %w(ἆ ἶ ἦ ὖ ὦ )
|
21
|
+
|
22
|
+
SPIRITUS_ASPER = %w(ἁ ἑ ἡ ἱ ὁ ὑ ὡ)
|
23
|
+
SPIRITUS_ASPER_WITH_GRAVE = %w(ἃ ἣ ἓ ἳ ὃ ὓ ὣ)
|
24
|
+
SPIRITUS_ASPER_WITH_ACUTE = %w(ἅ ἥ ἕ ἵ ὅ ὕ ὥ)
|
25
|
+
SPIRITUS_ASPER_WITH_CIRCUMFLEX = %w(ἇ ἷ ἧ ὗ ὧ)
|
26
|
+
|
27
|
+
SPIRITUS_WITH_IOTA = %w(ᾀ ᾁ ᾂ ᾃ ᾄ ᾅ ᾆ ᾇ ᾐ ᾑ ᾒ ᾓ ᾔ ᾕ ᾖ ᾗ ᾠ ᾡ ᾢ ᾣ ᾤ ᾥ ᾦ ᾧ)
|
28
|
+
|
29
|
+
VOWELS_WITH_SPIRITUS = [
|
30
|
+
SPIRITUS_LENIS,
|
31
|
+
SPIRITUS_LENIS_WITH_ACUTE,
|
32
|
+
SPIRITUS_LENIS_WITH_GRAVE,
|
33
|
+
SPIRITUS_LENIS_WITH_CIRCUMFLEX,
|
34
|
+
SPIRITUS_ASPER,
|
35
|
+
SPIRITUS_ASPER_WITH_ACUTE,
|
36
|
+
SPIRITUS_ASPER_WITH_GRAVE,
|
37
|
+
SPIRITUS_ASPER_WITH_CIRCUMFLEX,
|
38
|
+
SPIRITUS_WITH_IOTA
|
39
|
+
].flatten
|
40
|
+
|
41
|
+
STARTING_VOWELS = Regexp.union(VOWELS_WITH_SPIRITUS)
|
42
|
+
CONS = Regexp.union(CONSONANTS)
|
43
|
+
ALL = Regexp.union([CONSONANTS, VOWELS].flatten)
|
44
|
+
|
45
|
+
def split_krasis
|
46
|
+
@worker.each_with_index do |token, i|
|
47
|
+
if resolved_krasis = contains_krasis(token)
|
48
|
+
@worker[i] = resolved_krasis
|
49
|
+
@worker.flatten!
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def krasis(token)
|
55
|
+
"#{token}#{@krasis_marker}"
|
56
|
+
end
|
57
|
+
|
58
|
+
def contains_krasis(token)
|
59
|
+
if token.match(/^(#{CONS})(#{PLAIN_VOWELS}?#{STARTING_VOWELS})(#{ALL}*)$/)
|
60
|
+
[krasis($1), $2+$3]
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def greek_apostrophe(n, e)
|
65
|
+
(n == "᾽" && e =~ CONS)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/llt-tokenizer.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
|
-
spec.add_development_dependency "rspec"
|
23
|
+
spec.add_development_dependency "rspec", "2.14"
|
24
24
|
spec.add_development_dependency "simplecov", "~> 0.7"
|
25
25
|
spec.add_dependency "array_scanner"
|
26
26
|
spec.add_dependency "llt-core"
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Token do
|
4
|
+
let(:token) { LLT::Token }
|
5
|
+
|
6
|
+
describe "#==" do
|
7
|
+
it "equals when two tokens have the same string value" do
|
8
|
+
t1 = token.new('bene')
|
9
|
+
t2 = token.new('bene')
|
10
|
+
t1.should == t2
|
11
|
+
end
|
12
|
+
|
13
|
+
it "doesn't equal when the strings are different" do
|
14
|
+
t1 = token.new('bene')
|
15
|
+
t2 = token.new('male')
|
16
|
+
t1.should_not == t2
|
17
|
+
end
|
18
|
+
|
19
|
+
it "is case insensitive" do
|
20
|
+
t1 = token.new('bene')
|
21
|
+
t2 = token.new('Bene')
|
22
|
+
t1.should == t2
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LLT::Tokenizer::Greek do
|
4
|
+
let(:tokenizer) { LLT::Tokenizer.new }
|
5
|
+
let(:greek_txt) { "καὶ διὰ τῆς περὶ τὴν ἀρχαιολογίαν συγγραφῆς."}
|
6
|
+
let(:krasis) { "κἄπειτα." }
|
7
|
+
let(:double_krasis) { "κἄπειτα τῆς περὶ τὴν ἀρχαιολογίαν κἄπειτα." }
|
8
|
+
let(:diphtong) { "τοὔνομα." }
|
9
|
+
|
10
|
+
context "with greek tokens" do
|
11
|
+
describe "#tokenize" do
|
12
|
+
it "tokenizes a string" do
|
13
|
+
res = tokenizer.tokenize(greek_txt)
|
14
|
+
res.should == %w(καὶ διὰ τῆς περὶ τὴν ἀρχαιολογίαν συγγραφῆς .)
|
15
|
+
res.should have(8).items
|
16
|
+
end
|
17
|
+
|
18
|
+
describe "with a string that contains an apostrophe" do
|
19
|
+
it "returns one token to which the apostrophe is attached" do
|
20
|
+
txt = "εὖ δ᾽ ἴστε."
|
21
|
+
res = tokenizer.tokenize(txt)
|
22
|
+
res.should == %w(εὖ δ᾽ ἴστε .)
|
23
|
+
res.should have(4).items
|
24
|
+
end
|
25
|
+
|
26
|
+
it "splits two tokens combined by an apostrophe" do
|
27
|
+
txt = "εὖ δ᾽ἴστε."
|
28
|
+
res = tokenizer.tokenize(txt)
|
29
|
+
res.should == %w(εὖ δ᾽ ἴστε .)
|
30
|
+
res.should have(4).items
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
describe "handles krasis" do
|
36
|
+
it "splits a krasis into two words" do
|
37
|
+
res = tokenizer.tokenize(krasis)
|
38
|
+
res.should have(3).items
|
39
|
+
res.should == %w( κ- ἄπειτα . )
|
40
|
+
end
|
41
|
+
|
42
|
+
it "handles a diphthong krasis" do
|
43
|
+
res = tokenizer.tokenize(diphtong)
|
44
|
+
res.should have(3).items
|
45
|
+
end
|
46
|
+
|
47
|
+
it "splits two kraseis in a sentence" do
|
48
|
+
res = tokenizer.tokenize(double_krasis)
|
49
|
+
res.should have(9).items
|
50
|
+
res[2].should == "τῆς"
|
51
|
+
res[8].should == "."
|
52
|
+
end
|
53
|
+
|
54
|
+
context "with options" do
|
55
|
+
context "with disabled splitting" do
|
56
|
+
it "doesn't split krasis" do
|
57
|
+
txt = 'κἄπειτα.'
|
58
|
+
opts = { splitting: false }
|
59
|
+
tokens = tokenizer.tokenize(txt, opts).map(&:to_s)
|
60
|
+
tokens.should == %w{ κἄπειτα . }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -158,7 +158,9 @@ describe LLT::Tokenizer do
|
|
158
158
|
"ad eamque" => "-que ad eam",
|
159
159
|
"ob easque" => "-que ob eas",
|
160
160
|
"neque" => "-que ne",
|
161
|
+
"Neque" => "-que Ne",
|
161
162
|
"nec" => "-c ne",
|
163
|
+
"Nec" => "-c Ne",
|
162
164
|
"Atque" => "Atque",
|
163
165
|
"atque" => "atque",
|
164
166
|
"cuiusque" => "cuiusque",
|
@@ -208,6 +210,7 @@ describe LLT::Tokenizer do
|
|
208
210
|
"fine" => "fine",
|
209
211
|
"iuvene" => "iuvene",
|
210
212
|
"sanguine" => "sanguine",
|
213
|
+
"igne" => "igne",
|
211
214
|
|
212
215
|
# frequent patterns in third declension adjective
|
213
216
|
"commune" => "commune",
|
@@ -282,7 +285,7 @@ describe LLT::Tokenizer do
|
|
282
285
|
"Word" => %w{ ita Marcus quoque -que po' },
|
283
286
|
"Filler" => %w{ M. Sex. App. Ap. Tib. Ti. C. a. d. Kal. Ian. }, #I XI MMC }
|
284
287
|
"XmlTag" => %w{ <grc> </grc> },
|
285
|
-
"Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' & < > & < > ' " }
|
288
|
+
"Punctuation" => %w{ , . ! ? † ( ) [ ] ... -- ” " ' ᾽ · & < > & < > ' " }
|
286
289
|
}
|
287
290
|
|
288
291
|
examples.each do |klass, elements|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llt-tokenizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- LFDM
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -42,16 +42,16 @@ dependencies:
|
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - '='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '2.14'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - '='
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '2.14'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: simplecov
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -158,11 +158,15 @@ files:
|
|
158
158
|
- lib/llt/token/xml_tag.rb
|
159
159
|
- lib/llt/tokenizer.rb
|
160
160
|
- lib/llt/tokenizer/api.rb
|
161
|
+
- lib/llt/tokenizer/greek.rb
|
161
162
|
- lib/llt/tokenizer/version.rb
|
163
|
+
- lib/llt/tokenizer/version_info.rb
|
162
164
|
- lib/llt/tokenizer/worker.rb
|
163
165
|
- llt-tokenizer.gemspec
|
164
166
|
- spec/lib/llt/token/punctuation_spec.rb
|
167
|
+
- spec/lib/llt/token_spec.rb
|
165
168
|
- spec/lib/llt/tokenizer/api_spec.rb
|
169
|
+
- spec/lib/llt/tokenizer/greek_spec.rb
|
166
170
|
- spec/lib/llt/tokenizer_spec.rb
|
167
171
|
- spec/spec_helper.rb
|
168
172
|
- spec/support/matchers/tokenizer.rb
|
@@ -192,7 +196,9 @@ specification_version: 4
|
|
192
196
|
summary: Breaks latin sentences into tokens
|
193
197
|
test_files:
|
194
198
|
- spec/lib/llt/token/punctuation_spec.rb
|
199
|
+
- spec/lib/llt/token_spec.rb
|
195
200
|
- spec/lib/llt/tokenizer/api_spec.rb
|
201
|
+
- spec/lib/llt/tokenizer/greek_spec.rb
|
196
202
|
- spec/lib/llt/tokenizer_spec.rb
|
197
203
|
- spec/spec_helper.rb
|
198
204
|
- spec/support/matchers/tokenizer.rb
|