pragmatic_tokenizer 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +144 -2
- data/lib/pragmatic_tokenizer/languages/common.rb +3 -2
- data/lib/pragmatic_tokenizer/languages/spanish.rb +4 -1
- data/lib/pragmatic_tokenizer/processor.rb +8 -7
- data/lib/pragmatic_tokenizer/tokenizer.rb +11 -5
- data/lib/pragmatic_tokenizer/version.rb +1 -1
- data/pragmatic_tokenizer.gemspec +1 -0
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a52658ccd583baac9e69b37649e5403ad6f3549b
|
4
|
+
data.tar.gz: 95335fab907589faa066f7a76bd8a6df4d9f2f70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7b42942e2676cc22a807938244fc38b7e4d4097bcdb3a6f0c08fe94fccc923cce50e8328e91efdfdd93f758771dda08b3ea432bb67099536630d4a9d9d9f4e9
|
7
|
+
data.tar.gz: d78a40443dc9948866d1f271e47af355a8838e38dbd932d282d7b0c905ad06dfb392f65ba7cc2df6a891098ff52b9a795317b1a458363214930c08650cc6ef96
|
data/README.md
CHANGED
@@ -61,7 +61,7 @@ Or install it yourself as:
|
|
61
61
|
##### `clean`
|
62
62
|
**default** = `'false'`
|
63
63
|
- `true`
|
64
|
-
Removes tokens consisting of only hyphens or
|
64
|
+
Removes tokens consisting of only hyphens, underscores, or periods as well as some special characters (®, ©, ™).
|
65
65
|
- `false`
|
66
66
|
Leaves tokens as is.
|
67
67
|
|
@@ -77,7 +77,7 @@ Or install it yourself as:
|
|
77
77
|
<hr>
|
78
78
|
|
79
79
|
##### `minimum_length`
|
80
|
-
**default** = `0`
|
80
|
+
**default** = `0`
|
81
81
|
The minimum number of characters a token should be.
|
82
82
|
|
83
83
|
**Example Usage**
|
@@ -122,6 +122,148 @@ PragmaticTokenizer::Tokenizer.new(text, minimum_length: 6).tokenize
|
|
122
122
|
# => ["minimum", "length"]
|
123
123
|
```
|
124
124
|
|
125
|
+
## Language Support
|
126
|
+
|
127
|
+
The following lists the current level of support for different languages. Pull requests or help for any languages that are not fully supported would be greatly appreciated. *N.B. - contractions might not be applicable for all languages below - in that case the CONTRACTIONS hash should stay empty.*
|
128
|
+
|
129
|
+
##### English
|
130
|
+
Specs: Yes
|
131
|
+
Abbreviations: Yes
|
132
|
+
Stop Words: Yes
|
133
|
+
Contractions: Yes
|
134
|
+
|
135
|
+
##### Arabic
|
136
|
+
Specs: No
|
137
|
+
Abbreviations: Yes
|
138
|
+
Stop Words: Yes
|
139
|
+
Contractions: No
|
140
|
+
|
141
|
+
##### Bulgarian
|
142
|
+
Specs: No
|
143
|
+
Abbreviations: Yes
|
144
|
+
Stop Words: Yes
|
145
|
+
Contractions: No
|
146
|
+
|
147
|
+
##### Catalan
|
148
|
+
Specs: No
|
149
|
+
Abbreviations: No
|
150
|
+
Stop Words: Yes
|
151
|
+
Contractions: No
|
152
|
+
|
153
|
+
##### Czech
|
154
|
+
Specs: No
|
155
|
+
Abbreviations: No
|
156
|
+
Stop Words: Yes
|
157
|
+
Contractions: No
|
158
|
+
|
159
|
+
##### Danish
|
160
|
+
Specs: No
|
161
|
+
Abbreviations: No
|
162
|
+
Stop Words: Yes
|
163
|
+
Contractions: No
|
164
|
+
|
165
|
+
##### German
|
166
|
+
Specs: More needed
|
167
|
+
Abbreviations: Yes
|
168
|
+
Stop Words: Yes
|
169
|
+
Contractions: No
|
170
|
+
|
171
|
+
##### Finnish
|
172
|
+
Specs: No
|
173
|
+
Abbreviations: No
|
174
|
+
Stop Words: Yes
|
175
|
+
Contractions: No
|
176
|
+
|
177
|
+
##### French
|
178
|
+
Specs: More needed
|
179
|
+
Abbreviations: Yes
|
180
|
+
Stop Words: Yes
|
181
|
+
Contractions: No
|
182
|
+
|
183
|
+
##### Greek
|
184
|
+
Specs: No
|
185
|
+
Abbreviations: No
|
186
|
+
Stop Words: Yes
|
187
|
+
Contractions: No
|
188
|
+
|
189
|
+
##### Indonesian
|
190
|
+
Specs: No
|
191
|
+
Abbreviations: No
|
192
|
+
Stop Words: Yes
|
193
|
+
Contractions: No
|
194
|
+
|
195
|
+
##### Italian
|
196
|
+
Specs: No
|
197
|
+
Abbreviations: Yes
|
198
|
+
Stop Words: Yes
|
199
|
+
Contractions: No
|
200
|
+
|
201
|
+
##### Latvian
|
202
|
+
Specs: No
|
203
|
+
Abbreviations: No
|
204
|
+
Stop Words: Yes
|
205
|
+
Contractions: No
|
206
|
+
|
207
|
+
##### Norwegian
|
208
|
+
Specs: No
|
209
|
+
Abbreviations: No
|
210
|
+
Stop Words: Yes
|
211
|
+
Contractions: No
|
212
|
+
|
213
|
+
##### Persian
|
214
|
+
Specs: No
|
215
|
+
Abbreviations: No
|
216
|
+
Stop Words: Yes
|
217
|
+
Contractions: No
|
218
|
+
|
219
|
+
##### Polish
|
220
|
+
Specs: No
|
221
|
+
Abbreviations: Yes
|
222
|
+
Stop Words: Yes
|
223
|
+
Contractions: No
|
224
|
+
|
225
|
+
##### Portuguese
|
226
|
+
Specs: No
|
227
|
+
Abbreviations: No
|
228
|
+
Stop Words: Yes
|
229
|
+
Contractions: No
|
230
|
+
|
231
|
+
##### Romanian
|
232
|
+
Specs: No
|
233
|
+
Abbreviations: No
|
234
|
+
Stop Words: Yes
|
235
|
+
Contractions: No
|
236
|
+
|
237
|
+
##### Russian
|
238
|
+
Specs: No
|
239
|
+
Abbreviations: Yes
|
240
|
+
Stop Words: Yes
|
241
|
+
Contractions: No
|
242
|
+
|
243
|
+
##### Slovak
|
244
|
+
Specs: No
|
245
|
+
Abbreviations: No
|
246
|
+
Stop Words: Yes
|
247
|
+
Contractions: No
|
248
|
+
|
249
|
+
##### Spanish
|
250
|
+
Specs: No
|
251
|
+
Abbreviations: Yes
|
252
|
+
Stop Words: Yes
|
253
|
+
Contractions: Yes
|
254
|
+
|
255
|
+
##### Swedish
|
256
|
+
Specs: No
|
257
|
+
Abbreviations: No
|
258
|
+
Stop Words: Yes
|
259
|
+
Contractions: No
|
260
|
+
|
261
|
+
##### Turkish
|
262
|
+
Specs: No
|
263
|
+
Abbreviations: No
|
264
|
+
Stop Words: Yes
|
265
|
+
Contractions: No
|
266
|
+
|
125
267
|
## Development
|
126
268
|
|
127
269
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -2,7 +2,7 @@ module PragmaticTokenizer
|
|
2
2
|
module Languages
|
3
3
|
module Common
|
4
4
|
PUNCTUATION = ['。', '.', '.', '!', '!', '?', '?', '、', '¡', '¿', '„', '“', '[', ']', '"', '#', '$', '%', '&', '(', ')', '*', '+', ',', ':', ';', '<', '=', '>', '@', '^', '_', '`', "'", '{', '|', '}', '~', '-', '«', '»']
|
5
|
-
PUNCTUATION_MAP =
|
5
|
+
PUNCTUATION_MAP = { "。" => "♳", "." => "♴", "." => "♵", "!" => "♶", "!" => "♷", "?" => "♸", "?" => "♹", "、" => "♺", "¡" => "⚀", "¿" => "⚁", "„" => "⚂", "“" => "⚃", "[" => "⚄", "]" => "⚅", "\"" => "☇", "#" => "☈", "$" => "☉", "%" => "☊", "&" => "☋", "(" => "☌", ")" => "☍", "*" => "☠", "+" => "☢", "," => "☣", ":" => "☤", ";" => "☥", "<" => "☦", "=" => "☧", ">" => "☀", "@" => "☁", "^" => "☂", "_" => "☃", "`" => "☄", "'" => "☮", "{" => "♔", "|" => "♕", "}" => "♖", "~" => "♗", "-" => "♘", "«" => "♙", "»" => "♚" }
|
6
6
|
SEMI_PUNCTUATION = ['。', '.', '.']
|
7
7
|
ROMAN_NUMERALS = ['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', 'xi', 'xii', 'xiii', 'xiv', 'xv', 'xvi', 'xvii', 'xviii', 'xix', 'xx', 'xxi', 'xxii', 'xxiii', 'xxiv', 'xxv', 'xxvi', 'xxvii', 'xxviii', 'xxix', 'xxx', 'xxxi', 'xxxii', 'xxxiii', 'xxxiv', 'xxxv', 'xxxvi', 'xxxvii', 'xxxviii', 'xxxix', 'xl', 'xli', 'xlii', 'xliii', 'xliv', 'xlv', 'xlvi', 'xlvii', 'xlviii', 'xlix', 'l', 'li', 'lii', 'liii', 'liv', 'lv', 'lvi', 'lvii', 'lviii', 'lix', 'lx', 'lxi', 'lxii', 'lxiii', 'lxiv', 'lxv', 'lxvi', 'lxvii', 'lxviii', 'lxix', 'lxx', 'lxxi', 'lxxii', 'lxxiii', 'lxxiv', 'lxxv', 'lxxvi', 'lxxvii', 'lxxviii', 'lxxix', 'lxxx', 'lxxxi', 'lxxxii', 'lxxxiii', 'lxxxiv', 'lxxxv', 'lxxxvi', 'lxxxvii', 'lxxxviii', 'lxxxix', 'xc', 'xci', 'xcii', 'xciii', 'xciv', 'xcv', 'xcvi', 'xcvii', 'xcviii', 'xcix']
|
8
8
|
SPECIAL_CHARACTERS = ['®', '©', '™']
|
@@ -11,4 +11,5 @@ module PragmaticTokenizer
|
|
11
11
|
CONTRACTIONS = {}
|
12
12
|
end
|
13
13
|
end
|
14
|
-
end
|
14
|
+
end
|
15
|
+
|
@@ -4,7 +4,10 @@ module PragmaticTokenizer
|
|
4
4
|
include Languages::Common
|
5
5
|
ABBREVIATIONS = ["a", "a.c", "a/c", "abr", "adj", "admón", "aero", "afmo", "ago", "almte", "ambi", "an", "anfi", "ante", "anti", "ap", "apdo", "archi", "arci", "arq", "art", "atte", "auto", "av", "avda", "bco", "bi", "bibl", "bien", "bis", "bs. as", "c", "c.f", "c.g", "c/c", "c/u", "cap", "cc.aa", "cdad", "cm", "co", "com", "con", "contra", "cra", "crio", "cta", "cuadri", "cuasi", "cuatri", "cv", "d.e.p", "da", "dcha", "dcho", "de", "deci", "dep", "des", "di", "dic", "dicc", "dir", "dis", "dn", "doc", "dom", "dpto", "dr", "dra", "dto", "ecto", "ee", "ej", "en", "endo", "entlo", "entre", "epi", "equi", "esq", "etc", "ex", "excmo", "ext", "extra", "f.c", "fca", "fdo", "febr", "ff. aa", "ff.cc", "fig", "fil", "fra", "g.p", "g/p", "geo", "gob", "gr", "gral", "grs", "hemi", "hetero", "hiper", "hipo", "hnos", "homo", "hs", "i", "igl", "iltre", "im", "imp", "impr", "impto", "in", "incl", "infra", "ing", "inst", "inter", "intra", "iso", "izdo", "izq", "izqdo", "j.c", "jue", "jul", "jun", "kg", "km", "lcdo", "ldo", "let", "lic", "ltd", "lun", "macro", "mar", "may", "mega", "mg", "micro", "min", "mini", "mié", "mm", "mono", "mt", "multi", "máx", "mín", "n. del t", "n.b", "neo", "no", "nos", "nov", "ntra. sra", "núm", "oct", "omni", "p", "p.a", "p.d", "p.ej", "p.v.p", "para", "pen", "ph", "ph.d", "pluri", "poli", "pos", "post", "pp", "ppal", "pre", "prev", "pro", "prof", "prov", "pseudo", "ptas", "pts", "pza", "pág", "págs", "párr", "párrf", "q.e.g.e", "q.e.p.d", "q.e.s.m", "re", "reg", "rep", "retro", "rr. hh", "rte", "s", "s. a", "s.a.r", "s.e", "s.l", "s.r.c", "s.r.l", "s.s.s", "s/n", "sdad", "seg", "semi", "sept", "seudo", "sig", "sobre", "sr", "sra", "sres", "srta", "sta", "sto", "sub", "super", "supra", "sáb", "t.v.e", "tamb", "tel", "tfno", "trans", "tras", "tri", "ud", "uds", "ulter", "ultra", "un", "uni", "univ", "uu", "v.b", "v.e", "vd", "vds", "vice", "vid", "vie", "vol", "vs", "vto", "yuxta"]
|
6
6
|
STOP_WORDS = ["algún", "alguna", "algunas", "alguno", "algunos", "ambos", "ampleamos", "ante", "antes", "aquel", "aquellas", "aquellos", "aqui", "arriba", "atras", "bajo", "bastante", "bien", "cada", "cierta", "ciertas", "cierto", "ciertos", "como", "con", "conseguimos", "conseguir", "consigo", "consigue", "consiguen", "consigues", "cual", "cuando", "dentro", "desde", "donde", "dos", "el", "ellas", "ellos", "empleais", "emplean", "emplear", "empleas", "empleo", "en", "encima", "entonces", "entre", "era", "eramos", "eran", "eras", "eres", "es", "esta", "estaba", "estado", "estais", "estamos", "estan", "estoy", "fin", "fue", "fueron", "fui", "fuimos", "gueno", "ha", "hace", "haceis", "hacemos", "hacen", "hacer", "haces", "hago", "incluso", "intenta", "intentais", "intentamos", "intentan", "intentar", "intentas", "intento", "ir", "la", "largo", "las", "lo", "los", "mientras", "mio", "modo", "muchos", "muy", "nos", "nosotros", "otro", "para", "pero", "podeis", "podemos", "poder", "podria", "podriais", "podriamos", "podrian", "podrias", "por", "por qué", "porque", "primero", "puede", "pueden", "puedo", "quien", "sabe", "sabeis", "sabemos", "saben", "saber", "sabes", "ser", "si", "siendo", "sin", "sobre", "sois", "solamente", "solo", "somos", "soy", "su", "sus", "también", "teneis", "tenemos", "tener", "tengo", "tiempo", "tiene", "tienen", "todo", "trabaja", "trabajais", "trabajamos", "trabajan", "trabajar", "trabajas", "trabajo", "tras", "tuyo", "ultimo", "un", "una", "unas", "uno", "unos", "usa", "usais", "usamos", "usan", "usar", "usas", "uso", "va", "vais", "valor", "vamos", "van", "vaya", "verdad", "verdadera", "verdadero", "vosotras", "vosotros", "voy", "yo", "él", "ésta", "éstas", "éste", "éstos", "última", "últimas", "último", "últimos", "a", "añadió", "aún", "actualmente", "adelante", "además", "afirmó", "agregó", "ahí", "ahora", "al", "algún", "algo", "alrededor", "anterior", "apenas", "aproximadamente", "aquí", "así", "aseguró", "aunque", "ayer", "buen", 
"buena", "buenas", "bueno", "buenos", "cómo", "casi", "cerca", "cinco", "comentó", "conocer", "consideró", "considera", "contra", "cosas", "creo", "cuales", "cualquier", "cuanto", "cuatro", "cuenta", "da", "dado", "dan", "dar", "de", "debe", "deben", "debido", "decir", "dejó", "del", "demás", "después", "dice", "dicen", "dicho", "dieron", "diferente", "diferentes", "dijeron", "dijo", "dio", "durante", "e", "ejemplo", "ella", "ello", "embargo", "encuentra", "esa", "esas", "ese", "eso", "esos", "está", "están", "estaban", "estar", "estará", "estas", "este", "esto", "estos", "estuvo", "ex", "existe", "existen", "explicó", "expresó", "fuera", "gran", "grandes", "había", "habían", "haber", "habrá", "hacerlo", "hacia", "haciendo", "han", "hasta", "hay", "haya", "he", "hecho", "hemos", "hicieron", "hizo", "hoy", "hubo", "igual", "indicó", "informó", "junto", "lado", "le", "les", "llegó", "lleva", "llevar", "luego", "lugar", "más", "manera", "manifestó", "mayor", "me", "mediante", "mejor", "mencionó", "menos", "mi", "misma", "mismas", "mismo", "mismos", "momento", "mucha", "muchas", "mucho", "nada", "nadie", "ni", "ningún", "ninguna", "ningunas", "ninguno", "ningunos", "no", "nosotras", "nuestra", "nuestras", "nuestro", "nuestros", "nueva", "nuevas", "nuevo", "nuevos", "nunca", "o", "ocho", "otra", "otras", "otros", "parece", "parte", "partir", "pasada", "pasado", "pesar", "poca", "pocas", "poco", "pocos", "podrá", "podrán", "podría", "podrían", "poner", "posible", "próximo", "próximos", "primer", "primera", "primeros", "principalmente", "propia", "propias", "propio", "propios", "pudo", "pueda", "pues", "qué", "que", "quedó", "queremos", "quién", "quienes", "quiere", "realizó", "realizado", "realizar", "respecto", "sí", "sólo", "se", "señaló", "sea", "sean", "según", "segunda", "segundo", "seis", "será", "serán", "sería", "sido", "siempre", "siete", "sigue", "siguiente", "sino", "sola", "solas", "solos", "son", "tal", "tampoco", "tan", "tanto", "tenía", "tendrá", 
"tendrán", "tenga", "tenido", "tercera", "toda", "todas", "todavía", "todos", "total", "trata", "través", "tres", "tuvo", "usted", "varias", "varios", "veces", "ver", "vez", "y", "ya"]
|
7
|
-
CONTRACTIONS = {
|
7
|
+
CONTRACTIONS = {
|
8
|
+
"al" => "a el",
|
9
|
+
"del" => "de el"
|
10
|
+
}
|
8
11
|
end
|
9
12
|
end
|
10
13
|
end
|
@@ -124,16 +124,17 @@ module PragmaticTokenizer
|
|
124
124
|
cleaned_tokens
|
125
125
|
end
|
126
126
|
|
127
|
-
def convert_punct_to_sym(
|
128
|
-
|
129
|
-
PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP[index]
|
127
|
+
def convert_punct_to_sym(punctuation)
|
128
|
+
PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP[punctuation]
|
130
129
|
end
|
131
130
|
|
132
|
-
def convert_sym_to_punct(
|
133
|
-
|
134
|
-
|
131
|
+
def convert_sym_to_punct(token)
|
132
|
+
symbol = /[♳ ♴ ♵ ♶ ♷ ♸ ♹ ♺ ⚀ ⚁ ⚂ ⚃ ⚄ ⚅ ☇ ☈ ☉ ☊ ☋ ☌ ☍ ☠ ☢ ☣ ☤ ☥ ☦ ☧ ☀ ☁ ☂ ☃ ☄ ☮ ♔ ♕ ♖ ♗ ♘ ♙ ♚]/.match(token)
|
133
|
+
if symbol.nil?
|
134
|
+
return token
|
135
|
+
else
|
136
|
+
return token.gsub!(symbol[0], PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP.key(symbol[0]))
|
135
137
|
end
|
136
|
-
p
|
137
138
|
end
|
138
139
|
end
|
139
140
|
end
|
@@ -4,8 +4,8 @@ require 'pragmatic_tokenizer/languages'
|
|
4
4
|
module PragmaticTokenizer
|
5
5
|
class Tokenizer
|
6
6
|
|
7
|
-
attr_reader :text, :language, :punctuation, :remove_stop_words, :expand_contractions, :language_module, :clean, :remove_numbers, :minimum_length
|
8
|
-
def initialize(text, language: 'en', punctuation: 'all', remove_stop_words: false, expand_contractions: false, clean: false, remove_numbers: false, minimum_length: 0)
|
7
|
+
attr_reader :text, :language, :punctuation, :remove_stop_words, :expand_contractions, :language_module, :clean, :remove_numbers, :minimum_length, :remove_roman_numerals
|
8
|
+
def initialize(text, language: 'en', punctuation: 'all', remove_stop_words: false, expand_contractions: false, clean: false, remove_numbers: false, minimum_length: 0, remove_roman_numerals: false)
|
9
9
|
unless punctuation.eql?('all') ||
|
10
10
|
punctuation.eql?('semi') ||
|
11
11
|
punctuation.eql?('none') ||
|
@@ -33,11 +33,12 @@ module PragmaticTokenizer
|
|
33
33
|
@clean = clean
|
34
34
|
@remove_numbers = remove_numbers
|
35
35
|
@minimum_length = minimum_length
|
36
|
+
@remove_roman_numerals = remove_roman_numerals
|
36
37
|
end
|
37
38
|
|
38
39
|
def tokenize
|
39
40
|
return [] unless text
|
40
|
-
remove_short_tokens(delete_numbers(cleaner(delete_stop_words(find_contractions(remove_punctuation(processor.new(language: language_module).process(text: text)))))))
|
41
|
+
remove_short_tokens(delete_roman_numerals(delete_numbers(cleaner(delete_stop_words(find_contractions(remove_punctuation(processor.new(language: language_module).process(text: text))))))))
|
41
42
|
end
|
42
43
|
|
43
44
|
private
|
@@ -54,12 +55,17 @@ module PragmaticTokenizer
|
|
54
55
|
|
55
56
|
def delete_numbers(tokens)
|
56
57
|
return tokens unless remove_numbers
|
57
|
-
tokens.delete_if { |t| t =~ /\D*\d+\d*/
|
58
|
+
tokens.delete_if { |t| t =~ /\D*\d+\d*/ }
|
59
|
+
end
|
60
|
+
|
61
|
+
def delete_roman_numerals(tokens)
|
62
|
+
return tokens unless remove_roman_numerals
|
63
|
+
tokens.delete_if { |t| PragmaticTokenizer::Languages::Common::ROMAN_NUMERALS.include?(t) || PragmaticTokenizer::Languages::Common::ROMAN_NUMERALS.include?("#{t}.") } if remove_roman_numerals
|
58
64
|
end
|
59
65
|
|
60
66
|
def cleaner(tokens)
|
61
67
|
return tokens unless clean
|
62
|
-
tokens.delete_if { |t| t =~ /\A_+\z/ || t =~ /\A-+\z/ || PragmaticTokenizer::Languages::Common::SPECIAL_CHARACTERS.include?(t) }
|
68
|
+
tokens.delete_if { |t| t =~ /\A_+\z/ || t =~ /\A-+\z/ || PragmaticTokenizer::Languages::Common::SPECIAL_CHARACTERS.include?(t) || t =~ /\A\.{2,}\z/ }
|
63
69
|
end
|
64
70
|
|
65
71
|
def remove_punctuation(tokens)
|
data/pragmatic_tokenizer.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pragmatic_tokenizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: stackprof
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description: A multilingual tokenizer to split a string into tokens.
|
56
70
|
email:
|
57
71
|
- diasks2@gmail.com
|