categorize 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +8 -8
  2. data/lib/categorize/constants.rb +541 -539
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OGNkMmQ5MzEwZGFlOWUxNWM0MzU0MTI0MTI2NzE5NTBlNGZjYzM3Ng==
4
+ NDZiODNkODIyOTFiNGQyZDBiYWM3NmM3MDg0YjBkOGM5ZjUyOTQ0OA==
5
5
  data.tar.gz: !binary |-
6
- YmNmMDE5NWMxYmZhNWI0ZDI2NDA3MjdkOTNjYmI2MGUzMWY0ZTVjZQ==
6
+ OTg5YWJjN2QwYTc3OWMxYjJiZTI5ZWM5MWEwMzllZGRiNGRmZDVmZQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZTdiM2IyMzRiOTg1Y2Y5MDc2ZWQwY2EyYjA3YTZjODEzYmM5MTU5NWVlNzBl
10
- ZDdmYzhiNzdiOTYxOGY3YzgzNWFmZDhmMmIxODczZmY1NGM2MmM2NzI5NzVi
11
- NWYzMGMwOGI2MWI5Mjk5NmY4MmMwM2YyZWFjNzU1MGMxMjcwYWI=
9
+ NWRlMTAzMTlhMTgwN2VlM2M3NWE3MTI3NTQ5ZDRiNTFjNDJhZWQ0ZjY4ZTVm
10
+ NWY2MTA0YTE2NWI2N2Q4ZTgyMDk4NDk0ZTg1MDFkNWI0YjFiYTMyM2U2ZjIz
11
+ ZTgzZTdkMGMyNjY4YmU2NTVlYTJlZjhhYWE4Nzg4MDk3ZjYwYjk=
12
12
  data.tar.gz: !binary |-
13
- MjQ1NWQ4ZGVlMzNjZDZkNDVmODViOTY1ZTM4ZGZlYjhjMGVmNDQ4ZGRiNmRm
14
- MGY1OTNhN2NkMzQ3Y2U4OGIyMDc3MTU2ZTc5MTE0ZGE4NTc4ODg2MGE5MjRm
15
- N2M3MWQ4YzJhYzFjNTNjZTNjNDA3ZjVlM2RmZDVkMTcxNTFkNDM=
13
+ YzlkZDkzYTYzYTE1ZDBlNzNmZTY3ZDA0ZDA3ODc1YmYzNzU5NTgzODk5MTU5
14
+ NzI0ZjQ1NGU5NGExOWIxZDhjNzQyM2VlMzAyMmE3MjUzNmVjNGFhNThjZjMw
15
+ NWI0MTNhNTI3MTI5ZTA3MGEzZmM3NmMxNjA1NTFhOTZhNDhmNGI=
@@ -1,543 +1,545 @@
1
1
  # encoding: utf-8
2
2
 
3
- module Constants
4
- module Words
5
- # only include words > 2 chars
6
- ENGLISH = %w(
7
- 000
8
- page
9
- home
10
- free
11
- also
12
- about
13
- above
14
- according
15
- accordingly
16
- across
17
- after
18
- afterward
19
- afterwards
20
- again
21
- against
22
- all
23
- almost
24
- alone
25
- along
26
- already
27
- also
28
- although
29
- always
30
- among
31
- amongst
32
- amp
33
- and
34
- another
35
- any
36
- anyhow
37
- anyone
38
- anything
39
- anywhere
40
- apr
41
- are
42
- aug
43
- around
44
- became
45
- because
46
- become
47
- becomes
48
- becoming
49
- been
50
- before
51
- beforehand
52
- began
53
- behind
54
- being
55
- below
56
- beside
57
- besides
58
- between
59
- beyond
60
- both
61
- but
62
- can
63
- cannot
64
- certain
65
- com
66
- could
67
- days ago
68
- dec
69
- did
70
- does
71
- down
72
- during
73
- each
74
- edu
75
- either
76
- else
77
- elsewhere
78
- enough
79
- especially
80
- est
81
- etc
82
- even
83
- ever
84
- every
85
- everyone
86
- everything
87
- everywhere
88
- example
89
- except
90
- feb
91
- few
92
- fewer
93
- finally
94
- find
95
- following
96
- for
97
- former
98
- formerly
99
- from
100
- further
101
- furthermore
102
- generally
103
- get
104
- given
105
- had
106
- has
107
- have
108
- having
109
- hence
110
- henceforth
111
- her
112
- here
113
- hereafter
114
- hereby
115
- herein
116
- hereupon
117
- hers
118
- herself
119
- him
120
- himself
121
- his
122
- hours ago
123
- how
124
- however
125
- http
126
- inc
127
- include
128
- included
129
- includes
130
- including
131
- indeed
132
- instead
133
- into
134
- its
135
- itself
136
- jan
137
- jul
138
- know
139
- known
140
- later
141
- latterly
142
- ldquo
143
- llc
144
- lquo
145
- least
146
- less
147
- many
148
- mar
149
- may
150
- maybe
151
- mdash
152
- meanwhile
153
- might
154
- miss
155
- more
156
- moreover
157
- most
158
- mostly
159
- much
160
- must
161
- myself
162
- nbsp
163
- ndash
164
- near
165
- nearly
166
- neither
167
- never
168
- nevertheless
169
- next
170
- nobody
171
- non
172
- none
173
- nonetheless
174
- nor
175
- not
176
- nothing
177
- nov
178
- now
179
- nowhere
180
- oct
181
- off
182
- often
183
- once
184
- one
185
- only
186
- onto
187
- org
188
- other
189
- others
190
- otherwise
191
- our
192
- ours
193
- ourselves
194
- out
195
- over
196
- overall
197
- own
198
- part
199
- particularly
200
- parts
201
- per
202
- perhaps
203
- probably
204
- quot
205
- rather
206
- rdquo
207
- rquo
208
- said
209
- same
210
- seem
211
- seemed
212
- seeming
213
- seemingly
214
- seems
215
- sep
216
- set
217
- several
218
- she
219
- should
220
- similar
221
- since
222
- site
223
- some
224
- somehow
225
- someone
226
- something
227
- sometime
228
- sometimes
229
- somewhat
230
- somewhere
231
- still
232
- such
233
- than
234
- that
235
- the
236
- their
237
- them
238
- themselves
239
- then
240
- thence
241
- thenceforth
242
- there
243
- thereafter
244
- thereby
245
- therefore
246
- therein
247
- thereupon
248
- these
249
- they
250
- this
251
- those
252
- though
253
- through
254
- throughout
255
- thru
256
- thus
257
- together
258
- too
259
- took
260
- toward
261
- towards
262
- two
263
- under
264
- unless
265
- unlike
266
- unlikely
267
- until
268
- upon
269
- url
270
- use
271
- used
272
- using
273
- usually
274
- various
275
- very
276
- via
277
- want
278
- was
279
- way
280
- well
281
- were
282
- what
283
- whatever
284
- when
285
- whence
286
- whenever
287
- where
288
- whereafter
289
- whereas
290
- whereby
291
- wherein
292
- whereupon
293
- wherever
294
- whether
295
- which
296
- while
297
- whither
298
- who
299
- whoever
300
- whole
301
- whom
302
- whomever
303
- whose
304
- why
305
- will
306
- with
307
- within
308
- without
309
- would
310
- www
311
- yes
312
- yet
313
- you
314
- your
315
- yours
316
- yourself
317
- yourselves
318
- )
319
- SPANISH = %w(
320
- acuerdo
321
- adelante
322
- ademas
323
- adrede
324
- ahi
325
- ahora
326
- alli
327
- alrededor
328
- antano
329
- ante
330
- antes
331
- apenas
332
- aproximadamente
333
- aquel
334
- aquella
335
- aquellas
336
- aquello
337
- aquellos
338
- aqui
339
- arribaabajo
340
- asi
341
- aun
342
- aunque
343
- bajo
344
- bastante
345
- bien
346
- breve
347
- casi
348
- cerca
349
- claro
350
- como
351
- con
352
- conmigo
353
- contigo
354
- contra
355
- cual
356
- cuales
357
- cuando
358
- cuanta
359
- cuantas
360
- cuanto
361
- cuantos
362
- debajo
363
- del
364
- delante
365
- demasiado
366
- dentro
367
- deprisa
368
- desde
369
- despacio
370
- despues
371
- detras
372
- dia
373
- dias
374
- donde
375
- dos
376
- durante
377
- ella
378
- ellas
379
- ellos
380
- encima
381
- enfrente
382
- enseguida
383
- entre
384
- esa
385
- esas
386
- ese
387
- eso
388
- esos
389
- esta
390
- estado
391
- estados
392
- estan
393
- estar
394
- estas
395
- este
396
- esto
397
- estos
398
- excepto
399
- final
400
- fue
401
- fuera
402
- fueron
403
- general
404
- gran
405
- habia
406
- habla
407
- hablan
408
- hace
409
- hacia
410
- han
411
- hasta
412
- hay
413
- horas
414
- hoy
415
- incluso
416
- informo
417
- junto
418
- lado
419
- las
420
- lejos
421
- los
422
- luego
423
- mal
424
- mas
425
- mayor
426
- medio
427
- mejor
428
- menos
429
- menudo
430
- mia
431
- mias
432
- mientras
433
- mio
434
- mios
435
- mis
436
- mismo
437
- mucho
438
- muy
439
- nada
440
- nadie
441
- ninguna
442
- nos
443
- nosotras
444
- nosotros
445
- nuestra
446
- nuestras
447
- nuestro
448
- nuestros
449
- nueva
450
- nuevo
451
- nunca
452
- otra
453
- otros
454
- pais
455
- para
456
- parte
457
- pasado
458
- peor
459
- pero
460
- poco
461
- por
462
- porque
463
- pronto
464
- proximo
465
- puede
466
- qeu
467
- que
468
- quien
469
- quienes
470
- quiza
471
- quizas
472
- raras
473
- repente
474
- salvo
475
- segun
476
- ser
477
- sera
478
- sido
479
- siempre
480
- sin
481
- sobre
482
- solamente
483
- solo
484
- son
485
- soyos
486
- supuesto
487
- sus
488
- suya
489
- suyas
490
- suyo
491
- tal
492
- tambien
493
- tampoco
494
- tarde
495
- temprano
496
- tiene
497
- todavia
498
- todo
499
- todos
500
- tras
501
- tus
502
- tuya
503
- tuyas
504
- tuyo
505
- tuyos
506
- una
507
- unas
508
- uno
509
- unos
510
- usted
511
- ustedes
512
- veces
513
- vez
514
- vosotras
515
- vosotros
516
- vuestra
517
- vuestras
518
- vuestro
519
- vuestros
520
- tudo
521
- dise
522
- dicas
523
- muito
524
- )
525
- FRENCH = %w(
526
- des
527
- les
528
- mais
529
- pour
530
- )
531
- COMMON = ENGLISH | SPANISH | FRENCH
532
- ASIAN_SPACE_CHARS = [
533
- '\302\267',
534
- '\343\200\201',
535
- '\343\200\202',
536
- '\343\203\273',
537
- '\357\274\201'
538
- ].join('|')
539
- SPLIT_REGEX_STR = '[^[:word:]]|[[:punct:]]|' +
540
- Constants::Words::ASIAN_SPACE_CHARS
541
- SPLIT_REGEX = Regexp.new SPLIT_REGEX_STR.force_encoding('utf-8')
3
+ module Categorize
4
+ module Constants
5
+ module Words
6
+ # only include words > 2 chars
7
+ ENGLISH = %w(
8
+ 000
9
+ page
10
+ home
11
+ free
12
+ also
13
+ about
14
+ above
15
+ according
16
+ accordingly
17
+ across
18
+ after
19
+ afterward
20
+ afterwards
21
+ again
22
+ against
23
+ all
24
+ almost
25
+ alone
26
+ along
27
+ already
28
+ also
29
+ although
30
+ always
31
+ among
32
+ amongst
33
+ amp
34
+ and
35
+ another
36
+ any
37
+ anyhow
38
+ anyone
39
+ anything
40
+ anywhere
41
+ apr
42
+ are
43
+ aug
44
+ around
45
+ became
46
+ because
47
+ become
48
+ becomes
49
+ becoming
50
+ been
51
+ before
52
+ beforehand
53
+ began
54
+ behind
55
+ being
56
+ below
57
+ beside
58
+ besides
59
+ between
60
+ beyond
61
+ both
62
+ but
63
+ can
64
+ cannot
65
+ certain
66
+ com
67
+ could
68
+ days ago
69
+ dec
70
+ did
71
+ does
72
+ down
73
+ during
74
+ each
75
+ edu
76
+ either
77
+ else
78
+ elsewhere
79
+ enough
80
+ especially
81
+ est
82
+ etc
83
+ even
84
+ ever
85
+ every
86
+ everyone
87
+ everything
88
+ everywhere
89
+ example
90
+ except
91
+ feb
92
+ few
93
+ fewer
94
+ finally
95
+ find
96
+ following
97
+ for
98
+ former
99
+ formerly
100
+ from
101
+ further
102
+ furthermore
103
+ generally
104
+ get
105
+ given
106
+ had
107
+ has
108
+ have
109
+ having
110
+ hence
111
+ henceforth
112
+ her
113
+ here
114
+ hereafter
115
+ hereby
116
+ herein
117
+ hereupon
118
+ hers
119
+ herself
120
+ him
121
+ himself
122
+ his
123
+ hours ago
124
+ how
125
+ however
126
+ http
127
+ inc
128
+ include
129
+ included
130
+ includes
131
+ including
132
+ indeed
133
+ instead
134
+ into
135
+ its
136
+ itself
137
+ jan
138
+ jul
139
+ know
140
+ known
141
+ later
142
+ latterly
143
+ ldquo
144
+ llc
145
+ lquo
146
+ least
147
+ less
148
+ many
149
+ mar
150
+ may
151
+ maybe
152
+ mdash
153
+ meanwhile
154
+ might
155
+ miss
156
+ more
157
+ moreover
158
+ most
159
+ mostly
160
+ much
161
+ must
162
+ myself
163
+ nbsp
164
+ ndash
165
+ near
166
+ nearly
167
+ neither
168
+ never
169
+ nevertheless
170
+ next
171
+ nobody
172
+ non
173
+ none
174
+ nonetheless
175
+ nor
176
+ not
177
+ nothing
178
+ nov
179
+ now
180
+ nowhere
181
+ oct
182
+ off
183
+ often
184
+ once
185
+ one
186
+ only
187
+ onto
188
+ org
189
+ other
190
+ others
191
+ otherwise
192
+ our
193
+ ours
194
+ ourselves
195
+ out
196
+ over
197
+ overall
198
+ own
199
+ part
200
+ particularly
201
+ parts
202
+ per
203
+ perhaps
204
+ probably
205
+ quot
206
+ rather
207
+ rdquo
208
+ rquo
209
+ said
210
+ same
211
+ seem
212
+ seemed
213
+ seeming
214
+ seemingly
215
+ seems
216
+ sep
217
+ set
218
+ several
219
+ she
220
+ should
221
+ similar
222
+ since
223
+ site
224
+ some
225
+ somehow
226
+ someone
227
+ something
228
+ sometime
229
+ sometimes
230
+ somewhat
231
+ somewhere
232
+ still
233
+ such
234
+ than
235
+ that
236
+ the
237
+ their
238
+ them
239
+ themselves
240
+ then
241
+ thence
242
+ thenceforth
243
+ there
244
+ thereafter
245
+ thereby
246
+ therefore
247
+ therein
248
+ thereupon
249
+ these
250
+ they
251
+ this
252
+ those
253
+ though
254
+ through
255
+ throughout
256
+ thru
257
+ thus
258
+ together
259
+ too
260
+ took
261
+ toward
262
+ towards
263
+ two
264
+ under
265
+ unless
266
+ unlike
267
+ unlikely
268
+ until
269
+ upon
270
+ url
271
+ use
272
+ used
273
+ using
274
+ usually
275
+ various
276
+ very
277
+ via
278
+ want
279
+ was
280
+ way
281
+ well
282
+ were
283
+ what
284
+ whatever
285
+ when
286
+ whence
287
+ whenever
288
+ where
289
+ whereafter
290
+ whereas
291
+ whereby
292
+ wherein
293
+ whereupon
294
+ wherever
295
+ whether
296
+ which
297
+ while
298
+ whither
299
+ who
300
+ whoever
301
+ whole
302
+ whom
303
+ whomever
304
+ whose
305
+ why
306
+ will
307
+ with
308
+ within
309
+ without
310
+ would
311
+ www
312
+ yes
313
+ yet
314
+ you
315
+ your
316
+ yours
317
+ yourself
318
+ yourselves
319
+ )
320
+ SPANISH = %w(
321
+ acuerdo
322
+ adelante
323
+ ademas
324
+ adrede
325
+ ahi
326
+ ahora
327
+ alli
328
+ alrededor
329
+ antano
330
+ ante
331
+ antes
332
+ apenas
333
+ aproximadamente
334
+ aquel
335
+ aquella
336
+ aquellas
337
+ aquello
338
+ aquellos
339
+ aqui
340
+ arribaabajo
341
+ asi
342
+ aun
343
+ aunque
344
+ bajo
345
+ bastante
346
+ bien
347
+ breve
348
+ casi
349
+ cerca
350
+ claro
351
+ como
352
+ con
353
+ conmigo
354
+ contigo
355
+ contra
356
+ cual
357
+ cuales
358
+ cuando
359
+ cuanta
360
+ cuantas
361
+ cuanto
362
+ cuantos
363
+ debajo
364
+ del
365
+ delante
366
+ demasiado
367
+ dentro
368
+ deprisa
369
+ desde
370
+ despacio
371
+ despues
372
+ detras
373
+ dia
374
+ dias
375
+ donde
376
+ dos
377
+ durante
378
+ ella
379
+ ellas
380
+ ellos
381
+ encima
382
+ enfrente
383
+ enseguida
384
+ entre
385
+ esa
386
+ esas
387
+ ese
388
+ eso
389
+ esos
390
+ esta
391
+ estado
392
+ estados
393
+ estan
394
+ estar
395
+ estas
396
+ este
397
+ esto
398
+ estos
399
+ excepto
400
+ final
401
+ fue
402
+ fuera
403
+ fueron
404
+ general
405
+ gran
406
+ habia
407
+ habla
408
+ hablan
409
+ hace
410
+ hacia
411
+ han
412
+ hasta
413
+ hay
414
+ horas
415
+ hoy
416
+ incluso
417
+ informo
418
+ junto
419
+ lado
420
+ las
421
+ lejos
422
+ los
423
+ luego
424
+ mal
425
+ mas
426
+ mayor
427
+ medio
428
+ mejor
429
+ menos
430
+ menudo
431
+ mia
432
+ mias
433
+ mientras
434
+ mio
435
+ mios
436
+ mis
437
+ mismo
438
+ mucho
439
+ muy
440
+ nada
441
+ nadie
442
+ ninguna
443
+ nos
444
+ nosotras
445
+ nosotros
446
+ nuestra
447
+ nuestras
448
+ nuestro
449
+ nuestros
450
+ nueva
451
+ nuevo
452
+ nunca
453
+ otra
454
+ otros
455
+ pais
456
+ para
457
+ parte
458
+ pasado
459
+ peor
460
+ pero
461
+ poco
462
+ por
463
+ porque
464
+ pronto
465
+ proximo
466
+ puede
467
+ qeu
468
+ que
469
+ quien
470
+ quienes
471
+ quiza
472
+ quizas
473
+ raras
474
+ repente
475
+ salvo
476
+ segun
477
+ ser
478
+ sera
479
+ sido
480
+ siempre
481
+ sin
482
+ sobre
483
+ solamente
484
+ solo
485
+ son
486
+ soyos
487
+ supuesto
488
+ sus
489
+ suya
490
+ suyas
491
+ suyo
492
+ tal
493
+ tambien
494
+ tampoco
495
+ tarde
496
+ temprano
497
+ tiene
498
+ todavia
499
+ todo
500
+ todos
501
+ tras
502
+ tus
503
+ tuya
504
+ tuyas
505
+ tuyo
506
+ tuyos
507
+ una
508
+ unas
509
+ uno
510
+ unos
511
+ usted
512
+ ustedes
513
+ veces
514
+ vez
515
+ vosotras
516
+ vosotros
517
+ vuestra
518
+ vuestras
519
+ vuestro
520
+ vuestros
521
+ tudo
522
+ dise
523
+ dicas
524
+ muito
525
+ )
526
+ FRENCH = %w(
527
+ des
528
+ les
529
+ mais
530
+ pour
531
+ )
532
+ COMMON = ENGLISH | SPANISH | FRENCH
533
+ ASIAN_SPACE_CHARS = [
534
+ '\302\267',
535
+ '\343\200\201',
536
+ '\343\200\202',
537
+ '\343\203\273',
538
+ '\357\274\201'
539
+ ].join('|')
540
+ SPLIT_REGEX_STR = '[^[:word:]]|[[:punct:]]|' +
541
+ Constants::Words::ASIAN_SPACE_CHARS
542
+ SPLIT_REGEX = Regexp.new SPLIT_REGEX_STR.force_encoding('utf-8')
543
+ end
542
544
  end
543
545
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: categorize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Lubell-Doughtie