categorize 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +8 -8
  2. data/lib/categorize/constants.rb +541 -539
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OGNkMmQ5MzEwZGFlOWUxNWM0MzU0MTI0MTI2NzE5NTBlNGZjYzM3Ng==
4
+ NDZiODNkODIyOTFiNGQyZDBiYWM3NmM3MDg0YjBkOGM5ZjUyOTQ0OA==
5
5
  data.tar.gz: !binary |-
6
- YmNmMDE5NWMxYmZhNWI0ZDI2NDA3MjdkOTNjYmI2MGUzMWY0ZTVjZQ==
6
+ OTg5YWJjN2QwYTc3OWMxYjJiZTI5ZWM5MWEwMzllZGRiNGRmZDVmZQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZTdiM2IyMzRiOTg1Y2Y5MDc2ZWQwY2EyYjA3YTZjODEzYmM5MTU5NWVlNzBl
10
- ZDdmYzhiNzdiOTYxOGY3YzgzNWFmZDhmMmIxODczZmY1NGM2MmM2NzI5NzVi
11
- NWYzMGMwOGI2MWI5Mjk5NmY4MmMwM2YyZWFjNzU1MGMxMjcwYWI=
9
+ NWRlMTAzMTlhMTgwN2VlM2M3NWE3MTI3NTQ5ZDRiNTFjNDJhZWQ0ZjY4ZTVm
10
+ NWY2MTA0YTE2NWI2N2Q4ZTgyMDk4NDk0ZTg1MDFkNWI0YjFiYTMyM2U2ZjIz
11
+ ZTgzZTdkMGMyNjY4YmU2NTVlYTJlZjhhYWE4Nzg4MDk3ZjYwYjk=
12
12
  data.tar.gz: !binary |-
13
- MjQ1NWQ4ZGVlMzNjZDZkNDVmODViOTY1ZTM4ZGZlYjhjMGVmNDQ4ZGRiNmRm
14
- MGY1OTNhN2NkMzQ3Y2U4OGIyMDc3MTU2ZTc5MTE0ZGE4NTc4ODg2MGE5MjRm
15
- N2M3MWQ4YzJhYzFjNTNjZTNjNDA3ZjVlM2RmZDVkMTcxNTFkNDM=
13
+ YzlkZDkzYTYzYTE1ZDBlNzNmZTY3ZDA0ZDA3ODc1YmYzNzU5NTgzODk5MTU5
14
+ NzI0ZjQ1NGU5NGExOWIxZDhjNzQyM2VlMzAyMmE3MjUzNmVjNGFhNThjZjMw
15
+ NWI0MTNhNTI3MTI5ZTA3MGEzZmM3NmMxNjA1NTFhOTZhNDhmNGI=
@@ -1,543 +1,545 @@
1
1
  # encoding: utf-8
2
2
 
3
- module Constants
4
- module Words
5
- # only include words > 2 chars
6
- ENGLISH = %w(
7
- 000
8
- page
9
- home
10
- free
11
- also
12
- about
13
- above
14
- according
15
- accordingly
16
- across
17
- after
18
- afterward
19
- afterwards
20
- again
21
- against
22
- all
23
- almost
24
- alone
25
- along
26
- already
27
- also
28
- although
29
- always
30
- among
31
- amongst
32
- amp
33
- and
34
- another
35
- any
36
- anyhow
37
- anyone
38
- anything
39
- anywhere
40
- apr
41
- are
42
- aug
43
- around
44
- became
45
- because
46
- become
47
- becomes
48
- becoming
49
- been
50
- before
51
- beforehand
52
- began
53
- behind
54
- being
55
- below
56
- beside
57
- besides
58
- between
59
- beyond
60
- both
61
- but
62
- can
63
- cannot
64
- certain
65
- com
66
- could
67
- days ago
68
- dec
69
- did
70
- does
71
- down
72
- during
73
- each
74
- edu
75
- either
76
- else
77
- elsewhere
78
- enough
79
- especially
80
- est
81
- etc
82
- even
83
- ever
84
- every
85
- everyone
86
- everything
87
- everywhere
88
- example
89
- except
90
- feb
91
- few
92
- fewer
93
- finally
94
- find
95
- following
96
- for
97
- former
98
- formerly
99
- from
100
- further
101
- furthermore
102
- generally
103
- get
104
- given
105
- had
106
- has
107
- have
108
- having
109
- hence
110
- henceforth
111
- her
112
- here
113
- hereafter
114
- hereby
115
- herein
116
- hereupon
117
- hers
118
- herself
119
- him
120
- himself
121
- his
122
- hours ago
123
- how
124
- however
125
- http
126
- inc
127
- include
128
- included
129
- includes
130
- including
131
- indeed
132
- instead
133
- into
134
- its
135
- itself
136
- jan
137
- jul
138
- know
139
- known
140
- later
141
- latterly
142
- ldquo
143
- llc
144
- lquo
145
- least
146
- less
147
- many
148
- mar
149
- may
150
- maybe
151
- mdash
152
- meanwhile
153
- might
154
- miss
155
- more
156
- moreover
157
- most
158
- mostly
159
- much
160
- must
161
- myself
162
- nbsp
163
- ndash
164
- near
165
- nearly
166
- neither
167
- never
168
- nevertheless
169
- next
170
- nobody
171
- non
172
- none
173
- nonetheless
174
- nor
175
- not
176
- nothing
177
- nov
178
- now
179
- nowhere
180
- oct
181
- off
182
- often
183
- once
184
- one
185
- only
186
- onto
187
- org
188
- other
189
- others
190
- otherwise
191
- our
192
- ours
193
- ourselves
194
- out
195
- over
196
- overall
197
- own
198
- part
199
- particularly
200
- parts
201
- per
202
- perhaps
203
- probably
204
- quot
205
- rather
206
- rdquo
207
- rquo
208
- said
209
- same
210
- seem
211
- seemed
212
- seeming
213
- seemingly
214
- seems
215
- sep
216
- set
217
- several
218
- she
219
- should
220
- similar
221
- since
222
- site
223
- some
224
- somehow
225
- someone
226
- something
227
- sometime
228
- sometimes
229
- somewhat
230
- somewhere
231
- still
232
- such
233
- than
234
- that
235
- the
236
- their
237
- them
238
- themselves
239
- then
240
- thence
241
- thenceforth
242
- there
243
- thereafter
244
- thereby
245
- therefore
246
- therein
247
- thereupon
248
- these
249
- they
250
- this
251
- those
252
- though
253
- through
254
- throughout
255
- thru
256
- thus
257
- together
258
- too
259
- took
260
- toward
261
- towards
262
- two
263
- under
264
- unless
265
- unlike
266
- unlikely
267
- until
268
- upon
269
- url
270
- use
271
- used
272
- using
273
- usually
274
- various
275
- very
276
- via
277
- want
278
- was
279
- way
280
- well
281
- were
282
- what
283
- whatever
284
- when
285
- whence
286
- whenever
287
- where
288
- whereafter
289
- whereas
290
- whereby
291
- wherein
292
- whereupon
293
- wherever
294
- whether
295
- which
296
- while
297
- whither
298
- who
299
- whoever
300
- whole
301
- whom
302
- whomever
303
- whose
304
- why
305
- will
306
- with
307
- within
308
- without
309
- would
310
- www
311
- yes
312
- yet
313
- you
314
- your
315
- yours
316
- yourself
317
- yourselves
318
- )
319
- SPANISH = %w(
320
- acuerdo
321
- adelante
322
- ademas
323
- adrede
324
- ahi
325
- ahora
326
- alli
327
- alrededor
328
- antano
329
- ante
330
- antes
331
- apenas
332
- aproximadamente
333
- aquel
334
- aquella
335
- aquellas
336
- aquello
337
- aquellos
338
- aqui
339
- arribaabajo
340
- asi
341
- aun
342
- aunque
343
- bajo
344
- bastante
345
- bien
346
- breve
347
- casi
348
- cerca
349
- claro
350
- como
351
- con
352
- conmigo
353
- contigo
354
- contra
355
- cual
356
- cuales
357
- cuando
358
- cuanta
359
- cuantas
360
- cuanto
361
- cuantos
362
- debajo
363
- del
364
- delante
365
- demasiado
366
- dentro
367
- deprisa
368
- desde
369
- despacio
370
- despues
371
- detras
372
- dia
373
- dias
374
- donde
375
- dos
376
- durante
377
- ella
378
- ellas
379
- ellos
380
- encima
381
- enfrente
382
- enseguida
383
- entre
384
- esa
385
- esas
386
- ese
387
- eso
388
- esos
389
- esta
390
- estado
391
- estados
392
- estan
393
- estar
394
- estas
395
- este
396
- esto
397
- estos
398
- excepto
399
- final
400
- fue
401
- fuera
402
- fueron
403
- general
404
- gran
405
- habia
406
- habla
407
- hablan
408
- hace
409
- hacia
410
- han
411
- hasta
412
- hay
413
- horas
414
- hoy
415
- incluso
416
- informo
417
- junto
418
- lado
419
- las
420
- lejos
421
- los
422
- luego
423
- mal
424
- mas
425
- mayor
426
- medio
427
- mejor
428
- menos
429
- menudo
430
- mia
431
- mias
432
- mientras
433
- mio
434
- mios
435
- mis
436
- mismo
437
- mucho
438
- muy
439
- nada
440
- nadie
441
- ninguna
442
- nos
443
- nosotras
444
- nosotros
445
- nuestra
446
- nuestras
447
- nuestro
448
- nuestros
449
- nueva
450
- nuevo
451
- nunca
452
- otra
453
- otros
454
- pais
455
- para
456
- parte
457
- pasado
458
- peor
459
- pero
460
- poco
461
- por
462
- porque
463
- pronto
464
- proximo
465
- puede
466
- qeu
467
- que
468
- quien
469
- quienes
470
- quiza
471
- quizas
472
- raras
473
- repente
474
- salvo
475
- segun
476
- ser
477
- sera
478
- sido
479
- siempre
480
- sin
481
- sobre
482
- solamente
483
- solo
484
- son
485
- soyos
486
- supuesto
487
- sus
488
- suya
489
- suyas
490
- suyo
491
- tal
492
- tambien
493
- tampoco
494
- tarde
495
- temprano
496
- tiene
497
- todavia
498
- todo
499
- todos
500
- tras
501
- tus
502
- tuya
503
- tuyas
504
- tuyo
505
- tuyos
506
- una
507
- unas
508
- uno
509
- unos
510
- usted
511
- ustedes
512
- veces
513
- vez
514
- vosotras
515
- vosotros
516
- vuestra
517
- vuestras
518
- vuestro
519
- vuestros
520
- tudo
521
- dise
522
- dicas
523
- muito
524
- )
525
- FRENCH = %w(
526
- des
527
- les
528
- mais
529
- pour
530
- )
531
- COMMON = ENGLISH | SPANISH | FRENCH
532
- ASIAN_SPACE_CHARS = [
533
- '\302\267',
534
- '\343\200\201',
535
- '\343\200\202',
536
- '\343\203\273',
537
- '\357\274\201'
538
- ].join('|')
539
- SPLIT_REGEX_STR = '[^[:word:]]|[[:punct:]]|' +
540
- Constants::Words::ASIAN_SPACE_CHARS
541
- SPLIT_REGEX = Regexp.new SPLIT_REGEX_STR.force_encoding('utf-8')
3
+ module Categorize
4
+ module Constants
5
+ module Words
6
+ # only include words > 2 chars
7
+ ENGLISH = %w(
8
+ 000
9
+ page
10
+ home
11
+ free
12
+ also
13
+ about
14
+ above
15
+ according
16
+ accordingly
17
+ across
18
+ after
19
+ afterward
20
+ afterwards
21
+ again
22
+ against
23
+ all
24
+ almost
25
+ alone
26
+ along
27
+ already
28
+ also
29
+ although
30
+ always
31
+ among
32
+ amongst
33
+ amp
34
+ and
35
+ another
36
+ any
37
+ anyhow
38
+ anyone
39
+ anything
40
+ anywhere
41
+ apr
42
+ are
43
+ aug
44
+ around
45
+ became
46
+ because
47
+ become
48
+ becomes
49
+ becoming
50
+ been
51
+ before
52
+ beforehand
53
+ began
54
+ behind
55
+ being
56
+ below
57
+ beside
58
+ besides
59
+ between
60
+ beyond
61
+ both
62
+ but
63
+ can
64
+ cannot
65
+ certain
66
+ com
67
+ could
68
+ days ago
69
+ dec
70
+ did
71
+ does
72
+ down
73
+ during
74
+ each
75
+ edu
76
+ either
77
+ else
78
+ elsewhere
79
+ enough
80
+ especially
81
+ est
82
+ etc
83
+ even
84
+ ever
85
+ every
86
+ everyone
87
+ everything
88
+ everywhere
89
+ example
90
+ except
91
+ feb
92
+ few
93
+ fewer
94
+ finally
95
+ find
96
+ following
97
+ for
98
+ former
99
+ formerly
100
+ from
101
+ further
102
+ furthermore
103
+ generally
104
+ get
105
+ given
106
+ had
107
+ has
108
+ have
109
+ having
110
+ hence
111
+ henceforth
112
+ her
113
+ here
114
+ hereafter
115
+ hereby
116
+ herein
117
+ hereupon
118
+ hers
119
+ herself
120
+ him
121
+ himself
122
+ his
123
+ hours ago
124
+ how
125
+ however
126
+ http
127
+ inc
128
+ include
129
+ included
130
+ includes
131
+ including
132
+ indeed
133
+ instead
134
+ into
135
+ its
136
+ itself
137
+ jan
138
+ jul
139
+ know
140
+ known
141
+ later
142
+ latterly
143
+ ldquo
144
+ llc
145
+ lquo
146
+ least
147
+ less
148
+ many
149
+ mar
150
+ may
151
+ maybe
152
+ mdash
153
+ meanwhile
154
+ might
155
+ miss
156
+ more
157
+ moreover
158
+ most
159
+ mostly
160
+ much
161
+ must
162
+ myself
163
+ nbsp
164
+ ndash
165
+ near
166
+ nearly
167
+ neither
168
+ never
169
+ nevertheless
170
+ next
171
+ nobody
172
+ non
173
+ none
174
+ nonetheless
175
+ nor
176
+ not
177
+ nothing
178
+ nov
179
+ now
180
+ nowhere
181
+ oct
182
+ off
183
+ often
184
+ once
185
+ one
186
+ only
187
+ onto
188
+ org
189
+ other
190
+ others
191
+ otherwise
192
+ our
193
+ ours
194
+ ourselves
195
+ out
196
+ over
197
+ overall
198
+ own
199
+ part
200
+ particularly
201
+ parts
202
+ per
203
+ perhaps
204
+ probably
205
+ quot
206
+ rather
207
+ rdquo
208
+ rquo
209
+ said
210
+ same
211
+ seem
212
+ seemed
213
+ seeming
214
+ seemingly
215
+ seems
216
+ sep
217
+ set
218
+ several
219
+ she
220
+ should
221
+ similar
222
+ since
223
+ site
224
+ some
225
+ somehow
226
+ someone
227
+ something
228
+ sometime
229
+ sometimes
230
+ somewhat
231
+ somewhere
232
+ still
233
+ such
234
+ than
235
+ that
236
+ the
237
+ their
238
+ them
239
+ themselves
240
+ then
241
+ thence
242
+ thenceforth
243
+ there
244
+ thereafter
245
+ thereby
246
+ therefore
247
+ therein
248
+ thereupon
249
+ these
250
+ they
251
+ this
252
+ those
253
+ though
254
+ through
255
+ throughout
256
+ thru
257
+ thus
258
+ together
259
+ too
260
+ took
261
+ toward
262
+ towards
263
+ two
264
+ under
265
+ unless
266
+ unlike
267
+ unlikely
268
+ until
269
+ upon
270
+ url
271
+ use
272
+ used
273
+ using
274
+ usually
275
+ various
276
+ very
277
+ via
278
+ want
279
+ was
280
+ way
281
+ well
282
+ were
283
+ what
284
+ whatever
285
+ when
286
+ whence
287
+ whenever
288
+ where
289
+ whereafter
290
+ whereas
291
+ whereby
292
+ wherein
293
+ whereupon
294
+ wherever
295
+ whether
296
+ which
297
+ while
298
+ whither
299
+ who
300
+ whoever
301
+ whole
302
+ whom
303
+ whomever
304
+ whose
305
+ why
306
+ will
307
+ with
308
+ within
309
+ without
310
+ would
311
+ www
312
+ yes
313
+ yet
314
+ you
315
+ your
316
+ yours
317
+ yourself
318
+ yourselves
319
+ )
320
+ SPANISH = %w(
321
+ acuerdo
322
+ adelante
323
+ ademas
324
+ adrede
325
+ ahi
326
+ ahora
327
+ alli
328
+ alrededor
329
+ antano
330
+ ante
331
+ antes
332
+ apenas
333
+ aproximadamente
334
+ aquel
335
+ aquella
336
+ aquellas
337
+ aquello
338
+ aquellos
339
+ aqui
340
+ arribaabajo
341
+ asi
342
+ aun
343
+ aunque
344
+ bajo
345
+ bastante
346
+ bien
347
+ breve
348
+ casi
349
+ cerca
350
+ claro
351
+ como
352
+ con
353
+ conmigo
354
+ contigo
355
+ contra
356
+ cual
357
+ cuales
358
+ cuando
359
+ cuanta
360
+ cuantas
361
+ cuanto
362
+ cuantos
363
+ debajo
364
+ del
365
+ delante
366
+ demasiado
367
+ dentro
368
+ deprisa
369
+ desde
370
+ despacio
371
+ despues
372
+ detras
373
+ dia
374
+ dias
375
+ donde
376
+ dos
377
+ durante
378
+ ella
379
+ ellas
380
+ ellos
381
+ encima
382
+ enfrente
383
+ enseguida
384
+ entre
385
+ esa
386
+ esas
387
+ ese
388
+ eso
389
+ esos
390
+ esta
391
+ estado
392
+ estados
393
+ estan
394
+ estar
395
+ estas
396
+ este
397
+ esto
398
+ estos
399
+ excepto
400
+ final
401
+ fue
402
+ fuera
403
+ fueron
404
+ general
405
+ gran
406
+ habia
407
+ habla
408
+ hablan
409
+ hace
410
+ hacia
411
+ han
412
+ hasta
413
+ hay
414
+ horas
415
+ hoy
416
+ incluso
417
+ informo
418
+ junto
419
+ lado
420
+ las
421
+ lejos
422
+ los
423
+ luego
424
+ mal
425
+ mas
426
+ mayor
427
+ medio
428
+ mejor
429
+ menos
430
+ menudo
431
+ mia
432
+ mias
433
+ mientras
434
+ mio
435
+ mios
436
+ mis
437
+ mismo
438
+ mucho
439
+ muy
440
+ nada
441
+ nadie
442
+ ninguna
443
+ nos
444
+ nosotras
445
+ nosotros
446
+ nuestra
447
+ nuestras
448
+ nuestro
449
+ nuestros
450
+ nueva
451
+ nuevo
452
+ nunca
453
+ otra
454
+ otros
455
+ pais
456
+ para
457
+ parte
458
+ pasado
459
+ peor
460
+ pero
461
+ poco
462
+ por
463
+ porque
464
+ pronto
465
+ proximo
466
+ puede
467
+ qeu
468
+ que
469
+ quien
470
+ quienes
471
+ quiza
472
+ quizas
473
+ raras
474
+ repente
475
+ salvo
476
+ segun
477
+ ser
478
+ sera
479
+ sido
480
+ siempre
481
+ sin
482
+ sobre
483
+ solamente
484
+ solo
485
+ son
486
+ soyos
487
+ supuesto
488
+ sus
489
+ suya
490
+ suyas
491
+ suyo
492
+ tal
493
+ tambien
494
+ tampoco
495
+ tarde
496
+ temprano
497
+ tiene
498
+ todavia
499
+ todo
500
+ todos
501
+ tras
502
+ tus
503
+ tuya
504
+ tuyas
505
+ tuyo
506
+ tuyos
507
+ una
508
+ unas
509
+ uno
510
+ unos
511
+ usted
512
+ ustedes
513
+ veces
514
+ vez
515
+ vosotras
516
+ vosotros
517
+ vuestra
518
+ vuestras
519
+ vuestro
520
+ vuestros
521
+ tudo
522
+ dise
523
+ dicas
524
+ muito
525
+ )
526
+ FRENCH = %w(
527
+ des
528
+ les
529
+ mais
530
+ pour
531
+ )
532
+ COMMON = ENGLISH | SPANISH | FRENCH
533
+ ASIAN_SPACE_CHARS = [
534
+ '\302\267',
535
+ '\343\200\201',
536
+ '\343\200\202',
537
+ '\343\203\273',
538
+ '\357\274\201'
539
+ ].join('|')
540
+ SPLIT_REGEX_STR = '[^[:word:]]|[[:punct:]]|' +
541
+ Constants::Words::ASIAN_SPACE_CHARS
542
+ SPLIT_REGEX = Regexp.new SPLIT_REGEX_STR.force_encoding('utf-8')
543
+ end
542
544
  end
543
545
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: categorize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Lubell-Doughtie