keyphrase 0.1.2 → 0.2.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +5 -4
  3. data/lib/keyphrase/stoplist/afr.rb +14 -0
  4. data/lib/keyphrase/stoplist/aka.rb +10 -0
  5. data/lib/keyphrase/stoplist/amh.rb +10 -0
  6. data/lib/keyphrase/stoplist/ara.rb +490 -0
  7. data/lib/keyphrase/stoplist/aze.rb +175 -0
  8. data/lib/keyphrase/stoplist/bel.rb +11 -0
  9. data/lib/keyphrase/stoplist/ben.rb +408 -0
  10. data/lib/keyphrase/stoplist/bul.rb +528 -0
  11. data/lib/keyphrase/stoplist/cat.rb +711 -0
  12. data/lib/keyphrase/stoplist/ces.rb +560 -0
  13. data/lib/keyphrase/stoplist/cmn.rb +1119 -0
  14. data/lib/keyphrase/stoplist/dan.rb +25 -0
  15. data/lib/keyphrase/stoplist/deu.rb +631 -0
  16. data/lib/keyphrase/stoplist/ell.rb +275 -0
  17. data/lib/keyphrase/stoplist/eng.rb +2 -589
  18. data/lib/keyphrase/stoplist/epo.rb +183 -0
  19. data/lib/keyphrase/stoplist/est.rb +13 -0
  20. data/lib/keyphrase/stoplist/fin.rb +857 -0
  21. data/lib/keyphrase/stoplist/fra.rb +699 -0
  22. data/lib/keyphrase/stoplist/guj.rb +234 -0
  23. data/lib/keyphrase/stoplist/heb.rb +204 -0
  24. data/lib/keyphrase/stoplist/hin.rb +235 -0
  25. data/lib/keyphrase/stoplist/hrv.rb +25 -0
  26. data/lib/keyphrase/stoplist/hun.rb +1195 -0
  27. data/lib/keyphrase/stoplist/hye.rb +55 -0
  28. data/lib/keyphrase/stoplist/ind.rb +768 -0
  29. data/lib/keyphrase/stoplist/ita.rb +670 -0
  30. data/lib/keyphrase/stoplist/jav.rb +10 -0
  31. data/lib/keyphrase/stoplist/jpn.rb +144 -0
  32. data/lib/keyphrase/stoplist/kan.rb +92 -0
  33. data/lib/keyphrase/stoplist/kat.rb +383 -0
  34. data/lib/keyphrase/stoplist/khm.rb +245 -0
  35. data/lib/keyphrase/stoplist/kor.rb +610 -0
  36. data/lib/keyphrase/stoplist/lat.rb +14 -0
  37. data/lib/keyphrase/stoplist/lav.rb +171 -0
  38. data/lib/keyphrase/stoplist/lit.rb +484 -0
  39. data/lib/keyphrase/stoplist/mal.rb +11 -0
  40. data/lib/keyphrase/stoplist/mar.rb +109 -0
  41. data/lib/keyphrase/stoplist/mkd.rb +11 -0
  42. data/lib/keyphrase/stoplist/mya.rb +285 -0
  43. data/lib/keyphrase/stoplist/nep.rb +265 -0
  44. data/lib/keyphrase/stoplist/nld.rb +423 -0
  45. data/lib/keyphrase/stoplist/nob.rb +186 -0
  46. data/lib/keyphrase/stoplist/ori.rb +11 -0
  47. data/lib/keyphrase/stoplist/pan.rb +473 -0
  48. data/lib/keyphrase/stoplist/pes.rb +801 -0
  49. data/lib/keyphrase/stoplist/pol.rb +338 -0
  50. data/lib/keyphrase/stoplist/por.rb +570 -0
  51. data/lib/keyphrase/stoplist/ron.rb +444 -0
  52. data/lib/keyphrase/stoplist/rus.rb +569 -0
  53. data/lib/keyphrase/stoplist/sin.rb +10 -0
  54. data/lib/keyphrase/stoplist/slk.rb +428 -0
  55. data/lib/keyphrase/stoplist/slv.rb +456 -0
  56. data/lib/keyphrase/stoplist/sna.rb +11 -0
  57. data/lib/keyphrase/stoplist/spa.rb +731 -0
  58. data/lib/keyphrase/stoplist/srp.rb +11 -0
  59. data/lib/keyphrase/stoplist/swe.rb +428 -0
  60. data/lib/keyphrase/stoplist/tam.rb +135 -0
  61. data/lib/keyphrase/stoplist/tel.rb +10 -0
  62. data/lib/keyphrase/stoplist/tgl.rb +157 -0
  63. data/lib/keyphrase/stoplist/tha.rb +125 -0
  64. data/lib/keyphrase/stoplist/tuk.rb +11 -0
  65. data/lib/keyphrase/stoplist/tur.rb +514 -0
  66. data/lib/keyphrase/stoplist/ukr.rb +38 -0
  67. data/lib/keyphrase/stoplist/urd.rb +527 -0
  68. data/lib/keyphrase/stoplist/uzb.rb +10 -0
  69. data/lib/keyphrase/stoplist/vie.rb +655 -0
  70. data/lib/keyphrase/stoplist/yid.rb +204 -0
  71. data/lib/keyphrase/stoplist/zul.rb +39 -0
  72. data/lib/keyphrase/stoplist.rb +13 -10
  73. data/lib/keyphrase/version.rb +1 -1
  74. data/lib/keyphrase.rb +20 -12
  75. metadata +71 -3
@@ -1,596 +1,9 @@
1
1
  class Keyphrase
2
2
  module Stoplist
3
3
  class Eng
4
- def self.smart
5
- @@smart_regex ||= /(?:^|\s)(?:#{smart_words.join('|')})(?:$|\s)/io
6
- end
7
4
 
8
- def self.strict
9
- @@strict_regex ||= /(?:^|\s)(?:#{strict_words.join('|')})(?:$|\s)/io
10
- end
11
-
12
- def self.smart_words
13
- @@smart ||= %w{
14
- dr
15
- dra
16
- mr
17
- ms
18
- a
19
- a's
20
- able
21
- about
22
- above
23
- according
24
- accordingly
25
- across
26
- actually
27
- after
28
- afterwards
29
- again
30
- against
31
- ain't
32
- all
33
- allow
34
- allows
35
- almost
36
- alone
37
- along
38
- already
39
- also
40
- although
41
- always
42
- am
43
- among
44
- amongst
45
- an
46
- and
47
- another
48
- any
49
- anybody
50
- anyhow
51
- anyone
52
- anything
53
- anyway
54
- anyways
55
- anywhere
56
- apart
57
- appear
58
- appreciate
59
- appropriate
60
- are
61
- aren't
62
- around
63
- as
64
- aside
65
- ask
66
- asking
67
- associated
68
- at
69
- available
70
- away
71
- awfully
72
- b
73
- be
74
- became
75
- because
76
- become
77
- becomes
78
- becoming
79
- been
80
- before
81
- beforehand
82
- behind
83
- being
84
- believe
85
- below
86
- beside
87
- besides
88
- best
89
- better
90
- between
91
- beyond
92
- both
93
- brief
94
- but
95
- by
96
- c
97
- c'mon
98
- c's
99
- came
100
- can
101
- can't
102
- cannot
103
- cant
104
- cause
105
- causes
106
- certain
107
- certainly
108
- changes
109
- clearly
110
- co
111
- com
112
- come
113
- comes
114
- concerning
115
- consequently
116
- consider
117
- considering
118
- contain
119
- containing
120
- contains
121
- corresponding
122
- could
123
- couldn't
124
- course
125
- currently
126
- d
127
- definitely
128
- described
129
- despite
130
- did
131
- didn't
132
- different
133
- do
134
- does
135
- doesn't
136
- doing
137
- don't
138
- done
139
- down
140
- downwards
141
- during
142
- e
143
- each
144
- edu
145
- eg
146
- eight
147
- either
148
- else
149
- elsewhere
150
- enough
151
- entirely
152
- especially
153
- et
154
- etc
155
- even
156
- ever
157
- every
158
- everybody
159
- everyone
160
- everything
161
- everywhere
162
- ex
163
- exactly
164
- example
165
- except
166
- f
167
- far
168
- few
169
- fifth
170
- first
171
- five
172
- followed
173
- following
174
- follows
175
- for
176
- former
177
- formerly
178
- forth
179
- four
180
- from
181
- further
182
- furthermore
183
- g
184
- get
185
- gets
186
- getting
187
- given
188
- gives
189
- go
190
- goes
191
- going
192
- gone
193
- got
194
- gotten
195
- greetings
196
- h
197
- had
198
- hadn't
199
- happens
200
- hardly
201
- has
202
- hasn't
203
- have
204
- haven't
205
- having
206
- he
207
- he's
208
- hello
209
- help
210
- hence
211
- her
212
- here
213
- here's
214
- hereafter
215
- hereby
216
- herein
217
- hereupon
218
- hers
219
- herself
220
- hi
221
- him
222
- himself
223
- his
224
- hither
225
- hopefully
226
- how
227
- howbeit
228
- however
229
- i
230
- i'd
231
- i'll
232
- i'm
233
- i've
234
- ie
235
- if
236
- ignored
237
- immediate
238
- in
239
- inasmuch
240
- inc
241
- indeed
242
- indicate
243
- indicated
244
- indicates
245
- inner
246
- insofar
247
- instead
248
- into
249
- inward
250
- is
251
- isn't
252
- it
253
- it'd
254
- it'll
255
- it's
256
- its
257
- itself
258
- j
259
- just
260
- k
261
- keep
262
- keeps
263
- kept
264
- know
265
- knows
266
- known
267
- l
268
- last
269
- lately
270
- later
271
- latter
272
- latterly
273
- least
274
- less
275
- lest
276
- let
277
- let's
278
- like
279
- liked
280
- likely
281
- little
282
- look
283
- looking
284
- looks
285
- ltd
286
- m
287
- mainly
288
- many
289
- may
290
- maybe
291
- me
292
- mean
293
- meanwhile
294
- merely
295
- might
296
- more
297
- moreover
298
- most
299
- mostly
300
- much
301
- must
302
- my
303
- myself
304
- n
305
- name
306
- namely
307
- nd
308
- near
309
- nearly
310
- necessary
311
- need
312
- needs
313
- neither
314
- never
315
- nevertheless
316
- new
317
- next
318
- nine
319
- no
320
- nobody
321
- non
322
- none
323
- noone
324
- nor
325
- normally
326
- not
327
- nothing
328
- novel
329
- now
330
- nowhere
331
- o
332
- obviously
333
- of
334
- off
335
- often
336
- oh
337
- ok
338
- okay
339
- old
340
- on
341
- once
342
- one
343
- ones
344
- only
345
- onto
346
- or
347
- other
348
- others
349
- otherwise
350
- ought
351
- our
352
- ours
353
- ourselves
354
- out
355
- outside
356
- over
357
- overall
358
- own
359
- p
360
- particular
361
- particularly
362
- per
363
- perhaps
364
- placed
365
- please
366
- plus
367
- possible
368
- presumably
369
- probably
370
- provides
371
- q
372
- que
373
- quite
374
- qv
375
- r
376
- rather
377
- rd
378
- re
379
- really
380
- reasonably
381
- regarding
382
- regardless
383
- regards
384
- relatively
385
- respectively
386
- right
387
- s
388
- said
389
- same
390
- saw
391
- say
392
- saying
393
- says
394
- second
395
- secondly
396
- see
397
- seeing
398
- seem
399
- seemed
400
- seeming
401
- seems
402
- seen
403
- self
404
- selves
405
- sensible
406
- sent
407
- serious
408
- seriously
409
- seven
410
- several
411
- shall
412
- she
413
- should
414
- shouldn't
415
- since
416
- six
417
- so
418
- some
419
- somebody
420
- somehow
421
- someone
422
- something
423
- sometime
424
- sometimes
425
- somewhat
426
- somewhere
427
- soon
428
- sorry
429
- specified
430
- specify
431
- specifying
432
- still
433
- sub
434
- such
435
- sup
436
- sure
437
- t
438
- t's
439
- take
440
- taken
441
- tell
442
- tends
443
- th
444
- than
445
- thank
446
- thanks
447
- thanx
448
- that
449
- that's
450
- thats
451
- the
452
- their
453
- theirs
454
- them
455
- themselves
456
- then
457
- thence
458
- there
459
- there's
460
- thereafter
461
- thereby
462
- therefore
463
- therein
464
- theres
465
- thereupon
466
- these
467
- they
468
- they'd
469
- they'll
470
- they're
471
- they've
472
- think
473
- third
474
- this
475
- thorough
476
- thoroughly
477
- those
478
- though
479
- three
480
- through
481
- throughout
482
- thru
483
- thus
484
- to
485
- together
486
- too
487
- took
488
- toward
489
- towards
490
- tried
491
- tries
492
- truly
493
- try
494
- trying
495
- twice
496
- two
497
- u
498
- un
499
- under
500
- unfortunately
501
- unless
502
- unlikely
503
- until
504
- unto
505
- up
506
- upon
507
- us
508
- use
509
- used
510
- useful
511
- uses
512
- using
513
- usually
514
- uucp
515
- v
516
- value
517
- various
518
- very
519
- via
520
- viz
521
- vs
522
- w
523
- want
524
- wants
525
- was
526
- wasn't
527
- way
528
- we
529
- we'd
530
- we'll
531
- we're
532
- we've
533
- welcome
534
- well
535
- went
536
- were
537
- weren't
538
- what
539
- what's
540
- whatever
541
- when
542
- whence
543
- whenever
544
- where
545
- where's
546
- whereafter
547
- whereas
548
- whereby
549
- wherein
550
- whereupon
551
- wherever
552
- whether
553
- which
554
- while
555
- whither
556
- who
557
- who's
558
- whoever
559
- whole
560
- whom
561
- whose
562
- why
563
- will
564
- willing
565
- wish
566
- with
567
- within
568
- without
569
- won't
570
- wonder
571
- would
572
- would
573
- wouldn't
574
- x
575
- y
576
- yes
577
- yet
578
- you
579
- you'd
580
- you'll
581
- you're
582
- you've
583
- your
584
- yours
585
- yourself
586
- yourselves
587
- z
588
- zero
589
- }
590
- end
591
-
592
- def self.strict_words
593
- @@strict ||= %w{
5
+ def self.stopwords
6
+ @@stopwords ||= %w{
594
7
  'll
595
8
  'tis
596
9
  'twas