keyphrase 0.1.3 → 0.2.0

Sign up for free protection for your applications and access to all features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +5 -4
  3. data/lib/keyphrase/stoplist/afr.rb +14 -0
  4. data/lib/keyphrase/stoplist/aka.rb +10 -0
  5. data/lib/keyphrase/stoplist/amh.rb +10 -0
  6. data/lib/keyphrase/stoplist/ara.rb +490 -0
  7. data/lib/keyphrase/stoplist/aze.rb +175 -0
  8. data/lib/keyphrase/stoplist/bel.rb +11 -0
  9. data/lib/keyphrase/stoplist/ben.rb +408 -0
  10. data/lib/keyphrase/stoplist/bul.rb +528 -0
  11. data/lib/keyphrase/stoplist/cat.rb +711 -0
  12. data/lib/keyphrase/stoplist/ces.rb +560 -0
  13. data/lib/keyphrase/stoplist/cmn.rb +1119 -0
  14. data/lib/keyphrase/stoplist/dan.rb +25 -0
  15. data/lib/keyphrase/stoplist/deu.rb +631 -0
  16. data/lib/keyphrase/stoplist/ell.rb +275 -0
  17. data/lib/keyphrase/stoplist/eng.rb +2 -589
  18. data/lib/keyphrase/stoplist/epo.rb +183 -0
  19. data/lib/keyphrase/stoplist/est.rb +13 -0
  20. data/lib/keyphrase/stoplist/fin.rb +857 -0
  21. data/lib/keyphrase/stoplist/fra.rb +699 -0
  22. data/lib/keyphrase/stoplist/guj.rb +234 -0
  23. data/lib/keyphrase/stoplist/heb.rb +204 -0
  24. data/lib/keyphrase/stoplist/hin.rb +235 -0
  25. data/lib/keyphrase/stoplist/hrv.rb +25 -0
  26. data/lib/keyphrase/stoplist/hun.rb +1195 -0
  27. data/lib/keyphrase/stoplist/hye.rb +55 -0
  28. data/lib/keyphrase/stoplist/ind.rb +768 -0
  29. data/lib/keyphrase/stoplist/ita.rb +670 -0
  30. data/lib/keyphrase/stoplist/jav.rb +10 -0
  31. data/lib/keyphrase/stoplist/jpn.rb +144 -0
  32. data/lib/keyphrase/stoplist/kan.rb +92 -0
  33. data/lib/keyphrase/stoplist/kat.rb +383 -0
  34. data/lib/keyphrase/stoplist/khm.rb +245 -0
  35. data/lib/keyphrase/stoplist/kor.rb +610 -0
  36. data/lib/keyphrase/stoplist/lat.rb +14 -0
  37. data/lib/keyphrase/stoplist/lav.rb +171 -0
  38. data/lib/keyphrase/stoplist/lit.rb +484 -0
  39. data/lib/keyphrase/stoplist/mal.rb +11 -0
  40. data/lib/keyphrase/stoplist/mar.rb +109 -0
  41. data/lib/keyphrase/stoplist/mkd.rb +11 -0
  42. data/lib/keyphrase/stoplist/mya.rb +285 -0
  43. data/lib/keyphrase/stoplist/nep.rb +265 -0
  44. data/lib/keyphrase/stoplist/nld.rb +423 -0
  45. data/lib/keyphrase/stoplist/nob.rb +186 -0
  46. data/lib/keyphrase/stoplist/ori.rb +11 -0
  47. data/lib/keyphrase/stoplist/pan.rb +473 -0
  48. data/lib/keyphrase/stoplist/pes.rb +801 -0
  49. data/lib/keyphrase/stoplist/pol.rb +338 -0
  50. data/lib/keyphrase/stoplist/por.rb +570 -0
  51. data/lib/keyphrase/stoplist/ron.rb +444 -0
  52. data/lib/keyphrase/stoplist/rus.rb +569 -0
  53. data/lib/keyphrase/stoplist/sin.rb +10 -0
  54. data/lib/keyphrase/stoplist/slk.rb +428 -0
  55. data/lib/keyphrase/stoplist/slv.rb +456 -0
  56. data/lib/keyphrase/stoplist/sna.rb +11 -0
  57. data/lib/keyphrase/stoplist/spa.rb +731 -0
  58. data/lib/keyphrase/stoplist/srp.rb +11 -0
  59. data/lib/keyphrase/stoplist/swe.rb +428 -0
  60. data/lib/keyphrase/stoplist/tam.rb +135 -0
  61. data/lib/keyphrase/stoplist/tel.rb +10 -0
  62. data/lib/keyphrase/stoplist/tgl.rb +157 -0
  63. data/lib/keyphrase/stoplist/tha.rb +125 -0
  64. data/lib/keyphrase/stoplist/tuk.rb +11 -0
  65. data/lib/keyphrase/stoplist/tur.rb +514 -0
  66. data/lib/keyphrase/stoplist/ukr.rb +38 -0
  67. data/lib/keyphrase/stoplist/urd.rb +527 -0
  68. data/lib/keyphrase/stoplist/uzb.rb +10 -0
  69. data/lib/keyphrase/stoplist/vie.rb +655 -0
  70. data/lib/keyphrase/stoplist/yid.rb +204 -0
  71. data/lib/keyphrase/stoplist/zul.rb +39 -0
  72. data/lib/keyphrase/stoplist.rb +13 -10
  73. data/lib/keyphrase/version.rb +1 -1
  74. data/lib/keyphrase.rb +20 -12
  75. metadata +71 -3
@@ -1,596 +1,9 @@
1
1
  class Keyphrase
2
2
  module Stoplist
3
3
  class Eng
4
- def self.smart
5
- @@smart_regex ||= /(?:^|\s)(?:#{smart_words.join('|')})(?:$|\s)/io
6
- end
7
4
 
8
- def self.strict
9
- @@strict_regex ||= /(?:^|\s)(?:#{strict_words.join('|')})(?:$|\s)/io
10
- end
11
-
12
- def self.smart_words
13
- @@smart ||= %w{
14
- dr
15
- dra
16
- mr
17
- ms
18
- a
19
- a's
20
- able
21
- about
22
- above
23
- according
24
- accordingly
25
- across
26
- actually
27
- after
28
- afterwards
29
- again
30
- against
31
- ain't
32
- all
33
- allow
34
- allows
35
- almost
36
- alone
37
- along
38
- already
39
- also
40
- although
41
- always
42
- am
43
- among
44
- amongst
45
- an
46
- and
47
- another
48
- any
49
- anybody
50
- anyhow
51
- anyone
52
- anything
53
- anyway
54
- anyways
55
- anywhere
56
- apart
57
- appear
58
- appreciate
59
- appropriate
60
- are
61
- aren't
62
- around
63
- as
64
- aside
65
- ask
66
- asking
67
- associated
68
- at
69
- available
70
- away
71
- awfully
72
- b
73
- be
74
- became
75
- because
76
- become
77
- becomes
78
- becoming
79
- been
80
- before
81
- beforehand
82
- behind
83
- being
84
- believe
85
- below
86
- beside
87
- besides
88
- best
89
- better
90
- between
91
- beyond
92
- both
93
- brief
94
- but
95
- by
96
- c
97
- c'mon
98
- c's
99
- came
100
- can
101
- can't
102
- cannot
103
- cant
104
- cause
105
- causes
106
- certain
107
- certainly
108
- changes
109
- clearly
110
- co
111
- com
112
- come
113
- comes
114
- concerning
115
- consequently
116
- consider
117
- considering
118
- contain
119
- containing
120
- contains
121
- corresponding
122
- could
123
- couldn't
124
- course
125
- currently
126
- d
127
- definitely
128
- described
129
- despite
130
- did
131
- didn't
132
- different
133
- do
134
- does
135
- doesn't
136
- doing
137
- don't
138
- done
139
- down
140
- downwards
141
- during
142
- e
143
- each
144
- edu
145
- eg
146
- eight
147
- either
148
- else
149
- elsewhere
150
- enough
151
- entirely
152
- especially
153
- et
154
- etc
155
- even
156
- ever
157
- every
158
- everybody
159
- everyone
160
- everything
161
- everywhere
162
- ex
163
- exactly
164
- example
165
- except
166
- f
167
- far
168
- few
169
- fifth
170
- first
171
- five
172
- followed
173
- following
174
- follows
175
- for
176
- former
177
- formerly
178
- forth
179
- four
180
- from
181
- further
182
- furthermore
183
- g
184
- get
185
- gets
186
- getting
187
- given
188
- gives
189
- go
190
- goes
191
- going
192
- gone
193
- got
194
- gotten
195
- greetings
196
- h
197
- had
198
- hadn't
199
- happens
200
- hardly
201
- has
202
- hasn't
203
- have
204
- haven't
205
- having
206
- he
207
- he's
208
- hello
209
- help
210
- hence
211
- her
212
- here
213
- here's
214
- hereafter
215
- hereby
216
- herein
217
- hereupon
218
- hers
219
- herself
220
- hi
221
- him
222
- himself
223
- his
224
- hither
225
- hopefully
226
- how
227
- howbeit
228
- however
229
- i
230
- i'd
231
- i'll
232
- i'm
233
- i've
234
- ie
235
- if
236
- ignored
237
- immediate
238
- in
239
- inasmuch
240
- inc
241
- indeed
242
- indicate
243
- indicated
244
- indicates
245
- inner
246
- insofar
247
- instead
248
- into
249
- inward
250
- is
251
- isn't
252
- it
253
- it'd
254
- it'll
255
- it's
256
- its
257
- itself
258
- j
259
- just
260
- k
261
- keep
262
- keeps
263
- kept
264
- know
265
- knows
266
- known
267
- l
268
- last
269
- lately
270
- later
271
- latter
272
- latterly
273
- least
274
- less
275
- lest
276
- let
277
- let's
278
- like
279
- liked
280
- likely
281
- little
282
- look
283
- looking
284
- looks
285
- ltd
286
- m
287
- mainly
288
- many
289
- may
290
- maybe
291
- me
292
- mean
293
- meanwhile
294
- merely
295
- might
296
- more
297
- moreover
298
- most
299
- mostly
300
- much
301
- must
302
- my
303
- myself
304
- n
305
- name
306
- namely
307
- nd
308
- near
309
- nearly
310
- necessary
311
- need
312
- needs
313
- neither
314
- never
315
- nevertheless
316
- new
317
- next
318
- nine
319
- no
320
- nobody
321
- non
322
- none
323
- noone
324
- nor
325
- normally
326
- not
327
- nothing
328
- novel
329
- now
330
- nowhere
331
- o
332
- obviously
333
- of
334
- off
335
- often
336
- oh
337
- ok
338
- okay
339
- old
340
- on
341
- once
342
- one
343
- ones
344
- only
345
- onto
346
- or
347
- other
348
- others
349
- otherwise
350
- ought
351
- our
352
- ours
353
- ourselves
354
- out
355
- outside
356
- over
357
- overall
358
- own
359
- p
360
- particular
361
- particularly
362
- per
363
- perhaps
364
- placed
365
- please
366
- plus
367
- possible
368
- presumably
369
- probably
370
- provides
371
- q
372
- que
373
- quite
374
- qv
375
- r
376
- rather
377
- rd
378
- re
379
- really
380
- reasonably
381
- regarding
382
- regardless
383
- regards
384
- relatively
385
- respectively
386
- right
387
- s
388
- said
389
- same
390
- saw
391
- say
392
- saying
393
- says
394
- second
395
- secondly
396
- see
397
- seeing
398
- seem
399
- seemed
400
- seeming
401
- seems
402
- seen
403
- self
404
- selves
405
- sensible
406
- sent
407
- serious
408
- seriously
409
- seven
410
- several
411
- shall
412
- she
413
- should
414
- shouldn't
415
- since
416
- six
417
- so
418
- some
419
- somebody
420
- somehow
421
- someone
422
- something
423
- sometime
424
- sometimes
425
- somewhat
426
- somewhere
427
- soon
428
- sorry
429
- specified
430
- specify
431
- specifying
432
- still
433
- sub
434
- such
435
- sup
436
- sure
437
- t
438
- t's
439
- take
440
- taken
441
- tell
442
- tends
443
- th
444
- than
445
- thank
446
- thanks
447
- thanx
448
- that
449
- that's
450
- thats
451
- the
452
- their
453
- theirs
454
- them
455
- themselves
456
- then
457
- thence
458
- there
459
- there's
460
- thereafter
461
- thereby
462
- therefore
463
- therein
464
- theres
465
- thereupon
466
- these
467
- they
468
- they'd
469
- they'll
470
- they're
471
- they've
472
- think
473
- third
474
- this
475
- thorough
476
- thoroughly
477
- those
478
- though
479
- three
480
- through
481
- throughout
482
- thru
483
- thus
484
- to
485
- together
486
- too
487
- took
488
- toward
489
- towards
490
- tried
491
- tries
492
- truly
493
- try
494
- trying
495
- twice
496
- two
497
- u
498
- un
499
- under
500
- unfortunately
501
- unless
502
- unlikely
503
- until
504
- unto
505
- up
506
- upon
507
- us
508
- use
509
- used
510
- useful
511
- uses
512
- using
513
- usually
514
- uucp
515
- v
516
- value
517
- various
518
- very
519
- via
520
- viz
521
- vs
522
- w
523
- want
524
- wants
525
- was
526
- wasn't
527
- way
528
- we
529
- we'd
530
- we'll
531
- we're
532
- we've
533
- welcome
534
- well
535
- went
536
- were
537
- weren't
538
- what
539
- what's
540
- whatever
541
- when
542
- whence
543
- whenever
544
- where
545
- where's
546
- whereafter
547
- whereas
548
- whereby
549
- wherein
550
- whereupon
551
- wherever
552
- whether
553
- which
554
- while
555
- whither
556
- who
557
- who's
558
- whoever
559
- whole
560
- whom
561
- whose
562
- why
563
- will
564
- willing
565
- wish
566
- with
567
- within
568
- without
569
- won't
570
- wonder
571
- would
572
- would
573
- wouldn't
574
- x
575
- y
576
- yes
577
- yet
578
- you
579
- you'd
580
- you'll
581
- you're
582
- you've
583
- your
584
- yours
585
- yourself
586
- yourselves
587
- z
588
- zero
589
- }
590
- end
591
-
592
- def self.strict_words
593
- @@strict ||= %w{
5
+ def self.stopwords
6
+ @@stopwords ||= %w{
594
7
  'll
595
8
  'tis
596
9
  'twas