stopwords 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/stopwords.rb +43 -0
  2. metadata +5 -5
@@ -0,0 +1,43 @@
1
+ module Stopwords
2
+
3
+ STOP_WORDS = [
4
+ 'a','cannot','into','our','thus','about','co','is','ours','to','above',
5
+ 'could','it','ourselves','together','across','down','its','out','too',
6
+ 'after','during','itself','over','toward','afterwards','each','last','own',
7
+ 'towards','again','eg','latter','per','under','against','either','latterly',
8
+ 'perhaps','until','all','else','least','rather','up','almost','elsewhere',
9
+ 'less','same','upon','alone','enough','ltd','seem','us','along','etc',
10
+ 'many','seemed','very','already','even','may','seeming','via','also','ever',
11
+ 'me','seems','was','although','every','meanwhile','several','we','always',
12
+ 'everyone','might','she','well','among','everything','more','should','were',
13
+ 'amongst','everywhere','moreover','since','what','an','except','most','so',
14
+ 'whatever','and','few','mostly','some','when','another','first','much',
15
+ 'somehow','whence','any','for','must','someone','whenever','anyhow',
16
+ 'former','my','something','where','anyone','formerly','myself','sometime',
17
+ 'whereafter','anything','from','namely','sometimes','whereas','anywhere',
18
+ 'further','neither','somewhere','whereby','are','had','never','still',
19
+ 'wherein','around','has','nevertheless','such','whereupon','as','have',
20
+ 'next','than','wherever','at','he','no','that','whether','be','hence',
21
+ 'nobody','the','whither','became','her','none','their','which','because',
22
+ 'here','noone','them','while','become','hereafter','nor','themselves','who',
23
+ 'becomes','hereby','not','then','whoever','becoming','herein','nothing',
24
+ 'thence','whole','been','hereupon','now','there','whom','before','hers',
25
+ 'nowhere','thereafter','whose','beforehand','herself','of','thereby','why',
26
+ 'behind','him','off','therefore','will','being','himself','often','therein',
27
+ 'with','below','his','on','thereupon','within','beside','how','once',
28
+ 'these','without','besides','however','one','they','would','between','i',
29
+ 'only','this','yet','beyond','ie','onto','those','you','both','if','or',
30
+ 'though','your','but','in','other','through','yours','by','inc','others',
31
+ 'throughout','yourself','can','indeed','otherwise','thru','yourselves'
32
+ ]
33
+ TOKEN_REGEXP = /^[a-z]+$|^\w+\-\w+|^[a-z]+[0-9]+[a-z]+$|^[0-9]+[a-z]+|^[a-z]+[0-9]+$/
34
+
35
+ def self.is?(token)
36
+ STOP_WORDS.member?(token)
37
+ end
38
+
39
+ def self.valid?(token)
40
+ (((token =~ TOKEN_REGEXP) == 0)) and !(STOP_WORDS.member?(token))
41
+ end
42
+
43
+ end
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stopwords
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - ENDAX, LLC
8
- autorequire: stopwords
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
@@ -13,7 +13,7 @@ date: 2010-01-30 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
- description:
16
+ description: A stopword library
17
17
  email: john@endax.com
18
18
  executables: []
19
19
 
@@ -21,8 +21,8 @@ extensions: []
21
21
 
22
22
  extra_rdoc_files: []
23
23
 
24
- files: []
25
-
24
+ files:
25
+ - lib/stopwords.rb
26
26
  has_rdoc: true
27
27
  homepage: http://endax.github.com/
28
28
  licenses: []