stopwords 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. data/lib/stopwords.rb +43 -0
  2. metadata +5 -5
@@ -0,0 +1,43 @@
1
# Stop-word lookup and token validation helpers.
#
# All methods are module-level (`Stopwords.is?`, `Stopwords.valid?`) and
# compare tokens exactly as given: no downcasing or stripping is performed.
module Stopwords

  # Lower-case English stop words consulted by `is?` and `valid?`.
  #
  # NOTE(review): intentionally left unfrozen and kept as an Array so that any
  # existing caller that appends custom words to this constant keeps working
  # and is still honoured by the lookups below -- confirm before freezing.
  STOP_WORDS = %w[
    a cannot into our thus about co is ours to above
    could it ourselves together across down its out too
    after during itself over toward afterwards each last own
    towards again eg latter per under against either latterly
    perhaps until all else least rather up almost elsewhere
    less same upon alone enough ltd seem us along etc
    many seemed very already even may seeming via also ever
    me seems was although every meanwhile several we always
    everyone might she well among everything more should were
    amongst everywhere moreover since what an except most so
    whatever and few mostly some when another first much
    somehow whence any for must someone whenever anyhow
    former my something where anyone formerly myself sometime
    whereafter anything from namely sometimes whereas anywhere
    further neither somewhere whereby are had never still
    wherein around has nevertheless such whereupon as have
    next than wherever at he no that whether be hence
    nobody the whither became her none their which because
    here noone them while become hereafter nor themselves who
    becomes hereby not then whoever becoming herein nothing
    thence whole been hereupon now there whom before hers
    nowhere thereafter whose beforehand herself of thereby why
    behind him off therefore will being himself often therein
    with below his on thereupon within beside how once
    these without besides however one they would between i
    only this yet beyond ie onto those you both if or
    though your but in other through yours by inc others
    throughout yourself can indeed otherwise thru yourselves
  ]

  # Shapes accepted as a token, in order:
  #   1. only lower-case letters                      ("ruby")
  #   2. word chars, a hyphen, word chars (prefix)     ("foo-bar")
  #   3. letters, digits, letters                      ("a1b")
  #   4. digits followed by letters (prefix)           ("3d")
  #   5. letters followed by digits                    ("abc123")
  #
  # Anchored with \A / \z rather than ^ / $: the latter match at line
  # boundaries, which let multi-line input such as "ruby\nanything" pass.
  #
  # NOTE(review): alternatives 2 and 4 have no end anchor, so they accept any
  # trailing text after the matched prefix. Kept as-is for backward
  # compatibility -- confirm whether that is intended.
  TOKEN_REGEXP = /\A[a-z]+\z|\A\w+\-\w+|\A[a-z]+[0-9]+[a-z]+\z|\A[0-9]+[a-z]+|\A[a-z]+[0-9]+\z/

  # Tells whether +token+ is one of the STOP_WORDS (exact, case-sensitive).
  #
  # @param token [String] the word to look up
  # @return [Boolean] true when the token is in STOP_WORDS
  def self.is?(token)
    STOP_WORDS.include?(token)
  end

  # Tells whether +token+ is worth keeping: it must match TOKEN_REGEXP from
  # its very first character and must not be a stop word.
  #
  # @param token [String] the candidate token
  # @return [Boolean] true when the token is well-formed and not a stop word
  def self.valid?(token)
    # `=~` yields the match offset (or nil); requiring 0 rejects any match
    # that does not start at the beginning of the token.
    (token =~ TOKEN_REGEXP) == 0 && !STOP_WORDS.include?(token)
  end

end
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stopwords
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.1"
4
+ version: "0.2"
5
5
  platform: ruby
6
6
  authors:
7
7
  - ENDAX, LLC
8
- autorequire: stopwords
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
@@ -13,7 +13,7 @@ date: 2010-01-30 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
- description:
16
+ description: A stopword library
17
17
  email: john@endax.com
18
18
  executables: []
19
19
 
@@ -21,8 +21,8 @@ extensions: []
21
21
 
22
22
  extra_rdoc_files: []
23
23
 
24
- files: []
25
-
24
+ files:
25
+ - lib/stopwords.rb
26
26
  has_rdoc: true
27
27
  homepage: http://endax.github.com/
28
28
  licenses: []