stopwords 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/stopwords.rb +43 -0
- metadata +5 -5
data/lib/stopwords.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Stop-word lookup and token validation helpers.
#
# Exposes a fixed list of lowercase English stop words together with two
# predicates: one testing list membership and one testing whether a token has
# an accepted shape and is not a stop word.
module Stopwords
  # Lowercase stop words, in their original order.
  #
  # Deliberately left unfrozen so that existing callers which extend the list
  # at runtime keep working.
  STOP_WORDS = %w[
    a cannot into our thus about co is ours to above
    could it ourselves together across down its out too
    after during itself over toward afterwards each last own
    towards again eg latter per under against either latterly
    perhaps until all else least rather up almost elsewhere
    less same upon alone enough ltd seem us along etc
    many seemed very already even may seeming via also ever
    me seems was although every meanwhile several we always
    everyone might she well among everything more should were
    amongst everywhere moreover since what an except most so
    whatever and few mostly some when another first much
    somehow whence any for must someone whenever anyhow
    former my something where anyone formerly myself sometime
    whereafter anything from namely sometimes whereas anywhere
    further neither somewhere whereby are had never still
    wherein around has nevertheless such whereupon as have
    next than wherever at he no that whether be hence
    nobody the whither became her none their which because
    here noone them while become hereafter nor themselves who
    becomes hereby not then whoever becoming herein nothing
    thence whole been hereupon now there whom before hers
    nowhere thereafter whose beforehand herself of thereby why
    behind him off therefore will being himself often therein
    with below his on thereupon within beside how once
    these without besides however one they would between i
    only this yet beyond ie onto those you both if or
    though your but in other through yours by inc others
    throughout yourself can indeed otherwise thru yourselves
  ]

  # Shapes a token may take to be considered valid:
  #   * lowercase letters only                      ("ruby")
  #   * word characters, a hyphen, word characters  ("well-known")
  #   * letters, digits, letters                    ("a1b")
  #   * digits followed by letters                  ("3rd")
  #   * letters followed by digits                  ("mp3")
  #
  # Anchored with \A / \z rather than ^ / $ so that the pattern applies to the
  # whole string and not merely to its first line; with the line anchors a
  # token such as "the\n" matched here while slipping past the stop-word check.
  #
  # NOTE(review): the hyphenated and digits-then-letters alternatives carry no
  # end anchor, so trailing characters after them are accepted. That is kept
  # exactly as it was -- confirm whether it is intended.
  TOKEN_REGEXP = /\A[a-z]+\z|\A\w+\-\w+|\A[a-z]+[0-9]+[a-z]+\z|\A[0-9]+[a-z]+|\A[a-z]+[0-9]+\z/

  # Tells whether +token+ is one of the STOP_WORDS.
  #
  # The comparison is exact: no case folding or whitespace stripping is done.
  #
  # @param token [String] the candidate word
  # @return [Boolean] true when +token+ is in STOP_WORDS
  def self.is?(token)
    STOP_WORDS.member?(token)
  end

  # Tells whether +token+ matches TOKEN_REGEXP and is not a stop word.
  #
  # @param token [String] the candidate token
  # @return [Boolean] true when the token has an accepted shape and is not
  #   listed in STOP_WORDS
  def self.valid?(token)
    # Every alternative is anchored with \A, so a successful match is always
    # at index 0 and a failed one yields nil.
    (token =~ TOKEN_REGEXP) == 0 && !is?(token)
  end
end
|
metadata
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stopwords
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "0.1"
|
4
|
+
version: "0.2"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ENDAX, LLC
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
@@ -13,7 +13,7 @@ date: 2010-01-30 00:00:00 -05:00
|
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description:
|
16
|
+
description: A stopword library
|
17
17
|
email: john@endax.com
|
18
18
|
executables: []
|
19
19
|
|
@@ -21,8 +21,8 @@ extensions: []
|
|
21
21
|
|
22
22
|
extra_rdoc_files: []
|
23
23
|
|
24
|
-
files:
|
25
|
-
|
24
|
+
files:
|
25
|
+
- lib/stopwords.rb
|
26
26
|
has_rdoc: true
|
27
27
|
homepage: http://endax.github.com/
|
28
28
|
licenses: []
|