jake69mac-stopwords 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/stopwords.rb +63 -0
- metadata +65 -0
data/lib/stopwords.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
module Stopwords
|
2
|
+
|
3
|
+
STOP_WORDS = [
|
4
|
+
'a','about','above','across','after','again','against','all','almost',
|
5
|
+
'alone','along','already','also','although','always','among','an','and',
|
6
|
+
'another','any','anybody','anyone','anything','anywhere','are','area',
|
7
|
+
'areas','around','as','ask','asked','asking','asks','at','away','back',
|
8
|
+
'backed','backing','backs','be','became','because','become','becomes',
|
9
|
+
'been','before','began','behind','being','beings','best','better','between',
|
10
|
+
'big','both','but','by','came','can','cannot','case','cases','certain',
|
11
|
+
'certainly','clear','clearly','come','could','did','differ','different',
|
12
|
+
'differently','do','does','done','down','down','downed','downing','downs',
|
13
|
+
'during','e','each','early','either','end','ended','ending','ends','enough',
|
14
|
+
'even','evenly','ever','every','everybody','everyone','everything',
|
15
|
+
'everywhere','face','faces','fact','facts','far','felt','few','find',
|
16
|
+
'finds','first','for','four','from','full','fully','further','furthered',
|
17
|
+
'furthering','furthers','gave','general','generally','get','gets','give',
|
18
|
+
'given','gives','go','going','good','goods','got','great','greater',
|
19
|
+
'greatest','group','grouped','grouping','groups','had','has','have',
|
20
|
+
'having','he','her','here','herself','high','high','high','higher',
|
21
|
+
'highest','him','himself','his','how','however','i','if','important','in',
|
22
|
+
'interest','interested','interesting','interests','into','is','it','its',
|
23
|
+
'itself','just','keep','keeps','kind','knew','know','known','knows','large',
|
24
|
+
'largely','last','later','latest','least','less','let','lets','like','likely',
|
25
|
+
'long','longer','longest','made','make','making','man','many','may','me',
|
26
|
+
'member','members','men','might','more','most','mostly','mr','mrs','much',
|
27
|
+
'must','my','myself','necessary','need','needed','needing','needs','never',
|
28
|
+
'new','new','newer','newest','next','no','nobody','non','noone','not',
|
29
|
+
'nothing','now','nowhere','number','numbers','of','off','often','old','older',
|
30
|
+
'oldest','on','once','one','only','open','opened','opening','opens','or',
|
31
|
+
'order','ordered','ordering','orders','other','others','our','out','over',
|
32
|
+
'part','parted','parting','parts','per','perhaps','place','places','point',
|
33
|
+
'pointed','pointing','points','possible','present','presented','presenting',
|
34
|
+
'presents','problem','problems','put','puts','quite','rather','really','right',
|
35
|
+
'right','room','rooms','said','same','saw','say','says','second','seconds',
|
36
|
+
'see','seem','seemed','seeming','seems','sees','several','shall','she','should',
|
37
|
+
'show','showed','showing','shows','side','sides','since','small','smaller',
|
38
|
+
'smallest','so','some','somebody','someone','something','somewhere','state',
|
39
|
+
'states','still','such','sure','take','taken','than','that','the','their',
|
40
|
+
'them','then','there','therefore','these','they','thing','things','think',
|
41
|
+
'thinks','this','those','though','thought','thoughts','three','through',
|
42
|
+
'thus','to','today','together','too','took','toward','turn','turned','turning',
|
43
|
+
'turns','two','u','under','until','up','upon','us','use','used','uses','very',
|
44
|
+
'want','wanted','wanting','wants','was','way','ways','we','well','wells','went',
|
45
|
+
'were','what','when','where','whether','which','while','who','whole','whose',
|
46
|
+
'why','will','with','within','without','work','worked','working','works','would',
|
47
|
+
'year','years','yet','you','young','younger','youngest','your','yours','bt','jst',
|
48
|
+
'iv','x','xx','xxx','xxxx''xxxxx','ya','yeah','yo','here','oh','ha','haha','haaaa',
|
49
|
+
'ahahaha','go','do','hola','pls','lol','p','mah','eh','aw','tryna','run','u','ur','d','r',
|
50
|
+
'nw','lool','loool','looool','hehe','hehehe','aw','aww','awww','heh','ar','ooh','ooo',
|
51
|
+
'did'
|
52
|
+
]
|
53
|
+
TOKEN_REGEXP = /^[a-z]+$|^\w+\-\w+|^[a-z]+[0-9]+[a-z]+$|^[0-9]+[a-z]+|^[a-z]+[0-9]+$/
|
54
|
+
|
55
|
+
def self.is?(token)
|
56
|
+
STOP_WORDS.member?(token)
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.valid?(token)
|
60
|
+
(((token =~ TOKEN_REGEXP) == 0)) and !(STOP_WORDS.member?(token))
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jake69mac-stopwords
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- jake mcallister
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-06-20 00:00:00 Z
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description:
|
22
|
+
email:
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- lib/stopwords.rb
|
31
|
+
homepage:
|
32
|
+
licenses: []
|
33
|
+
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options: []
|
36
|
+
|
37
|
+
require_paths:
|
38
|
+
- lib
|
39
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 3
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
hash: 3
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.8.13
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: list of stopwords handy to remove words
|
64
|
+
test_files: []
|
65
|
+
|