jake69mac-stopwords 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/stopwords.rb +63 -0
  2. metadata +65 -0
data/lib/stopwords.rb ADDED
@@ -0,0 +1,63 @@
# Stop-word list plus helpers for deciding whether a token should be kept.
#
# Exposes two constants (STOP_WORDS, TOKEN_REGEXP) and two module-level
# predicates (Stopwords.is? and Stopwords.valid?).
module Stopwords

  # Words that Stopwords.is? reports as stop words, all lowercase.
  # A few entries are repeated (e.g. 'down', 'high', 'new', 'right', 'did');
  # the repeats are kept as-is and do not affect membership tests.
  STOP_WORDS = [
    'a','about','above','across','after','again','against','all','almost',
    'alone','along','already','also','although','always','among','an','and',
    'another','any','anybody','anyone','anything','anywhere','are','area',
    'areas','around','as','ask','asked','asking','asks','at','away','back',
    'backed','backing','backs','be','became','because','become','becomes',
    'been','before','began','behind','being','beings','best','better','between',
    'big','both','but','by','came','can','cannot','case','cases','certain',
    'certainly','clear','clearly','come','could','did','differ','different',
    'differently','do','does','done','down','down','downed','downing','downs',
    'during','e','each','early','either','end','ended','ending','ends','enough',
    'even','evenly','ever','every','everybody','everyone','everything',
    'everywhere','face','faces','fact','facts','far','felt','few','find',
    'finds','first','for','four','from','full','fully','further','furthered',
    'furthering','furthers','gave','general','generally','get','gets','give',
    'given','gives','go','going','good','goods','got','great','greater',
    'greatest','group','grouped','grouping','groups','had','has','have',
    'having','he','her','here','herself','high','high','high','higher',
    'highest','him','himself','his','how','however','i','if','important','in',
    'interest','interested','interesting','interests','into','is','it','its',
    'itself','just','keep','keeps','kind','knew','know','known','knows','large',
    'largely','last','later','latest','least','less','let','lets','like','likely',
    'long','longer','longest','made','make','making','man','many','may','me',
    'member','members','men','might','more','most','mostly','mr','mrs','much',
    'must','my','myself','necessary','need','needed','needing','needs','never',
    'new','new','newer','newest','next','no','nobody','non','noone','not',
    'nothing','now','nowhere','number','numbers','of','off','often','old','older',
    'oldest','on','once','one','only','open','opened','opening','opens','or',
    'order','ordered','ordering','orders','other','others','our','out','over',
    'part','parted','parting','parts','per','perhaps','place','places','point',
    'pointed','pointing','points','possible','present','presented','presenting',
    'presents','problem','problems','put','puts','quite','rather','really','right',
    'right','room','rooms','said','same','saw','say','says','second','seconds',
    'see','seem','seemed','seeming','seems','sees','several','shall','she','should',
    'show','showed','showing','shows','side','sides','since','small','smaller',
    'smallest','so','some','somebody','someone','something','somewhere','state',
    'states','still','such','sure','take','taken','than','that','the','their',
    'them','then','there','therefore','these','they','thing','things','think',
    'thinks','this','those','though','thought','thoughts','three','through',
    'thus','to','today','together','too','took','toward','turn','turned','turning',
    'turns','two','u','under','until','up','upon','us','use','used','uses','very',
    'want','wanted','wanting','wants','was','way','ways','we','well','wells','went',
    'were','what','when','where','whether','which','while','who','whole','whose',
    'why','will','with','within','without','work','worked','working','works','would',
    'year','years','yet','you','young','younger','youngest','your','yours','bt','jst',
    # 'xxxx' and 'xxxxx' must be separated by a comma: adjacent string
    # literals are concatenated by Ruby into the single entry 'xxxxxxxxx'.
    'iv','x','xx','xxx','xxxx','xxxxx','ya','yeah','yo','here','oh','ha','haha','haaaa',
    'ahahaha','go','do','hola','pls','lol','p','mah','eh','aw','tryna','run','u','ur','d','r',
    'nw','lool','loool','looool','hehe','hehehe','aw','aww','awww','heh','ar','ooh','ooo',
    'did'
  ]

  # Shapes a token may take to be considered by Stopwords.valid?:
  #   ^[a-z]+$               lowercase letters only
  #   ^\w+\-\w+              word characters, a hyphen, word characters
  #   ^[a-z]+[0-9]+[a-z]+$   letters, digits, letters
  #   ^[0-9]+[a-z]+          digits followed by letters
  #   ^[a-z]+[0-9]+$         letters followed by digits
  # The second and fourth alternatives have no end anchor, so they accept any
  # trailing text after the matched prefix.
  # NOTE(review): ^ and $ are line anchors in Ruby, so a token containing a
  # newline can still match; switch to \A / \z if that is not intended.
  TOKEN_REGEXP = /^[a-z]+$|^\w+\-\w+|^[a-z]+[0-9]+[a-z]+$|^[0-9]+[a-z]+|^[a-z]+[0-9]+$/

  # Tells whether +token+ is one of the STOP_WORDS entries.
  #
  # The comparison is an exact match against the list, so it is
  # case-sensitive.
  #
  # @param token [String] the token to look up
  # @return [Boolean] true when +token+ appears in STOP_WORDS
  def self.is?(token)
    STOP_WORDS.include?(token)
  end

  # Tells whether +token+ is worth keeping: TOKEN_REGEXP must match it
  # starting at index 0, and it must not be listed in STOP_WORDS.
  #
  # @param token [String] the token to check
  # @return [Boolean] true when the token matches and is not a stop word
  def self.valid?(token)
    # =~ yields the match offset (or nil); requiring 0 rejects matches that
    # only begin on a later line of the token.
    (token =~ TOKEN_REGEXP) == 0 && !STOP_WORDS.include?(token)
  end

end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jake69mac-stopwords
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
+ platform: ruby
12
+ authors:
13
+ - jake mcallister
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-06-20 00:00:00 Z
19
+ dependencies: []
20
+
21
+ description:
22
+ email:
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files: []
28
+
29
+ files:
30
+ - lib/stopwords.rb
31
+ homepage:
32
+ licenses: []
33
+
34
+ post_install_message:
35
+ rdoc_options: []
36
+
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ hash: 3
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.13
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: list of stopwords handy to remove words
64
+ test_files: []
65
+