jake69mac-stopwords 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/stopwords.rb +63 -0
- metadata +65 -0
    
        data/lib/stopwords.rb
    ADDED
    
    | @@ -0,0 +1,63 @@ | |
| 1 | 
            +
            # Stop-word list plus helpers for deciding whether a token is worth keeping.
            #
            # Lookups are exact string comparisons against STOP_WORDS; every entry in
            # the list is lowercase, and no case folding is applied to the token.
            module Stopwords
              # Words treated as noise: common English function words followed by a
              # tail of chat/SMS slang ('lol', 'pls', 'haha', ...).
              #
              # A few entries are repeated (e.g. 'down', 'high', 'new'); repeats are
              # harmless for membership checks and are kept so the array contents stay
              # as callers have always seen them. The constant is intentionally left
              # unfrozen so code that appends its own words at runtime keeps working.
              STOP_WORDS = [
                'a','about','above','across','after','again','against','all','almost',
                'alone','along','already','also','although','always','among','an','and',
                'another','any','anybody','anyone','anything','anywhere','are','area',
                'areas','around','as','ask','asked','asking','asks','at','away','back',
                'backed','backing','backs','be','became','because','become','becomes',
                'been','before','began','behind','being','beings','best','better','between',
                'big','both','but','by','came','can','cannot','case','cases','certain',
                'certainly','clear','clearly','come','could','did','differ','different',
                'differently','do','does','done','down','down','downed','downing','downs',
                'during','e','each','early','either','end','ended','ending','ends','enough',
                'even','evenly','ever','every','everybody','everyone','everything',
                'everywhere','face','faces','fact','facts','far','felt','few','find',
                'finds','first','for','four','from','full','fully','further','furthered',
                'furthering','furthers','gave','general','generally','get','gets','give',
                'given','gives','go','going','good','goods','got','great','greater',
                'greatest','group','grouped','grouping','groups','had','has','have',
                'having','he','her','here','herself','high','high','high','higher',
                'highest','him','himself','his','how','however','i','if','important','in',
                'interest','interested','interesting','interests','into','is','it','its',
                'itself','just','keep','keeps','kind','knew','know','known','knows','large',
                'largely','last','later','latest','least','less','let','lets','like','likely',
                'long','longer','longest','made','make','making','man','many','may','me',
                'member','members','men','might','more','most','mostly','mr','mrs','much',
                'must','my','myself','necessary','need','needed','needing','needs','never',
                'new','new','newer','newest','next','no','nobody','non','noone','not',
                'nothing','now','nowhere','number','numbers','of','off','often','old','older',
                'oldest','on','once','one','only','open','opened','opening','opens','or',
                'order','ordered','ordering','orders','other','others','our','out','over',
                'part','parted','parting','parts','per','perhaps','place','places','point',
                'pointed','pointing','points','possible','present','presented','presenting',
                'presents','problem','problems','put','puts','quite','rather','really','right',
                'right','room','rooms','said','same','saw','say','says','second','seconds',
                'see','seem','seemed','seeming','seems','sees','several','shall','she','should',
                'show','showed','showing','shows','side','sides','since','small','smaller',
                'smallest','so','some','somebody','someone','something','somewhere','state',
                'states','still','such','sure','take','taken','than','that','the','their',
                'them','then','there','therefore','these','they','thing','things','think',
                'thinks','this','those','though','thought','thoughts','three','through',
                'thus','to','today','together','too','took','toward','turn','turned','turning',
                'turns','two','u','under','until','up','upon','us','use','used','uses','very',
                'want','wanted','wanting','wants','was','way','ways','we','well','wells','went',
                'were','what','when','where','whether','which','while','who','whole','whose',
                'why','will','with','within','without','work','worked','working','works','would',
                'year','years','yet','you','young','younger','youngest','your','yours','bt','jst',
                # The comma between 'xxxx' and 'xxxxx' matters: without it Ruby joins the
                # two adjacent literals into the single entry 'xxxxxxxxx'.
                'iv','x','xx','xxx','xxxx','xxxxx','ya','yeah','yo','here','oh','ha','haha','haaaa',
                'ahahaha','go','do','hola','pls','lol','p','mah','eh','aw','tryna','run','u','ur','d','r',
                'nw','lool','loool','looool','hehe','hehehe','aw','aww','awww','heh','ar','ooh','ooo',
                'did'
              ]

              # Shapes a token may take to be considered valid. Alternatives, in order:
              #   1. lowercase letters only
              #   2. word characters, a hyphen, word characters (prefix match, no end anchor)
              #   3. lowercase letters, digits, lowercase letters
              #   4. digits followed by lowercase letters (prefix match, no end anchor)
              #   5. lowercase letters followed by digits
              #
              # NOTE(review): `^` and `$` are line anchors in Ruby, so a token such as
              # "abc\nanything" satisfies the first alternative. The pattern is left
              # untouched because it is a public constant; confirm with callers before
              # tightening it to `\A` / `\z`.
              TOKEN_REGEXP = /^[a-z]+$|^\w+\-\w+|^[a-z]+[0-9]+[a-z]+$|^[0-9]+[a-z]+|^[a-z]+[0-9]+$/

              # Tells whether +token+ is one of the STOP_WORDS.
              #
              # @param token [String] candidate word; compared exactly, without case folding
              # @return [Boolean] true when +token+ appears in STOP_WORDS
              def self.is?(token)
                STOP_WORDS.include?(token)
              end

              # Tells whether +token+ is worth keeping: TOKEN_REGEXP must match starting
              # at index 0 and the token must not be a stop word.
              #
              # @param token [String] candidate word
              # @return [Boolean] true when the token is well-formed and not a stop word
              def self.valid?(token)
                # `=~` yields the index of the first match (or nil); requiring 0 rejects
                # strings whose only match begins on a later line.
                (token =~ TOKEN_REGEXP) == 0 && !is?(token)
              end
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,65 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification 
         | 
| 2 | 
            +
            name: jake69mac-stopwords
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            +
              hash: 23
         | 
| 5 | 
            +
              prerelease: 
         | 
| 6 | 
            +
              segments: 
         | 
| 7 | 
            +
              - 1
         | 
| 8 | 
            +
              - 0
         | 
| 9 | 
            +
              - 0
         | 
| 10 | 
            +
              version: 1.0.0
         | 
| 11 | 
            +
            platform: ruby
         | 
| 12 | 
            +
            authors: 
         | 
| 13 | 
            +
            - jake mcallister
         | 
| 14 | 
            +
            autorequire: 
         | 
| 15 | 
            +
            bindir: bin
         | 
| 16 | 
            +
            cert_chain: []
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            date: 2012-06-20 00:00:00 Z
         | 
| 19 | 
            +
            dependencies: []
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            description: 
         | 
| 22 | 
            +
            email: 
         | 
| 23 | 
            +
            executables: []
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            extensions: []
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            extra_rdoc_files: []
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            files: 
         | 
| 30 | 
            +
            - lib/stopwords.rb
         | 
| 31 | 
            +
            homepage: 
         | 
| 32 | 
            +
            licenses: []
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            post_install_message: 
         | 
| 35 | 
            +
            rdoc_options: []
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            require_paths: 
         | 
| 38 | 
            +
            - lib
         | 
| 39 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 40 | 
            +
              none: false
         | 
| 41 | 
            +
              requirements: 
         | 
| 42 | 
            +
              - - ">="
         | 
| 43 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 44 | 
            +
                  hash: 3
         | 
| 45 | 
            +
                  segments: 
         | 
| 46 | 
            +
                  - 0
         | 
| 47 | 
            +
                  version: "0"
         | 
| 48 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 49 | 
            +
              none: false
         | 
| 50 | 
            +
              requirements: 
         | 
| 51 | 
            +
              - - ">="
         | 
| 52 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 53 | 
            +
                  hash: 3
         | 
| 54 | 
            +
                  segments: 
         | 
| 55 | 
            +
                  - 0
         | 
| 56 | 
            +
                  version: "0"
         | 
| 57 | 
            +
            requirements: []
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            rubyforge_project: 
         | 
| 60 | 
            +
            rubygems_version: 1.8.13
         | 
| 61 | 
            +
            signing_key: 
         | 
| 62 | 
            +
            specification_version: 3
         | 
| 63 | 
            +
            summary: list of stopwords handy to remove words
         | 
| 64 | 
            +
            test_files: []
         | 
| 65 | 
            +
             |