words_counted 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/README.md +16 -16
 - data/lib/words_counted/counter.rb +22 -18
 - data/lib/words_counted/version.rb +1 -1
 - data/lib/words_counted.rb +1 -4
 - data/spec/words_counted/counter_spec.rb +19 -9
 - data/words_counted.gemspec +1 -1
 - metadata +3 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 07c1e76ee27525e7aa28de6a61dedde8ba6eae39
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: e8062169aaf99c19947a246ff33385e1fca928a7
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 680cc6c8048a809941f4e23c53c99a2ee7fed5e3d0fe7943d9842b29967c7d19f8430143d56ed510042a493e166c99c4ae70c9b82e4b21fb4aeb3dbe9280f52e
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: ec317b1d90f3f14ba399996c8061835393d4aac6dd55f8ebc342b95a315c710bd62d3c9981e37cff6397863b8b5e10744748b1cede99e610dbf5cdd59aafcb90
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -50,7 +50,7 @@ counter = WordsCounted.count( 
     | 
|
| 
       50 
50 
     | 
    
         
             
            )
         
     | 
| 
       51 
51 
     | 
    
         | 
| 
       52 
52 
     | 
    
         
             
            # Using a file
         
     | 
| 
       53 
     | 
    
         
            -
            counter = WordsCounted.from_file("path/to/my/file.txt")
         
     | 
| 
      
 53 
     | 
    
         
            +
            counter = WordsCounted.from_file("path/or/url/to/my/file.txt")
         
     | 
| 
       54 
54 
     | 
    
         
             
            ```
         
     | 
| 
       55 
55 
     | 
    
         | 
| 
       56 
56 
     | 
    
         
             
            ### API
         
     | 
| 
         @@ -172,9 +172,9 @@ counter.words 
     | 
|
| 
       172 
172 
     | 
    
         
             
            #=> ["We", "are", "all", "in", "the", "gutter", "but", "some", "of", "us", "are", "looking", "at", "the", "stars"]
         
     | 
| 
       173 
173 
     | 
    
         
             
            ```
         
     | 
| 
       174 
174 
     | 
    
         | 
| 
       175 
     | 
    
         
            -
            #### `.word_density`
         
     | 
| 
      
 175 
     | 
    
         
            +
            #### `.word_density([ precision = 2 ])`
         
     | 
| 
       176 
176 
     | 
    
         | 
| 
       177 
     | 
    
         
            -
            Returns a two-dimensional array of words and their density.
         
     | 
| 
      
 177 
     | 
    
         
            +
            Returns a two-dimensional array of words and their density. Accepts a precision argument which defaults to two.
         
     | 
| 
       178 
178 
     | 
    
         | 
| 
       179 
179 
     | 
    
         
             
            ```ruby
         
     | 
| 
       180 
180 
     | 
    
         
             
            counter.word_density
         
     | 
| 
         @@ -182,17 +182,17 @@ counter.word_density 
     | 
|
| 
       182 
182 
     | 
    
         
             
            [
         
     | 
| 
       183 
183 
     | 
    
         
             
              ["are",     13.33],
         
     | 
| 
       184 
184 
     | 
    
         
             
              ["the",     13.33],
         
     | 
| 
       185 
     | 
    
         
            -
              ["but",     6.67],
         
     | 
| 
       186 
     | 
    
         
            -
              ["us",      6.67],
         
     | 
| 
       187 
     | 
    
         
            -
              ["of",      6.67],
         
     | 
| 
       188 
     | 
    
         
            -
              ["some",    6.67],
         
     | 
| 
       189 
     | 
    
         
            -
              ["looking", 6.67],
         
     | 
| 
       190 
     | 
    
         
            -
              ["gutter",  6.67],
         
     | 
| 
       191 
     | 
    
         
            -
              ["at",      6.67],
         
     | 
| 
       192 
     | 
    
         
            -
              ["in",      6.67],
         
     | 
| 
       193 
     | 
    
         
            -
              ["all",     6.67],
         
     | 
| 
       194 
     | 
    
         
            -
              ["stars",   6.67],
         
     | 
| 
       195 
     | 
    
         
            -
              ["we",      6.67]
         
     | 
| 
      
 185 
     | 
    
         
            +
              ["but",     6.67 ],
         
     | 
| 
      
 186 
     | 
    
         
            +
              ["us",      6.67 ],
         
     | 
| 
      
 187 
     | 
    
         
            +
              ["of",      6.67 ],
         
     | 
| 
      
 188 
     | 
    
         
            +
              ["some",    6.67 ],
         
     | 
| 
      
 189 
     | 
    
         
            +
              ["looking", 6.67 ],
         
     | 
| 
      
 190 
     | 
    
         
            +
              ["gutter",  6.67 ],
         
     | 
| 
      
 191 
     | 
    
         
            +
              ["at",      6.67 ],
         
     | 
| 
      
 192 
     | 
    
         
            +
              ["in",      6.67 ],
         
     | 
| 
      
 193 
     | 
    
         
            +
              ["all",     6.67 ],
         
     | 
| 
      
 194 
     | 
    
         
            +
              ["stars",   6.67 ],
         
     | 
| 
      
 195 
     | 
    
         
            +
              ["we",      6.67 ]
         
     | 
| 
       196 
196 
     | 
    
         
             
            ]
         
     | 
| 
       197 
197 
     | 
    
         
             
            ```
         
     | 
| 
       198 
198 
     | 
    
         | 
| 
         @@ -204,9 +204,9 @@ Returns the string's character count. 
     | 
|
| 
       204 
204 
     | 
    
         
             
            counter.char_count              #=> 76
         
     | 
| 
       205 
205 
     | 
    
         
             
            ```
         
     | 
| 
       206 
206 
     | 
    
         | 
| 
       207 
     | 
    
         
            -
            #### `.average_chars_per_word`
         
     | 
| 
      
 207 
     | 
    
         
            +
            #### `.average_chars_per_word([ precision = 2 ])`
         
     | 
| 
       208 
208 
     | 
    
         | 
| 
       209 
     | 
    
         
            -
            Returns the average character count per word.
         
     | 
| 
      
 209 
     | 
    
         
            +
            Returns the average character count per word. Accepts a precision argument which defaults to two.
         
     | 
| 
       210 
210 
     | 
    
         | 
| 
       211 
211 
     | 
    
         
             
            ```ruby
         
     | 
| 
       212 
212 
     | 
    
         
             
            counter.average_chars_per_word  #=> 4
         
     | 
    
        data/lib/words_counted/counter.rb
    CHANGED
    
    | 
         @@ -4,14 +4,18 @@ module WordsCounted 
     | 
|
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
                WORD_REGEXP = /[\p{Alpha}\-']+/
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
      
 7 
     | 
    
         
            +
                def self.from_file(path, options = {})
         
     | 
| 
      
 8 
     | 
    
         
            +
                  File.open(path) do |file|
         
     | 
| 
      
 9 
     | 
    
         
            +
                    new file.read, options
         
     | 
| 
      
 10 
     | 
    
         
            +
                  end
         
     | 
| 
      
 11 
     | 
    
         
            +
                end
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
       7 
13 
     | 
    
         
             
                def initialize(string, options = {})
         
     | 
| 
       8 
14 
     | 
    
         
             
                  @options = options
         
     | 
| 
       9 
15 
     | 
    
         
             
                  exclude = filter_proc(options[:exclude])
         
     | 
| 
       10 
16 
     | 
    
         
             
                  @words = string.scan(regexp).reject { |word| exclude.call(word) }
         
     | 
| 
       11 
17 
     | 
    
         
             
                  @char_count = @words.join.size
         
     | 
| 
       12 
     | 
    
         
            -
                  @word_occurrences = words.each_with_object(Hash.new(0))  
     | 
| 
       13 
     | 
    
         
            -
                    hash[word.downcase] += 1
         
     | 
| 
       14 
     | 
    
         
            -
                  end
         
     | 
| 
      
 18 
     | 
    
         
            +
                  @word_occurrences = words.each_with_object(Hash.new(0)) { |word, hash| hash[word.downcase] += 1 }
         
     | 
| 
       15 
19 
     | 
    
         
             
                  @word_lengths = words.each_with_object({}) { |word, hash| hash[word] ||= word.length }
         
     | 
| 
       16 
20 
     | 
    
         
             
                end
         
     | 
| 
       17 
21 
     | 
    
         | 
| 
         @@ -23,8 +27,8 @@ module WordsCounted 
     | 
|
| 
       23 
27 
     | 
    
         
             
                  words.uniq.size
         
     | 
| 
       24 
28 
     | 
    
         
             
                end
         
     | 
| 
       25 
29 
     | 
    
         | 
| 
       26 
     | 
    
         
            -
                def average_chars_per_word
         
     | 
| 
       27 
     | 
    
         
            -
                  (char_count / word_count).round( 
     | 
| 
      
 30 
     | 
    
         
            +
                def average_chars_per_word(precision = 2)
         
     | 
| 
      
 31 
     | 
    
         
            +
                  (char_count.to_f / word_count.to_f).round(precision)
         
     | 
| 
       28 
32 
     | 
    
         
             
                end
         
     | 
| 
       29 
33 
     | 
    
         | 
| 
       30 
34 
     | 
    
         
             
                def most_occurring_words
         
     | 
| 
         @@ -35,28 +39,29 @@ module WordsCounted 
     | 
|
| 
       35 
39 
     | 
    
         
             
                  highest_ranking word_lengths
         
     | 
| 
       36 
40 
     | 
    
         
             
                end
         
     | 
| 
       37 
41 
     | 
    
         | 
| 
       38 
     | 
    
         
            -
                def word_density
         
     | 
| 
       39 
     | 
    
         
            -
                  word_occurrences.each_with_object({}) do |(word, occ), hash|
         
     | 
| 
       40 
     | 
    
         
            -
                    hash[word] =  
     | 
| 
       41 
     | 
    
         
            -
                  end 
     | 
| 
      
 42 
     | 
    
         
            +
                def word_density(precision = 2)
         
     | 
| 
      
 43 
     | 
    
         
            +
                  word_densities = word_occurrences.each_with_object({}) do |(word, occ), hash|
         
     | 
| 
      
 44 
     | 
    
         
            +
                    hash[word] = (occ.to_f / word_count.to_f * 100).round(precision)
         
     | 
| 
      
 45 
     | 
    
         
            +
                  end
         
     | 
| 
      
 46 
     | 
    
         
            +
                  sort_by_descending_value word_densities
         
     | 
| 
       42 
47 
     | 
    
         
             
                end
         
     | 
| 
       43 
48 
     | 
    
         | 
| 
       44 
49 
     | 
    
         
             
                def sorted_word_occurrences
         
     | 
| 
       45 
     | 
    
         
            -
                  word_occurrences 
     | 
| 
      
 50 
     | 
    
         
            +
                  sort_by_descending_value word_occurrences
         
     | 
| 
       46 
51 
     | 
    
         
             
                end
         
     | 
| 
       47 
52 
     | 
    
         | 
| 
       48 
53 
     | 
    
         
             
                def sorted_word_lengths
         
     | 
| 
       49 
     | 
    
         
            -
                  word_lengths 
     | 
| 
      
 54 
     | 
    
         
            +
                  sort_by_descending_value word_lengths
         
     | 
| 
       50 
55 
     | 
    
         
             
                end
         
     | 
| 
       51 
56 
     | 
    
         | 
| 
       52 
57 
     | 
    
         
             
              private
         
     | 
| 
       53 
58 
     | 
    
         | 
| 
       54 
59 
     | 
    
         
             
                def highest_ranking(entries)
         
     | 
| 
       55 
     | 
    
         
            -
                  entries.group_by { | 
     | 
| 
      
 60 
     | 
    
         
            +
                  entries.group_by { |_, value| value }.sort.last.last
         
     | 
| 
       56 
61 
     | 
    
         
             
                end
         
     | 
| 
       57 
62 
     | 
    
         | 
| 
       58 
     | 
    
         
            -
                def  
     | 
| 
       59 
     | 
    
         
            -
                   
     | 
| 
      
 63 
     | 
    
         
            +
                def sort_by_descending_value(entries)
         
     | 
| 
      
 64 
     | 
    
         
            +
                  entries.sort_by { |_, value| value }.reverse
         
     | 
| 
       60 
65 
     | 
    
         
             
                end
         
     | 
| 
       61 
66 
     | 
    
         | 
| 
       62 
67 
     | 
    
         
             
                def regexp
         
     | 
| 
         @@ -74,13 +79,12 @@ module WordsCounted 
     | 
|
| 
       74 
79 
     | 
    
         
             
                    ->(word) {
         
     | 
| 
       75 
80 
     | 
    
         
             
                      exclusion_list.include?(word.downcase)
         
     | 
| 
       76 
81 
     | 
    
         
             
                    }
         
     | 
| 
       77 
     | 
    
         
            -
                  elsif Regexp.try_convert(filter)
         
     | 
| 
       78 
     | 
    
         
            -
                     
     | 
| 
       79 
     | 
    
         
            -
                    Proc.new { |word| word =~ filter }
         
     | 
| 
      
 82 
     | 
    
         
            +
                  elsif regexp_filter = Regexp.try_convert(filter)
         
     | 
| 
      
 83 
     | 
    
         
            +
                    Proc.new { |word| word =~ regexp_filter }
         
     | 
| 
       80 
84 
     | 
    
         
             
                  elsif filter.respond_to?(:to_proc)
         
     | 
| 
       81 
85 
     | 
    
         
             
                    filter.to_proc
         
     | 
| 
       82 
86 
     | 
    
         
             
                  else
         
     | 
| 
       83 
     | 
    
         
            -
                    raise ArgumentError, "Filter must String, Array,  
     | 
| 
      
 87 
     | 
    
         
            +
                    raise ArgumentError, "Filter must String, Array, Lambda, or Regexp"
         
     | 
| 
       84 
88 
     | 
    
         
             
                  end
         
     | 
| 
       85 
89 
     | 
    
         
             
                end
         
     | 
| 
       86 
90 
     | 
    
         
             
              end
         
     | 
    
        data/lib/words_counted.rb
    CHANGED
    
        data/spec/words_counted/counter_spec.rb
    CHANGED
    
    
| 
         @@ -175,32 +175,42 @@ module WordsCounted 
     | 
|
| 
       175 
175 
     | 
    
         
             
                  end
         
     | 
| 
       176 
176 
     | 
    
         | 
| 
       177 
177 
     | 
    
         
             
                  it "returns words and their density in percent" do
         
     | 
| 
       178 
     | 
    
         
            -
                    counter = Counter.new("His name was  
     | 
| 
       179 
     | 
    
         
            -
                    expect(counter.word_density).to eq([["major",  
     | 
| 
      
 178 
     | 
    
         
            +
                    counter = Counter.new("His name was Major, major Major Major.")
         
     | 
| 
      
 179 
     | 
    
         
            +
                    expect(counter.word_density).to eq([["major", 57.14], ["was", 14.29], ["name", 14.29], ["his", 14.29]])
         
     | 
| 
      
 180 
     | 
    
         
            +
                  end
         
     | 
| 
      
 181 
     | 
    
         
            +
             
     | 
| 
      
 182 
     | 
    
         
            +
                  it "accepts a precision" do
         
     | 
| 
      
 183 
     | 
    
         
            +
                    counter = Counter.new("His name was Major, major Major Major.")
         
     | 
| 
      
 184 
     | 
    
         
            +
                    expect(counter.word_density(4)).to eq([["major", 57.1429], ["was", 14.2857], ["name", 14.2857], ["his", 14.2857]])
         
     | 
| 
       180 
185 
     | 
    
         
             
                  end
         
     | 
| 
       181 
186 
     | 
    
         
             
                end
         
     | 
| 
       182 
187 
     | 
    
         | 
| 
       183 
188 
     | 
    
         
             
                describe "char_count" do
         
     | 
| 
       184 
189 
     | 
    
         
             
                  it "returns the number of chars in the passed in string" do
         
     | 
| 
       185 
     | 
    
         
            -
                    counter = Counter.new("His name was  
     | 
| 
       186 
     | 
    
         
            -
                    expect(counter.char_count).to eq( 
     | 
| 
      
 190 
     | 
    
         
            +
                    counter = Counter.new("His name was Major, major Major Major.")
         
     | 
| 
      
 191 
     | 
    
         
            +
                    expect(counter.char_count).to eq(30)
         
     | 
| 
       187 
192 
     | 
    
         
             
                  end
         
     | 
| 
       188 
193 
     | 
    
         | 
| 
       189 
194 
     | 
    
         
             
                  it "returns the number of chars in the passed in string after the filter is applied" do
         
     | 
| 
       190 
     | 
    
         
            -
                    counter = Counter.new("His name was  
     | 
| 
      
 195 
     | 
    
         
            +
                    counter = Counter.new("His name was Major, major Major Major.", exclude: "Major")
         
     | 
| 
       191 
196 
     | 
    
         
             
                    expect(counter.char_count).to eq(10)
         
     | 
| 
       192 
197 
     | 
    
         
             
                  end
         
     | 
| 
       193 
198 
     | 
    
         
             
                end
         
     | 
| 
       194 
199 
     | 
    
         | 
| 
       195 
200 
     | 
    
         
             
                describe "average_chars_per_word" do
         
     | 
| 
       196 
201 
     | 
    
         
             
                  it "returns the average number of chars per word" do
         
     | 
| 
       197 
     | 
    
         
            -
                    counter = Counter.new("His name was major, Major Major Major 
     | 
| 
       198 
     | 
    
         
            -
                    expect(counter.average_chars_per_word).to eq(4)
         
     | 
| 
      
 202 
     | 
    
         
            +
                    counter = Counter.new("His name was major, Major Major Major.")
         
     | 
| 
      
 203 
     | 
    
         
            +
                    expect(counter.average_chars_per_word).to eq(4.29)
         
     | 
| 
       199 
204 
     | 
    
         
             
                  end
         
     | 
| 
       200 
205 
     | 
    
         | 
| 
       201 
206 
     | 
    
         
             
                  it "returns the average number of chars per word after the filter is applied" do
         
     | 
| 
       202 
     | 
    
         
            -
                    counter = Counter.new("His name was  
     | 
| 
       203 
     | 
    
         
            -
                    expect(counter.average_chars_per_word).to eq(3)
         
     | 
| 
      
 207 
     | 
    
         
            +
                    counter = Counter.new("His name was Major, Major Major Major.", exclude: "Major")
         
     | 
| 
      
 208 
     | 
    
         
            +
                    expect(counter.average_chars_per_word).to eq(3.33)
         
     | 
| 
      
 209 
     | 
    
         
            +
                  end
         
     | 
| 
      
 210 
     | 
    
         
            +
             
     | 
| 
      
 211 
     | 
    
         
            +
                  it "accepts precision" do
         
     | 
| 
      
 212 
     | 
    
         
            +
                    counter = Counter.new("This line should have 39 characters minus spaces.")
         
     | 
| 
      
 213 
     | 
    
         
            +
                    expect(counter.average_chars_per_word(4)).to eq(5.5714)
         
     | 
| 
       204 
214 
     | 
    
         
             
                  end
         
     | 
| 
       205 
215 
     | 
    
         
             
                end
         
     | 
| 
       206 
216 
     | 
    
         | 
    
        data/words_counted.gemspec
    CHANGED
    
    | 
         @@ -9,7 +9,7 @@ Gem::Specification.new do |spec| 
     | 
|
| 
       9 
9 
     | 
    
         
             
              spec.version       = WordsCounted::VERSION
         
     | 
| 
       10 
10 
     | 
    
         
             
              spec.authors       = ["Mohamad El-Husseini"]
         
     | 
| 
       11 
11 
     | 
    
         
             
              spec.email         = ["husseini.mel@gmail.com"]
         
     | 
| 
       12 
     | 
    
         
            -
              spec.description   = %q{A Ruby word counter with helpful utility methods.}
         
     | 
| 
      
 12 
     | 
    
         
            +
              spec.description   = %q{A Ruby word counter and string analyser with helpful utility methods.}
         
     | 
| 
       13 
13 
     | 
    
         
             
              spec.summary       = %q{See README.}
         
     | 
| 
       14 
14 
     | 
    
         
             
              spec.homepage      = "https://github.com/abitdodgy/words_counted"
         
     | 
| 
       15 
15 
     | 
    
         
             
              spec.license       = "MIT"
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: words_counted
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.1. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.3
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Mohamad El-Husseini
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2014-10- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2014-10-24 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: bundler
         
     | 
| 
         @@ -66,7 +66,7 @@ dependencies: 
     | 
|
| 
       66 
66 
     | 
    
         
             
                - - '>='
         
     | 
| 
       67 
67 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       68 
68 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       69 
     | 
    
         
            -
            description: A Ruby word counter with helpful utility methods.
         
     | 
| 
      
 69 
     | 
    
         
            +
            description: A Ruby word counter and string analyser with helpful utility methods.
         
     | 
| 
       70 
70 
     | 
    
         
             
            email:
         
     | 
| 
       71 
71 
     | 
    
         
             
            - husseini.mel@gmail.com
         
     | 
| 
       72 
72 
     | 
    
         
             
            executables: []
         
     |