words_counted 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c2d80aa2eb60a01c71a85f3b28b02231496c79ab
4
- data.tar.gz: 93cd0029317b142161f4cb3170207fb507b82a50
3
+ metadata.gz: 07c1e76ee27525e7aa28de6a61dedde8ba6eae39
4
+ data.tar.gz: e8062169aaf99c19947a246ff33385e1fca928a7
5
5
  SHA512:
6
- metadata.gz: 2fa36912b371084ddb87af00a26db538bcd8836abc34e51a76f5560d5bbb2e133a3dd4b980fe96cc7da07f251ce188b9ca183fda5a08c731e0450572bdf816f0
7
- data.tar.gz: b91c95329ad60db96adfda332a9a933de013122660c47b5da85c290ae5dc25cbb98763afae20ee19b595aa4e8e526ee5648db461199e20dbc9df275ea23dc496
6
+ metadata.gz: 680cc6c8048a809941f4e23c53c99a2ee7fed5e3d0fe7943d9842b29967c7d19f8430143d56ed510042a493e166c99c4ae70c9b82e4b21fb4aeb3dbe9280f52e
7
+ data.tar.gz: ec317b1d90f3f14ba399996c8061835393d4aac6dd55f8ebc342b95a315c710bd62d3c9981e37cff6397863b8b5e10744748b1cede99e610dbf5cdd59aafcb90
data/README.md CHANGED
@@ -50,7 +50,7 @@ counter = WordsCounted.count(
50
50
  )
51
51
 
52
52
  # Using a file
53
- counter = WordsCounted.from_file("path/to/my/file.txt")
53
+ counter = WordsCounted.from_file("path/or/url/to/my/file.txt")
54
54
  ```
55
55
 
56
56
  ### API
@@ -172,9 +172,9 @@ counter.words
172
172
  #=> ["We", "are", "all", "in", "the", "gutter", "but", "some", "of", "us", "are", "looking", "at", "the", "stars"]
173
173
  ```
174
174
 
175
- #### `.word_density`
175
+ #### `.word_density([ precision = 2 ])`
176
176
 
177
- Returns a two-dimentional array of words and their density.
177
+ Returns a two-dimensional array of words and their density. It accepts a precision argument, which defaults to two.
178
178
 
179
179
  ```ruby
180
180
  counter.word_density
@@ -182,17 +182,17 @@ counter.word_density
182
182
  [
183
183
  ["are", 13.33],
184
184
  ["the", 13.33],
185
- ["but", 6.67],
186
- ["us", 6.67],
187
- ["of", 6.67],
188
- ["some", 6.67],
189
- ["looking", 6.67],
190
- ["gutter", 6.67],
191
- ["at", 6.67],
192
- ["in", 6.67],
193
- ["all", 6.67],
194
- ["stars", 6.67],
195
- ["we", 6.67]
185
+ ["but", 6.67 ],
186
+ ["us", 6.67 ],
187
+ ["of", 6.67 ],
188
+ ["some", 6.67 ],
189
+ ["looking", 6.67 ],
190
+ ["gutter", 6.67 ],
191
+ ["at", 6.67 ],
192
+ ["in", 6.67 ],
193
+ ["all", 6.67 ],
194
+ ["stars", 6.67 ],
195
+ ["we", 6.67 ]
196
196
  ]
197
197
  ```
198
198
 
@@ -204,9 +204,9 @@ Returns the string's character count.
204
204
  counter.char_count #=> 76
205
205
  ```
206
206
 
207
- #### `.average_chars_per_word`
207
+ #### `.average_chars_per_word([ precision = 2 ])`
208
208
 
209
- Returns the average character count per word.
209
+ Returns the average character count per word. Accepts a precision argument which defaults to two.
210
210
 
211
211
  ```ruby
212
212
  counter.average_chars_per_word #=> 4
@@ -4,14 +4,18 @@ module WordsCounted
4
4
 
5
5
  WORD_REGEXP = /[\p{Alpha}\-']+/
6
6
 
7
+ def self.from_file(path, options = {})
8
+ File.open(path) do |file|
9
+ new file.read, options
10
+ end
11
+ end
12
+
7
13
  def initialize(string, options = {})
8
14
  @options = options
9
15
  exclude = filter_proc(options[:exclude])
10
16
  @words = string.scan(regexp).reject { |word| exclude.call(word) }
11
17
  @char_count = @words.join.size
12
- @word_occurrences = words.each_with_object(Hash.new(0)) do |word, hash|
13
- hash[word.downcase] += 1
14
- end
18
+ @word_occurrences = words.each_with_object(Hash.new(0)) { |word, hash| hash[word.downcase] += 1 }
15
19
  @word_lengths = words.each_with_object({}) { |word, hash| hash[word] ||= word.length }
16
20
  end
17
21
 
@@ -23,8 +27,8 @@ module WordsCounted
23
27
  words.uniq.size
24
28
  end
25
29
 
26
- def average_chars_per_word
27
- (char_count / word_count).round(2)
30
+ def average_chars_per_word(precision = 2)
31
+ (char_count.to_f / word_count.to_f).round(precision)
28
32
  end
29
33
 
30
34
  def most_occurring_words
@@ -35,28 +39,29 @@ module WordsCounted
35
39
  highest_ranking word_lengths
36
40
  end
37
41
 
38
- def word_density
39
- word_occurrences.each_with_object({}) do |(word, occ), hash|
40
- hash[word] = percent_of(occ)
41
- end.sort_by { |_, value| value }.reverse
42
+ def word_density(precision = 2)
43
+ word_densities = word_occurrences.each_with_object({}) do |(word, occ), hash|
44
+ hash[word] = (occ.to_f / word_count.to_f * 100).round(precision)
45
+ end
46
+ sort_by_descending_value word_densities
42
47
  end
43
48
 
44
49
  def sorted_word_occurrences
45
- word_occurrences.sort_by { |_, v| v }.reverse
50
+ sort_by_descending_value word_occurrences
46
51
  end
47
52
 
48
53
  def sorted_word_lengths
49
- word_lengths.sort_by { |_, v| v }.reverse
54
+ sort_by_descending_value word_lengths
50
55
  end
51
56
 
52
57
  private
53
58
 
54
59
  def highest_ranking(entries)
55
- entries.group_by { |word, value| value }.sort.last.last
60
+ entries.group_by { |_, value| value }.sort.last.last
56
61
  end
57
62
 
58
- def percent_of(n)
59
- (n.to_f / word_count.to_f * 100).round(2)
63
+ def sort_by_descending_value(entries)
64
+ entries.sort_by { |_, value| value }.reverse
60
65
  end
61
66
 
62
67
  def regexp
@@ -74,13 +79,12 @@ module WordsCounted
74
79
  ->(word) {
75
80
  exclusion_list.include?(word.downcase)
76
81
  }
77
- elsif Regexp.try_convert(filter)
78
- filter = Regexp.try_convert(filter)
79
- Proc.new { |word| word =~ filter }
82
+ elsif regexp_filter = Regexp.try_convert(filter)
83
+ Proc.new { |word| word =~ regexp_filter }
80
84
  elsif filter.respond_to?(:to_proc)
81
85
  filter.to_proc
82
86
  else
83
- raise ArgumentError, "Filter must String, Array, Proc, or Regexp"
87
+ raise ArgumentError, "Filter must String, Array, Lambda, or Regexp"
84
88
  end
85
89
  end
86
90
  end
@@ -1,3 +1,3 @@
1
1
  module WordsCounted
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.3"
3
3
  end
data/lib/words_counted.rb CHANGED
@@ -12,9 +12,6 @@ module WordsCounted
12
12
  end
13
13
 
14
14
  def self.from_file(path, options = {})
15
- file = File.open(path)
16
- data = file.read
17
- file.close
18
- count(data, options)
15
+ Counter.from_file(path, options)
19
16
  end
20
17
  end
@@ -175,32 +175,42 @@ module WordsCounted
175
175
  end
176
176
 
177
177
  it "returns words and their density in percent" do
178
- counter = Counter.new("His name was major, I mean, Major Major Major Major.")
179
- expect(counter.word_density).to eq([["major", 50.0], ["mean", 10.0], ["i", 10.0], ["was", 10.0], ["name", 10.0], ["his", 10.0]])
178
+ counter = Counter.new("His name was Major, major Major Major.")
179
+ expect(counter.word_density).to eq([["major", 57.14], ["was", 14.29], ["name", 14.29], ["his", 14.29]])
180
+ end
181
+
182
+ it "accepts a precision" do
183
+ counter = Counter.new("His name was Major, major Major Major.")
184
+ expect(counter.word_density(4)).to eq([["major", 57.1429], ["was", 14.2857], ["name", 14.2857], ["his", 14.2857]])
180
185
  end
181
186
  end
182
187
 
183
188
  describe "char_count" do
184
189
  it "returns the number of chars in the passed in string" do
185
- counter = Counter.new("His name was major, Major Major Major Major.")
186
- expect(counter.char_count).to eq(35)
190
+ counter = Counter.new("His name was Major, major Major Major.")
191
+ expect(counter.char_count).to eq(30)
187
192
  end
188
193
 
189
194
  it "returns the number of chars in the passed in string after the filter is applied" do
190
- counter = Counter.new("His name was major, Major Major Major Major.", exclude: "Major")
195
+ counter = Counter.new("His name was Major, major Major Major.", exclude: "Major")
191
196
  expect(counter.char_count).to eq(10)
192
197
  end
193
198
  end
194
199
 
195
200
  describe "average_chars_per_word" do
196
201
  it "returns the average number of chars per word" do
197
- counter = Counter.new("His name was major, Major Major Major Major.")
198
- expect(counter.average_chars_per_word).to eq(4)
202
+ counter = Counter.new("His name was major, Major Major Major.")
203
+ expect(counter.average_chars_per_word).to eq(4.29)
199
204
  end
200
205
 
201
206
  it "returns the average number of chars per word after the filter is applied" do
202
- counter = Counter.new("His name was major, Major Major Major Major.", exclude: "Major")
203
- expect(counter.average_chars_per_word).to eq(3)
207
+ counter = Counter.new("His name was Major, Major Major Major.", exclude: "Major")
208
+ expect(counter.average_chars_per_word).to eq(3.33)
209
+ end
210
+
211
+ it "accepts precision" do
212
+ counter = Counter.new("This line should have 39 characters minus spaces.")
213
+ expect(counter.average_chars_per_word(4)).to eq(5.5714)
204
214
  end
205
215
  end
206
216
 
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.version = WordsCounted::VERSION
10
10
  spec.authors = ["Mohamad El-Husseini"]
11
11
  spec.email = ["husseini.mel@gmail.com"]
12
- spec.description = %q{A Ruby word counter with helpful utility methods.}
12
+ spec.description = %q{A Ruby word counter and string analyser with helpful utility methods.}
13
13
  spec.summary = %q{See README.}
14
14
  spec.homepage = "https://github.com/abitdodgy/words_counted"
15
15
  spec.license = "MIT"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: words_counted
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mohamad El-Husseini
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-23 00:00:00.000000000 Z
11
+ date: 2014-10-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- description: A Ruby word counter with helpful utility methods.
69
+ description: A Ruby word counter and string analyser with helpful utility methods.
70
70
  email:
71
71
  - husseini.mel@gmail.com
72
72
  executables: []