words_counted 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +54 -44
- data/lib/words_counted/counter.rb +5 -5
- data/lib/words_counted/version.rb +1 -1
- data/lib/words_counted.rb +10 -0
- data/spec/support/the_hart_and_the_hunter.txt +16 -0
- data/spec/words_counted/counter_spec.rb +19 -12
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e3073a0ef54dff27a5d4314dc6d59625467538c
|
4
|
+
data.tar.gz: aaf703178dd69595bfd9c7023329dc68e5de4daa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec4445ddab09b7deaad64fefb41e7e56539ebd64910e5f0cac6529ff39f7674714720d6b942176f6615d4c5ba176002c3c6291f65d4dbd25029345baa7580eab
|
7
|
+
data.tar.gz: 6300d20538fd36f00921b2e9f1f2fc665bfb14e6fa092f1ba6a623c15c2c1b60b30fd4ebcf9301f0ebf6ae7502b3f05c554512e17b47b855dec59f00317a6305
|
data/README.md
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
# Words Counted
|
2
2
|
|
3
|
-
Words Counted is a highly customisable Ruby string analyser. It includes
|
3
|
+
Words Counted is a highly customisable Ruby string analyser. It includes many handy utility methods that go beyond word counting. You can use this gem to get word density, words and the number of times they occur, the highest occurring words, and a few more things.
|
4
4
|
|
5
|
-
I use
|
5
|
+
I use *word* loosely here, since you can pass the program any string you want: words, numbers, characters, etc...
|
6
6
|
|
7
|
-
|
7
|
+
Pass in your own regular expression to customise the criteria for splitting strings. This makes Words Counted very flexible, whether you want to count words, numbers, or special characters.
|
8
8
|
|
9
9
|
### Features
|
10
10
|
|
11
|
-
* Get the following data from any string:
|
11
|
+
* Get the following data from any string or readable file:
|
12
12
|
* Word count
|
13
13
|
* Unique word count
|
14
14
|
* Word density
|
@@ -19,9 +19,10 @@ You can pass in your custom criteria for splitting strings in the form of a cust
|
|
19
19
|
* The longest word(s) and its length
|
20
20
|
* The most occurring word(s) and its number of occurrences.
|
21
21
|
* A flexible way to exclude words (or anything) from the count. You can pass in a **string**, a **regexp**, an **array**, or a **lambda**.
|
22
|
-
*
|
23
|
-
*
|
24
|
-
*
|
22
|
+
* Customisable criteria. Pass in your own regexp rules to split strings if you prefer. The default regexp has two features:
|
23
|
+
* Filters special characters but respects hyphens and apostrophes.
|
24
|
+
* Plays nicely with diacritics (UTF and unicode characters): "São Paulo" is treated as `["São", "Paulo"]` and not `["S", "", "o", "Paulo"]`.
|
25
|
+
* Pass in a file path instead of a string. WordsCounted opens and reads files.
|
25
26
|
|
26
27
|
See usage instructions for details on each feature.
|
27
28
|
|
@@ -41,12 +42,13 @@ Or install it yourself as:
|
|
41
42
|
|
42
43
|
## Usage
|
43
44
|
|
44
|
-
|
45
|
+
Pass in a string or a file path, and an optional filter and/or regexp.
|
45
46
|
|
46
47
|
```ruby
|
47
|
-
counter = WordsCounted
|
48
|
-
|
49
|
-
|
48
|
+
counter = WordsCounted.count("We are all in the gutter, but some of us are looking at the stars.")
|
49
|
+
|
50
|
+
# Using a file
|
51
|
+
counter = WordsCounted.from_file("path/to/my/file.txt")
|
50
52
|
```
|
51
53
|
|
52
54
|
### API
|
@@ -67,19 +69,19 @@ Returns a hash map of words and their number of occurrences. Uppercase and lower
|
|
67
69
|
counter.word_occurrences
|
68
70
|
|
69
71
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
72
|
+
"we" => 1,
|
73
|
+
"are" => 2,
|
74
|
+
"all" => 1,
|
75
|
+
"in" => 1,
|
76
|
+
"the" => 2,
|
77
|
+
"gutter" => 1,
|
78
|
+
"but" => 1,
|
79
|
+
"some" => 1,
|
80
|
+
"of" => 1,
|
81
|
+
"us" => 1,
|
82
|
+
"looking" => 1,
|
83
|
+
"at" => 1,
|
84
|
+
"stars" => 1
|
83
85
|
}
|
84
86
|
```
|
85
87
|
|
@@ -144,19 +146,19 @@ Returns a two-dimensional array of words and their density.
|
|
144
146
|
counter.word_density
|
145
147
|
|
146
148
|
[
|
147
|
-
["are",
|
148
|
-
["the",
|
149
|
-
["but",
|
150
|
-
["us",
|
151
|
-
["of",
|
152
|
-
["some",
|
149
|
+
["are", 13.33],
|
150
|
+
["the", 13.33],
|
151
|
+
["but", 6.67],
|
152
|
+
["us", 6.67],
|
153
|
+
["of", 6.67],
|
154
|
+
["some", 6.67],
|
153
155
|
["looking", 6.67],
|
154
|
-
["gutter",
|
155
|
-
["at",
|
156
|
-
["in",
|
157
|
-
["all",
|
158
|
-
["stars",
|
159
|
-
["we",
|
156
|
+
["gutter", 6.67],
|
157
|
+
["at", 6.67],
|
158
|
+
["in", 6.67],
|
159
|
+
["all", 6.67],
|
160
|
+
["stars", 6.67],
|
161
|
+
["we", 6.67]
|
160
162
|
]
|
161
163
|
```
|
162
164
|
|
@@ -198,7 +200,7 @@ You can exclude anything you want from the string you want to analyse by passing
|
|
198
200
|
|
199
201
|
#### Using a string
|
200
202
|
```ruby
|
201
|
-
WordsCounted
|
203
|
+
WordsCounted.count(
|
202
204
|
"Magnificent! That was magnificent, Trevor.", exclude: "was magnificent"
|
203
205
|
)
|
204
206
|
counter.words
|
@@ -207,21 +209,21 @@ counter.words
|
|
207
209
|
|
208
210
|
#### Using an array
|
209
211
|
```ruby
|
210
|
-
WordsCounted
|
212
|
+
WordsCounted.count("1 2 3 4 5 6", regexp: /[0-9]/, exclude: ['1', '2', '3'])
|
211
213
|
counter.words
|
212
214
|
#=> ["4", "5", "6"]
|
213
215
|
```
|
214
216
|
|
215
217
|
#### Using a regular expression
|
216
218
|
```ruby
|
217
|
-
WordsCounted
|
219
|
+
WordsCounted.count("Hello Beirut", exclude: /Beirut/)
|
218
220
|
counter.words
|
219
221
|
#=> ["Hello"]
|
220
222
|
```
|
221
223
|
|
222
224
|
#### Using a lambda
|
223
225
|
```ruby
|
224
|
-
WordsCounted
|
226
|
+
WordsCounted.count(
|
225
227
|
"1 2 3 4 5 6", regexp: /[0-9]/, exclude: ->(w) { w.to_i.even? }
|
226
228
|
)
|
227
229
|
counter.words
|
@@ -241,17 +243,25 @@ But maybe you don't want to count words? Well, count anything you want. What you
|
|
241
243
|
For example, if you wanted to include numbers in your analysis, you can override the regular expression:
|
242
244
|
|
243
245
|
```ruby
|
244
|
-
counter = WordsCounted
|
246
|
+
counter = WordsCounted.count("Numbers 1, 2, and 3", regexp: /[\p{Alnum}\-']+/)
|
245
247
|
counter.words
|
246
248
|
#=> ["Numbers", "1", "2", "and", "3"]
|
247
249
|
```
|
248
250
|
|
251
|
+
## Opening and Reading Files
|
252
|
+
|
253
|
+
Use the `from_file` method to open and read a file. It accepts the same options as `count` does when reading from a string. The file path can be a URL.
|
254
|
+
|
255
|
+
```ruby
|
256
|
+
counter = WordsCounted.from_file("url/or/path/to/file.text")
|
257
|
+
```
|
258
|
+
|
249
259
|
## Gotchas
|
250
260
|
|
251
261
|
A hyphen used in lieu of an *em* or *en* dash will form part of the word and throw off the `word_occurrences` algorithm.
|
252
262
|
|
253
263
|
```ruby
|
254
|
-
counter = WordsCounted
|
264
|
+
counter = WordsCounted.count("How do you do?-you are well, I see.")
|
255
265
|
counter.word_occurrences
|
256
266
|
|
257
267
|
{
|
@@ -273,10 +283,10 @@ Another gotcha is that the default criteria does not include numbers in its anal
|
|
273
283
|
|
274
284
|
## Road Map
|
275
285
|
|
276
|
-
1. Add ability to open
|
286
|
+
1. Add ability to open URLs.
|
277
287
|
2. Add paragraph, sentence, average words per sentence, and average sentence chars counters.
|
278
288
|
|
279
|
-
#### Ability to open
|
289
|
+
#### Ability to open URLs
|
280
290
|
|
281
291
|
Maybe I can add some class methods to open the file and init the counter class.
|
282
292
|
|
@@ -48,7 +48,7 @@ module WordsCounted
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def percent_of(n)
|
51
|
-
(n.to_f / word_count.to_f * 100
|
51
|
+
(n.to_f / word_count.to_f * 100).round(2)
|
52
52
|
end
|
53
53
|
|
54
54
|
def regexp
|
@@ -63,16 +63,16 @@ module WordsCounted
|
|
63
63
|
}
|
64
64
|
elsif filter.respond_to?(:to_str)
|
65
65
|
exclusion_list = filter.split.collect(&:downcase)
|
66
|
-
->(
|
67
|
-
exclusion_list.include?(
|
66
|
+
->(word) {
|
67
|
+
exclusion_list.include?(word.downcase)
|
68
68
|
}
|
69
69
|
elsif Regexp.try_convert(filter)
|
70
70
|
filter = Regexp.try_convert(filter)
|
71
|
-
Proc.new { |
|
71
|
+
Proc.new { |word| word =~ filter }
|
72
72
|
elsif filter.respond_to?(:to_proc)
|
73
73
|
filter.to_proc
|
74
74
|
else
|
75
|
-
raise ArgumentError, "
|
75
|
+
raise ArgumentError, "Filter must String, Array, Proc, or Regexp"
|
76
76
|
end
|
77
77
|
end
|
78
78
|
end
|
data/lib/words_counted.rb
CHANGED
@@ -7,4 +7,14 @@ rescue LoadError
|
|
7
7
|
end
|
8
8
|
|
9
9
|
module WordsCounted
|
10
|
+
def self.count(string, options = {})
|
11
|
+
Counter.new(string, options)
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.from_file(path, options = {})
|
15
|
+
file = File.open(path)
|
16
|
+
data = file.read
|
17
|
+
file.close
|
18
|
+
count(data, options)
|
19
|
+
end
|
10
20
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
The Hart and the Hunter
|
2
|
+
|
3
|
+
The Hart was once drinking from a pool and admiring the noble
|
4
|
+
figure he made there. "Ah," said he, "where can you see such
|
5
|
+
noble horns as these, with such antlers! I wish I had legs more
|
6
|
+
worthy to bear such a noble crown; it is a pity they are so slim
|
7
|
+
and slight." At that moment a Hunter approached and sent an arrow
|
8
|
+
whistling after him. Away bounded the Hart, and soon, by the aid
|
9
|
+
of his nimble legs, was nearly out of sight of the Hunter; but not
|
10
|
+
noticing where he was going, he passed under some trees with
|
11
|
+
branches growing low down in which his antlers were caught, so
|
12
|
+
that the Hunter had time to come up. "Alas! alas!" cried the
|
13
|
+
Hart:
|
14
|
+
|
15
|
+
|
16
|
+
"We often despise what is most useful to us."
|
@@ -5,7 +5,7 @@ module WordsCounted
|
|
5
5
|
describe Counter do
|
6
6
|
let(:counter) { Counter.new("We are all in the gutter, but some of us are looking at the stars.") }
|
7
7
|
|
8
|
-
describe "
|
8
|
+
describe "initialize" do
|
9
9
|
it "sets @options" do
|
10
10
|
expect(counter.instance_variables).to include(:@options)
|
11
11
|
end
|
@@ -27,7 +27,7 @@ module WordsCounted
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
describe "
|
30
|
+
describe "words" do
|
31
31
|
it "returns an array" do
|
32
32
|
expect(counter.words).to be_a(Array)
|
33
33
|
end
|
@@ -97,13 +97,13 @@ module WordsCounted
|
|
97
97
|
end
|
98
98
|
end
|
99
99
|
|
100
|
-
describe "
|
100
|
+
describe "word_count" do
|
101
101
|
it "returns the correct word count" do
|
102
102
|
expect(counter.word_count).to eq(15)
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
-
describe "
|
106
|
+
describe "word_occurrences" do
|
107
107
|
it "returns a hash" do
|
108
108
|
expect(counter.word_occurrences).to be_a(Hash)
|
109
109
|
end
|
@@ -114,7 +114,7 @@ module WordsCounted
|
|
114
114
|
end
|
115
115
|
end
|
116
116
|
|
117
|
-
describe "
|
117
|
+
describe "most_occurring_words" do
|
118
118
|
it "returns an array" do
|
119
119
|
expect(counter.most_occurring_words).to be_a(Array)
|
120
120
|
end
|
@@ -125,7 +125,7 @@ module WordsCounted
|
|
125
125
|
end
|
126
126
|
end
|
127
127
|
|
128
|
-
describe '
|
128
|
+
describe 'word_lengths' do
|
129
129
|
it "returns a hash" do
|
130
130
|
expect(counter.word_lengths).to be_a(Hash)
|
131
131
|
end
|
@@ -136,7 +136,7 @@ module WordsCounted
|
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
139
|
-
describe "
|
139
|
+
describe "longest_words" do
|
140
140
|
it "returns an array" do
|
141
141
|
expect(counter.longest_words).to be_a(Array)
|
142
142
|
end
|
@@ -147,8 +147,8 @@ module WordsCounted
|
|
147
147
|
end
|
148
148
|
end
|
149
149
|
|
150
|
-
describe "
|
151
|
-
it "returns
|
150
|
+
describe "word_density" do
|
151
|
+
it "returns an array" do
|
152
152
|
expect(counter.word_density).to be_a(Array)
|
153
153
|
end
|
154
154
|
|
@@ -158,7 +158,7 @@ module WordsCounted
|
|
158
158
|
end
|
159
159
|
end
|
160
160
|
|
161
|
-
describe "
|
161
|
+
describe "char_count" do
|
162
162
|
it "returns the number of chars in the passed in string" do
|
163
163
|
counter = Counter.new("His name was major, Major Major Major Major.")
|
164
164
|
expect(counter.char_count).to eq(35)
|
@@ -170,7 +170,7 @@ module WordsCounted
|
|
170
170
|
end
|
171
171
|
end
|
172
172
|
|
173
|
-
describe "
|
173
|
+
describe "average_chars_per_word" do
|
174
174
|
it "returns the average number of chars per word" do
|
175
175
|
counter = Counter.new("His name was major, Major Major Major Major.")
|
176
176
|
expect(counter.average_chars_per_word).to eq(4)
|
@@ -182,10 +182,17 @@ module WordsCounted
|
|
182
182
|
end
|
183
183
|
end
|
184
184
|
|
185
|
-
describe "
|
185
|
+
describe "unique_word_count" do
|
186
186
|
it "returns the number of unique words" do
|
187
187
|
expect(counter.unique_word_count).to eq(13)
|
188
188
|
end
|
189
189
|
end
|
190
190
|
end
|
191
|
+
|
192
|
+
describe "from_file" do
|
193
|
+
it "opens and reads a text file" do
|
194
|
+
counter = WordsCounted.from_file('spec/support/the_hart_and_the_hunter.txt')
|
195
|
+
expect(counter.word_count).to eq(139)
|
196
|
+
end
|
197
|
+
end
|
191
198
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: words_counted
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mohamad El-Husseini
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -84,6 +84,7 @@ files:
|
|
84
84
|
- lib/words_counted/counter.rb
|
85
85
|
- lib/words_counted/version.rb
|
86
86
|
- spec/spec_helper.rb
|
87
|
+
- spec/support/the_hart_and_the_hunter.txt
|
87
88
|
- spec/words_counted/counter_spec.rb
|
88
89
|
- words_counted.gemspec
|
89
90
|
homepage: https://github.com/abitdodgy/words_counted
|
@@ -112,4 +113,5 @@ specification_version: 4
|
|
112
113
|
summary: See README.
|
113
114
|
test_files:
|
114
115
|
- spec/spec_helper.rb
|
116
|
+
- spec/support/the_hart_and_the_hunter.txt
|
115
117
|
- spec/words_counted/counter_spec.rb
|