word_count_analyzer 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/word_count_analyzer/counter.rb +1 -1
- data/lib/word_count_analyzer/version.rb +1 -1
- data/spec/word_count_analyzer/counter_spec.rb +10 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9100a7be1a916cb3872aa74e9e76ddfb83744d62
|
4
|
+
data.tar.gz: 99b5f6ddb28101de71f97d3b44e179d485f7ab84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e0727cd58ddf83c23ac0dde9f9c713d7cad4f5c21fd572bc0bd21064266f57e5ff3d2489326d951ec5b7e3a7d38370a36f98596bd725d08092ad9fdabea43480
|
7
|
+
data.tar.gz: decc5f7b4256f0884b61bf23191ae638509a68f948a2515333ddbef2a9e6d3d6c9004ebb1e5db1352a11a1212399497cd798bb559d1f91bb14b778342ec123c0
|
data/README.md
CHANGED
@@ -68,7 +68,7 @@ WordCountAnalyzer::Analyzer.new(text: text).analyze
|
|
68
68
|
### Count the words in a string
|
69
69
|
|
70
70
|
```ruby
|
71
|
-
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list
|
71
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
72
72
|
|
73
73
|
WordCountAnalyzer::Counter.new(text: text).count
|
74
74
|
# => 64
|
@@ -76,7 +76,7 @@ WordCountAnalyzer::Counter.new(text: text).count
|
|
76
76
|
# Overrides all settings to match the way Pages handles word count.
|
77
77
|
# N.B. The developers of Pages may change the algorithm at any time, so this should be treated only as an approximation.
|
78
78
|
WordCountAnalyzer::Counter.new(text: text).pages_count
|
79
|
-
# => 79
|
79
|
+
# => 76 (or 79 if the list items are not formatted as a list)
|
80
80
|
|
81
81
|
# Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
|
82
82
|
# N.B. The developers of these tools may change the algorithm at any time, so this should be treated only as an approximation.
|
@@ -31,7 +31,7 @@ module WordCountAnalyzer
|
|
31
31
|
@hyphenated_word = 'count_as_multiple'
|
32
32
|
@date = 'no_special_treatment'
|
33
33
|
@number = 'count'
|
34
|
-
@numbered_list = '
|
34
|
+
@numbered_list = 'ignore'
|
35
35
|
@xhtml = 'keep'
|
36
36
|
@forward_slash = 'count_as_multiple'
|
37
37
|
@backslash = 'count_as_multiple'
|
@@ -561,7 +561,7 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
561
561
|
|
562
562
|
context 'Pages Word Count' do
|
563
563
|
it 'reverse engineers Pages word count #001' do
|
564
|
-
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list
|
564
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
565
565
|
ws = WordCountAnalyzer::Counter.new(
|
566
566
|
text: text,
|
567
567
|
ellipsis: 'no_special_treatment',
|
@@ -570,7 +570,7 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
570
570
|
hyphenated_word: 'count_as_multiple',
|
571
571
|
date: 'no_special_treatment',
|
572
572
|
number: 'count',
|
573
|
-
numbered_list: '
|
573
|
+
numbered_list: 'ignore',
|
574
574
|
xhtml: 'keep',
|
575
575
|
forward_slash: 'count_as_multiple',
|
576
576
|
backslash: 'count_as_multiple',
|
@@ -579,13 +579,13 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
579
579
|
underscore: 'ignore',
|
580
580
|
stray_punctuation: 'ignore'
|
581
581
|
)
|
582
|
-
expect(ws.count).to eq(
|
582
|
+
expect(ws.count).to eq(76)
|
583
583
|
end
|
584
584
|
|
585
585
|
it 'reverse engineers Pages word count #002' do
|
586
586
|
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
587
587
|
ws = WordCountAnalyzer::Counter.new(text: text)
|
588
|
-
expect(ws.pages_count).to eq(
|
588
|
+
expect(ws.pages_count).to eq(76)
|
589
589
|
end
|
590
590
|
|
591
591
|
it 'reverse engineers Pages word count #003' do
|
@@ -593,6 +593,12 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
593
593
|
ws = WordCountAnalyzer::Counter.new(text: text)
|
594
594
|
expect(ws.pages_count).to eq(0)
|
595
595
|
end
|
596
|
+
|
597
|
+
it 'reverse engineers Pages word count #004' do
|
598
|
+
text = "1. List item a\n\n2. List item b\n\n3. List item c"
|
599
|
+
ws = WordCountAnalyzer::Counter.new(text: text)
|
600
|
+
expect(ws.pages_count).to eq(9)
|
601
|
+
end
|
596
602
|
end
|
597
603
|
|
598
604
|
context 'Microsoft Word Count' do
|