word_count_analyzer 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/word_count_analyzer/counter.rb +1 -1
- data/lib/word_count_analyzer/version.rb +1 -1
- data/spec/word_count_analyzer/counter_spec.rb +10 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9100a7be1a916cb3872aa74e9e76ddfb83744d62
|
4
|
+
data.tar.gz: 99b5f6ddb28101de71f97d3b44e179d485f7ab84
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e0727cd58ddf83c23ac0dde9f9c713d7cad4f5c21fd572bc0bd21064266f57e5ff3d2489326d951ec5b7e3a7d38370a36f98596bd725d08092ad9fdabea43480
|
7
|
+
data.tar.gz: decc5f7b4256f0884b61bf23191ae638509a68f948a2515333ddbef2a9e6d3d6c9004ebb1e5db1352a11a1212399497cd798bb559d1f91bb14b778342ec123c0
|
data/README.md
CHANGED
@@ -68,7 +68,7 @@ WordCountAnalyzer::Analyzer.new(text: text).analyze
|
|
68
68
|
### Count the words in a string
|
69
69
|
|
70
70
|
```ruby
|
71
|
-
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list
|
71
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
72
72
|
|
73
73
|
WordCountAnalyzer::Counter.new(text: text).count
|
74
74
|
# => 64
|
@@ -76,7 +76,7 @@ WordCountAnalyzer::Counter.new(text: text).count
|
|
76
76
|
# Overrides all settings to match the way Pages handles word count.
|
77
77
|
# N.B. The developers of Pages may change the algorithm at any time so this should just be as an approximation.
|
78
78
|
WordCountAnalyzer::Counter.new(text: text).pages_count
|
79
|
-
# => 79
|
79
|
+
# => 76 (or 79 if the list items are not formatted as a list)
|
80
80
|
|
81
81
|
# Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
|
82
82
|
# N.B. The developers of these tools may change the algorithm at any time so this should just be as an approximation.
|
@@ -31,7 +31,7 @@ module WordCountAnalyzer
|
|
31
31
|
@hyphenated_word = 'count_as_multiple'
|
32
32
|
@date = 'no_special_treatment'
|
33
33
|
@number = 'count'
|
34
|
-
@numbered_list = '
|
34
|
+
@numbered_list = 'ignore'
|
35
35
|
@xhtml = 'keep'
|
36
36
|
@forward_slash = 'count_as_multiple'
|
37
37
|
@backslash = 'count_as_multiple'
|
@@ -561,7 +561,7 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
561
561
|
|
562
562
|
context 'Pages Word Count' do
|
563
563
|
it 'reverse engineers Pages word count #001' do
|
564
|
-
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list
|
564
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list \n\n1. item a \n\n2. item b \n\n3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
565
565
|
ws = WordCountAnalyzer::Counter.new(
|
566
566
|
text: text,
|
567
567
|
ellipsis: 'no_special_treatment',
|
@@ -570,7 +570,7 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
570
570
|
hyphenated_word: 'count_as_multiple',
|
571
571
|
date: 'no_special_treatment',
|
572
572
|
number: 'count',
|
573
|
-
numbered_list: '
|
573
|
+
numbered_list: 'ignore',
|
574
574
|
xhtml: 'keep',
|
575
575
|
forward_slash: 'count_as_multiple',
|
576
576
|
backslash: 'count_as_multiple',
|
@@ -579,13 +579,13 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
579
579
|
underscore: 'ignore',
|
580
580
|
stray_punctuation: 'ignore'
|
581
581
|
)
|
582
|
-
expect(ws.count).to eq(
|
582
|
+
expect(ws.count).to eq(76)
|
583
583
|
end
|
584
584
|
|
585
585
|
it 'reverse engineers Pages word count #002' do
|
586
586
|
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
587
587
|
ws = WordCountAnalyzer::Counter.new(text: text)
|
588
|
-
expect(ws.pages_count).to eq(
|
588
|
+
expect(ws.pages_count).to eq(76)
|
589
589
|
end
|
590
590
|
|
591
591
|
it 'reverse engineers Pages word count #003' do
|
@@ -593,6 +593,12 @@ RSpec.describe WordCountAnalyzer::Counter do
|
|
593
593
|
ws = WordCountAnalyzer::Counter.new(text: text)
|
594
594
|
expect(ws.pages_count).to eq(0)
|
595
595
|
end
|
596
|
+
|
597
|
+
it 'reverse engineers Pages word count #004' do
|
598
|
+
text = "1. List item a\n\n2. List item b\n\n3. List item c"
|
599
|
+
ws = WordCountAnalyzer::Counter.new(text: text)
|
600
|
+
expect(ws.pages_count).to eq(9)
|
601
|
+
end
|
596
602
|
end
|
597
603
|
|
598
604
|
context 'Microsoft Word Count' do
|