word_count_analyzer 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/word_count_analyzer/analyzer.rb +2 -3
- data/lib/word_count_analyzer/counter.rb +4 -4
- data/lib/word_count_analyzer/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43a6fe0db074fc9255710b36242ff3144f176001
|
4
|
+
data.tar.gz: 6cdc559082c1a2cd22037953cc8450870b9f490d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a3d6823c4dd6d325d2095728e6ab058108baf29ea9158f28c9d9440807bd920884fad798b3789592eabaac33d6adce02270a7e726365e0ef5ccc84f19549723
|
7
|
+
data.tar.gz: d9e8d479f15eda8a17c0b1c4ba274685062b6092757148ed6518d991fde4face5a0b0892254058c24ce1276216110538d2a403402cbef45988600e724df19e57
|
data/README.md
CHANGED
@@ -22,6 +22,10 @@ Add this line to your application’s Gemfile:
|
|
22
22
|
gem 'word_count_analyzer'
|
23
23
|
```
|
24
24
|
|
25
|
+
## Live Demo
|
26
|
+
|
27
|
+
Try out a [live demo](https://www.tm-town.com/word-count-analyzer) of Word Count Analyzer in the browser.
|
28
|
+
|
25
29
|
## Usage
|
26
30
|
|
27
31
|
### Analyze the word count gray areas of a string
|
@@ -80,7 +84,6 @@ WordCountAnalyzer::Counter.new(text: text).pages_count
|
|
80
84
|
|
81
85
|
# Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
|
82
86
|
# N.B. The developers of these tools may change the algorithm at any time, so this should be used only as an approximation.
|
83
|
-
|
84
87
|
WordCountAnalyzer::Counter.new(text: text).mword_count
|
85
88
|
# => 71
|
86
89
|
|
@@ -520,6 +523,7 @@ wc (Unix) | 1
|
|
520
523
|
- Add language support for languages other than English
|
521
524
|
- For most languages this is probably as simple as adding in the translations and abbreviations for months and days.
|
522
525
|
- For languages that use a character count (Japanese, Chinese) there will be larger changes. For these languages we need to add an option for how to handle Roman words within the text.
|
526
|
+
- Improve performance for longer strings (potentially break the string into smaller parts and then sum the totals of each part)
|
523
527
|
|
524
528
|
## Contributing
|
525
529
|
|
@@ -1,9 +1,8 @@
|
|
1
1
|
module WordCountAnalyzer
|
2
2
|
class Analyzer
|
3
|
-
attr_reader :text
|
3
|
+
attr_reader :text
|
4
4
|
def initialize(text:)
|
5
5
|
@text = text
|
6
|
-
@tgr = EngTagger.new
|
7
6
|
end
|
8
7
|
|
9
8
|
def analyze
|
@@ -12,7 +11,7 @@ module WordCountAnalyzer
|
|
12
11
|
contraction_count = 0
|
13
12
|
hyphenated_word_count = 0
|
14
13
|
WordCountAnalyzer::Xhtml.new(string: text).replace.split(/\s+/).each_with_index do |token, index|
|
15
|
-
contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr:
|
14
|
+
contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: EngTagger.new, hyphen: 'single').contraction?
|
16
15
|
hyphenated_word_count += 1 if WordCountAnalyzer::HyphenatedWord.new(token: token).hyphenated_word?
|
17
16
|
end
|
18
17
|
analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new(string: text).occurences
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module WordCountAnalyzer
|
2
2
|
class Counter
|
3
|
-
attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
|
3
|
+
attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
|
4
4
|
def initialize(text:, **args)
|
5
5
|
@text = text
|
6
6
|
@ellipsis = args[:ellipsis] || 'ignore'
|
@@ -18,7 +18,6 @@ module WordCountAnalyzer
|
|
18
18
|
@underscore = args[:underscore] || 'ignore'
|
19
19
|
@stray_punctuation = args[:stray_punctuation] || 'ignore'
|
20
20
|
@equal_sign = 'ignore'
|
21
|
-
@tgr = EngTagger.new
|
22
21
|
end
|
23
22
|
|
24
23
|
def count
|
@@ -65,9 +64,10 @@ module WordCountAnalyzer
|
|
65
64
|
private
|
66
65
|
|
67
66
|
def word_count
|
67
|
+
tgr = EngTagger.new
|
68
68
|
processed_text = process_ellipsis(text)
|
69
69
|
processed_text = process_hyperlink(processed_text)
|
70
|
-
processed_text = process_contraction(processed_text)
|
70
|
+
processed_text = process_contraction(processed_text, tgr)
|
71
71
|
processed_text = process_date(processed_text)
|
72
72
|
processed_text = process_number(processed_text)
|
73
73
|
processed_text = process_number_list(processed_text)
|
@@ -106,7 +106,7 @@ module WordCountAnalyzer
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
|
-
def process_contraction(txt)
|
109
|
+
def process_contraction(txt, tgr)
|
110
110
|
if contraction.eql?('count_as_one')
|
111
111
|
txt
|
112
112
|
elsif contraction.eql?('count_as_multiple')
|