word_count_analyzer 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/word_count_analyzer/analyzer.rb +2 -3
- data/lib/word_count_analyzer/counter.rb +4 -4
- data/lib/word_count_analyzer/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43a6fe0db074fc9255710b36242ff3144f176001
|
4
|
+
data.tar.gz: 6cdc559082c1a2cd22037953cc8450870b9f490d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a3d6823c4dd6d325d2095728e6ab058108baf29ea9158f28c9d9440807bd920884fad798b3789592eabaac33d6adce02270a7e726365e0ef5ccc84f19549723
|
7
|
+
data.tar.gz: d9e8d479f15eda8a17c0b1c4ba274685062b6092757148ed6518d991fde4face5a0b0892254058c24ce1276216110538d2a403402cbef45988600e724df19e57
|
data/README.md
CHANGED
@@ -22,6 +22,10 @@ Add this line to your application’s Gemfile:
|
|
22
22
|
gem 'word_count_analyzer'
|
23
23
|
```
|
24
24
|
|
25
|
+
## Live Demo
|
26
|
+
|
27
|
+
Try out a [live demo](https://www.tm-town.com/word-count-analyzer) of Word Count Analyzer in the browser.
|
28
|
+
|
25
29
|
## Usage
|
26
30
|
|
27
31
|
### Analyze the word count gray areas of a string
|
@@ -80,7 +84,6 @@ WordCountAnalyzer::Counter.new(text: text).pages_count
|
|
80
84
|
|
81
85
|
# Overrides all settings to match the way Microsoft Word and wc (Unix) handle word count.
|
82
86
|
# N.B. The developers of these tools may change the algorithm at any time, so this should just be used as an approximation.
|
83
|
-
|
84
87
|
WordCountAnalyzer::Counter.new(text: text).mword_count
|
85
88
|
# => 71
|
86
89
|
|
@@ -520,6 +523,7 @@ wc (Unix) | 1
|
|
520
523
|
- Add language support for languages other than English
|
521
524
|
- For most languages this is probably as simple as adding in the translations and abbreviations for months and days.
|
522
525
|
- For languages that use a character count (Japanese, Chinese) there will be larger changes. For these languages we need to add an option for how to handle Roman words within the text.
|
526
|
+
- Improve performance for longer strings (potentially break the string into smaller parts and then sum the totals of each)
|
523
527
|
|
524
528
|
## Contributing
|
525
529
|
|
@@ -1,9 +1,8 @@
|
|
1
1
|
module WordCountAnalyzer
|
2
2
|
class Analyzer
|
3
|
-
attr_reader :text
|
3
|
+
attr_reader :text
|
4
4
|
def initialize(text:)
|
5
5
|
@text = text
|
6
|
-
@tgr = EngTagger.new
|
7
6
|
end
|
8
7
|
|
9
8
|
def analyze
|
@@ -12,7 +11,7 @@ module WordCountAnalyzer
|
|
12
11
|
contraction_count = 0
|
13
12
|
hyphenated_word_count = 0
|
14
13
|
WordCountAnalyzer::Xhtml.new(string: text).replace.split(/\s+/).each_with_index do |token, index|
|
15
|
-
contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr:
|
14
|
+
contraction_count += 1 if WordCountAnalyzer::Contraction.new(token: token, following_token: text.split(/\s+/)[index + 1], tgr: EngTagger.new, hyphen: 'single').contraction?
|
16
15
|
hyphenated_word_count += 1 if WordCountAnalyzer::HyphenatedWord.new(token: token).hyphenated_word?
|
17
16
|
end
|
18
17
|
analysis['hyperlink'] = WordCountAnalyzer::Hyperlink.new(string: text).occurences
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module WordCountAnalyzer
|
2
2
|
class Counter
|
3
|
-
attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
|
3
|
+
attr_reader :text, :ellipsis, :hyperlink, :contraction, :hyphenated_word, :date, :number, :numbered_list, :xhtml, :forward_slash, :backslash, :dotted_line, :dashed_line, :underscore, :stray_punctuation, :equal_sign
|
4
4
|
def initialize(text:, **args)
|
5
5
|
@text = text
|
6
6
|
@ellipsis = args[:ellipsis] || 'ignore'
|
@@ -18,7 +18,6 @@ module WordCountAnalyzer
|
|
18
18
|
@underscore = args[:underscore] || 'ignore'
|
19
19
|
@stray_punctuation = args[:stray_punctuation] || 'ignore'
|
20
20
|
@equal_sign = 'ignore'
|
21
|
-
@tgr = EngTagger.new
|
22
21
|
end
|
23
22
|
|
24
23
|
def count
|
@@ -65,9 +64,10 @@ module WordCountAnalyzer
|
|
65
64
|
private
|
66
65
|
|
67
66
|
def word_count
|
67
|
+
tgr = EngTagger.new
|
68
68
|
processed_text = process_ellipsis(text)
|
69
69
|
processed_text = process_hyperlink(processed_text)
|
70
|
-
processed_text = process_contraction(processed_text)
|
70
|
+
processed_text = process_contraction(processed_text, tgr)
|
71
71
|
processed_text = process_date(processed_text)
|
72
72
|
processed_text = process_number(processed_text)
|
73
73
|
processed_text = process_number_list(processed_text)
|
@@ -106,7 +106,7 @@ module WordCountAnalyzer
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
|
-
def process_contraction(txt)
|
109
|
+
def process_contraction(txt, tgr)
|
110
110
|
if contraction.eql?('count_as_one')
|
111
111
|
txt
|
112
112
|
elsif contraction.eql?('count_as_multiple')
|