word_count_analyzer 0.0.14 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -6
- data/lib/word_count_analyzer/analyzer.rb +6 -5
- data/lib/word_count_analyzer/contraction.rb +1 -1
- data/lib/word_count_analyzer/counter.rb +15 -16
- data/lib/word_count_analyzer/date.rb +79 -106
- data/lib/word_count_analyzer/ellipsis.rb +10 -15
- data/lib/word_count_analyzer/hyperlink.rb +14 -25
- data/lib/word_count_analyzer/hyphenated_word.rb +1 -1
- data/lib/word_count_analyzer/number.rb +1 -1
- data/lib/word_count_analyzer/slash.rb +8 -7
- data/lib/word_count_analyzer/version.rb +1 -1
- data/spec/word_count_analyzer/counter_spec.rb +123 -160
- data/spec/word_count_analyzer/date_spec.rb +85 -85
- data/spec/word_count_analyzer/ellipsis_spec.rb +33 -33
- data/spec/word_count_analyzer/hyperlink_spec.rb +23 -23
- data/spec/word_count_analyzer/performance_spec.rb +46 -0
- data/word_count_analyzer.gemspec +1 -0
- metadata +18 -2
@@ -1,77 +1,77 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe WordCountAnalyzer::Hyperlink do
|
4
|
-
context '#hyperlink?' do
|
4
|
+
context '#hyperlink?(string)' do
|
5
5
|
it 'returns true if the string is a hyperlink #001' do
|
6
6
|
string = "http://www.example.com/this-IS-a_test/hello.html"
|
7
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
8
|
-
expect(ws.hyperlink?).to eq(true)
|
7
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
8
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
9
9
|
end
|
10
10
|
|
11
11
|
it 'returns true if the string is a hyperlink #002' do
|
12
12
|
string = "http://www.google.co.uk"
|
13
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
14
|
-
expect(ws.hyperlink?).to eq(true)
|
13
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
14
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
15
15
|
end
|
16
16
|
|
17
17
|
it 'returns true if the string is a hyperlink #003' do
|
18
18
|
string = "https://google.co.uk"
|
19
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
20
|
-
expect(ws.hyperlink?).to eq(true)
|
19
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
20
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
21
21
|
end
|
22
22
|
|
23
23
|
it 'returns false if the string is not a hyperlink #004' do
|
24
24
|
string = "hello"
|
25
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
26
|
-
expect(ws.hyperlink?).to eq(false)
|
25
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
26
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'returns false if the string is not a hyperlink #005' do
|
30
30
|
string = "john@gmail.com"
|
31
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
32
|
-
expect(ws.hyperlink?).to eq(false)
|
31
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
32
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'returns false if the string is not a hyperlink #006' do
|
36
36
|
string = "date:"
|
37
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
38
|
-
expect(ws.hyperlink?).to eq(false)
|
37
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
38
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
39
39
|
end
|
40
40
|
|
41
41
|
it 'returns false if the string is not a hyperlink #007' do
|
42
42
|
string = 'The file location is c:\Users\johndoe.'
|
43
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
44
|
-
expect(ws.hyperlink?).to eq(false)
|
43
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
44
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
48
|
context '#occurences' do
|
49
49
|
it 'returns the occurences of hyperlink tokens in a string #001' do
|
50
50
|
string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
|
51
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
52
|
-
expect(ws.occurences).to eq(2)
|
51
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
52
|
+
expect(ws.occurences(string)).to eq(2)
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
56
|
context '#replace' do
|
57
57
|
it 'replaces the hyperlinks in a string with regular tokens #001' do
|
58
58
|
string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
|
59
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
60
|
-
expect(ws.replace).to eq("Today the date is: Jan 1. Visit wslinkword or wslinkword ")
|
59
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
60
|
+
expect(ws.replace(string)).to eq("Today the date is: Jan 1. Visit wslinkword or wslinkword ")
|
61
61
|
end
|
62
62
|
|
63
63
|
it 'replaces the hyperlinks in a string with regular tokens #002' do
|
64
64
|
string = 'The file location is c:\Users\johndoe or d:\Users\john\www'
|
65
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
66
|
-
expect(ws.replace).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
|
65
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
66
|
+
expect(ws.replace(string)).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
70
70
|
context '#replace_split_at_period' do
|
71
71
|
it 'replaces the hyperlinks in a string with regular tokens, split at periods #001' do
|
72
72
|
string = "http://www.google.co.uk"
|
73
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
74
|
-
expect(ws.replace_split_at_period).to eq("http://www google co uk")
|
73
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
74
|
+
expect(ws.replace_split_at_period(string)).to eq("http://www google co uk")
|
75
75
|
end
|
76
76
|
end
|
77
77
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'benchmark'
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'stackprof'
|
5
|
+
|
6
|
+
RSpec.describe WordCountAnalyzer::Analyzer do
|
7
|
+
it 'is fast?' do
|
8
|
+
benchmark do
|
9
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
10
|
+
ws = WordCountAnalyzer::Analyzer.new(text: text).analyze
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'is analyzed' do
|
15
|
+
data = StackProf.run(mode: :cpu, interval: 1000) do
|
16
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
17
|
+
ws = WordCountAnalyzer::Analyzer.new(text: text).analyze
|
18
|
+
end
|
19
|
+
puts StackProf::Report.new(data).print_text
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'is analyzed 2' do
|
23
|
+
data = StackProf.run(mode: :cpu, interval: 1000) do
|
24
|
+
token = "when'd"
|
25
|
+
following_token = nil
|
26
|
+
WordCountAnalyzer::Contraction.new(token: token, following_token: following_token, tgr: EngTagger.new, hyphen: nil).contraction?
|
27
|
+
end
|
28
|
+
puts StackProf::Report.new(data).print_text
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'is analyzed 3' do
|
32
|
+
benchmark do
|
33
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
34
|
+
ws = WordCountAnalyzer::Counter.new(forward_slash: 'count_as_multiple')
|
35
|
+
300.times do
|
36
|
+
ws.count(text)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def benchmark
|
43
|
+
yield
|
44
|
+
time = Benchmark.realtime { yield }
|
45
|
+
puts "RUNTIME: #{time}"
|
46
|
+
end
|
data/word_count_analyzer.gemspec
CHANGED
@@ -22,5 +22,6 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_development_dependency "bundler"
|
23
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
24
|
spec.add_development_dependency "rspec"
|
25
|
+
spec.add_development_dependency "stackprof"
|
25
26
|
spec.add_runtime_dependency "engtagger"
|
26
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word_count_analyzer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.14
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: stackprof
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: engtagger
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,7 @@ files:
|
|
108
122
|
- spec/word_count_analyzer/hyphenated_word_spec.rb
|
109
123
|
- spec/word_count_analyzer/number_spec.rb
|
110
124
|
- spec/word_count_analyzer/numbered_list_spec.rb
|
125
|
+
- spec/word_count_analyzer/performance_spec.rb
|
111
126
|
- spec/word_count_analyzer/punctuation_spec.rb
|
112
127
|
- spec/word_count_analyzer/slash_spec.rb
|
113
128
|
- spec/word_count_analyzer/xhtml_spec.rb
|
@@ -148,6 +163,7 @@ test_files:
|
|
148
163
|
- spec/word_count_analyzer/hyphenated_word_spec.rb
|
149
164
|
- spec/word_count_analyzer/number_spec.rb
|
150
165
|
- spec/word_count_analyzer/numbered_list_spec.rb
|
166
|
+
- spec/word_count_analyzer/performance_spec.rb
|
151
167
|
- spec/word_count_analyzer/punctuation_spec.rb
|
152
168
|
- spec/word_count_analyzer/slash_spec.rb
|
153
169
|
- spec/word_count_analyzer/xhtml_spec.rb
|