word_count_analyzer 0.0.14 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -6
- data/lib/word_count_analyzer/analyzer.rb +6 -5
- data/lib/word_count_analyzer/contraction.rb +1 -1
- data/lib/word_count_analyzer/counter.rb +15 -16
- data/lib/word_count_analyzer/date.rb +79 -106
- data/lib/word_count_analyzer/ellipsis.rb +10 -15
- data/lib/word_count_analyzer/hyperlink.rb +14 -25
- data/lib/word_count_analyzer/hyphenated_word.rb +1 -1
- data/lib/word_count_analyzer/number.rb +1 -1
- data/lib/word_count_analyzer/slash.rb +8 -7
- data/lib/word_count_analyzer/version.rb +1 -1
- data/spec/word_count_analyzer/counter_spec.rb +123 -160
- data/spec/word_count_analyzer/date_spec.rb +85 -85
- data/spec/word_count_analyzer/ellipsis_spec.rb +33 -33
- data/spec/word_count_analyzer/hyperlink_spec.rb +23 -23
- data/spec/word_count_analyzer/performance_spec.rb +46 -0
- data/word_count_analyzer.gemspec +1 -0
- metadata +18 -2
@@ -1,77 +1,77 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe WordCountAnalyzer::Hyperlink do
|
4
|
-
context '#hyperlink?' do
|
4
|
+
context '#hyperlink?(string)' do
|
5
5
|
it 'returns true if the string is a hyperlink #001' do
|
6
6
|
string = "http://www.example.com/this-IS-a_test/hello.html"
|
7
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
8
|
-
expect(ws.hyperlink?).to eq(true)
|
7
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
8
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
9
9
|
end
|
10
10
|
|
11
11
|
it 'returns true if the string is a hyperlink #002' do
|
12
12
|
string = "http://www.google.co.uk"
|
13
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
14
|
-
expect(ws.hyperlink?).to eq(true)
|
13
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
14
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
15
15
|
end
|
16
16
|
|
17
17
|
it 'returns true if the string is a hyperlink #003' do
|
18
18
|
string = "https://google.co.uk"
|
19
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
20
|
-
expect(ws.hyperlink?).to eq(true)
|
19
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
20
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
21
21
|
end
|
22
22
|
|
23
23
|
it 'returns false if the string is not a hyperlink #004' do
|
24
24
|
string = "hello"
|
25
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
26
|
-
expect(ws.hyperlink?).to eq(false)
|
25
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
26
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'returns false if the string is not a hyperlink #005' do
|
30
30
|
string = "john@gmail.com"
|
31
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
32
|
-
expect(ws.hyperlink?).to eq(false)
|
31
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
32
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'returns false if the string is not a hyperlink #006' do
|
36
36
|
string = "date:"
|
37
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
38
|
-
expect(ws.hyperlink?).to eq(false)
|
37
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
38
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
39
39
|
end
|
40
40
|
|
41
41
|
it 'returns false if the string is not a hyperlink #007' do
|
42
42
|
string = 'The file location is c:\Users\johndoe.'
|
43
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
44
|
-
expect(ws.hyperlink?).to eq(false)
|
43
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
44
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
48
|
context '#occurences' do
|
49
49
|
it 'returns the occurences of hyperlink tokens in a string #001' do
|
50
50
|
string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
|
51
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
52
|
-
expect(ws.occurences).to eq(2)
|
51
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
52
|
+
expect(ws.occurences(string)).to eq(2)
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
56
|
context '#replace' do
|
57
57
|
it 'replaces the hyperlinks in a string with regular tokens #001' do
|
58
58
|
string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
|
59
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
60
|
-
expect(ws.replace).to eq("Today the date is: Jan 1. Visit wslinkword or wslinkword ")
|
59
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
60
|
+
expect(ws.replace(string)).to eq("Today the date is: Jan 1. Visit wslinkword or wslinkword ")
|
61
61
|
end
|
62
62
|
|
63
63
|
it 'replaces the hyperlinks in a string with regular tokens #002' do
|
64
64
|
string = 'The file location is c:\Users\johndoe or d:\Users\john\www'
|
65
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
66
|
-
expect(ws.replace).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
|
65
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
66
|
+
expect(ws.replace(string)).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
70
70
|
context '#replace_split_at_period' do
|
71
71
|
it 'replaces the hyperlinks in a string with regular tokens, split at periods #001' do
|
72
72
|
string = "http://www.google.co.uk"
|
73
|
-
ws = WordCountAnalyzer::Hyperlink.new
|
74
|
-
expect(ws.replace_split_at_period).to eq("http://www google co uk")
|
73
|
+
ws = WordCountAnalyzer::Hyperlink.new
|
74
|
+
expect(ws.replace_split_at_period(string)).to eq("http://www google co uk")
|
75
75
|
end
|
76
76
|
end
|
77
77
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'benchmark'
|
3
|
+
require 'spec_helper'
|
4
|
+
require 'stackprof'
|
5
|
+
|
6
|
+
RSpec.describe WordCountAnalyzer::Analyzer do
|
7
|
+
it 'is fast?' do
|
8
|
+
benchmark do
|
9
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
10
|
+
ws = WordCountAnalyzer::Analyzer.new(text: text).analyze
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'is analyzed' do
|
15
|
+
data = StackProf.run(mode: :cpu, interval: 1000) do
|
16
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
17
|
+
ws = WordCountAnalyzer::Analyzer.new(text: text).analyze
|
18
|
+
end
|
19
|
+
puts StackProf::Report.new(data).print_text
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'is analyzed 2' do
|
23
|
+
data = StackProf.run(mode: :cpu, interval: 1000) do
|
24
|
+
token = "when'd"
|
25
|
+
following_token = nil
|
26
|
+
WordCountAnalyzer::Contraction.new(token: token, following_token: following_token, tgr: EngTagger.new, hyphen: nil).contraction?
|
27
|
+
end
|
28
|
+
puts StackProf::Report.new(data).print_text
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'is analyzed 3' do
|
32
|
+
benchmark do
|
33
|
+
text = "This string has a date: Monday, November 3rd, 2011. I was thinking... it also shouldn't have too many contractions, maybe 2. <html> Some HTML and a hyphenated-word</html>. Don't count punctuation ? ? ? Please visit the ____________ ------------ ........ go-to site: https://www.example-site.com today. Let's add a list 1. item a 2. item b 3. item c. Now let's add he/she/it or a c:\\Users\\john. 2/15/2012 is the date! { HYPERLINK 'http://www.hello.com' }"
|
34
|
+
ws = WordCountAnalyzer::Counter.new(forward_slash: 'count_as_multiple')
|
35
|
+
300.times do
|
36
|
+
ws.count(text)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def benchmark
|
43
|
+
yield
|
44
|
+
time = Benchmark.realtime { yield }
|
45
|
+
puts "RUNTIME: #{time}"
|
46
|
+
end
|
data/word_count_analyzer.gemspec
CHANGED
@@ -22,5 +22,6 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_development_dependency "bundler"
|
23
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
24
|
spec.add_development_dependency "rspec"
|
25
|
+
spec.add_development_dependency "stackprof"
|
25
26
|
spec.add_runtime_dependency "engtagger"
|
26
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word_count_analyzer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.14
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: stackprof
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: engtagger
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +122,7 @@ files:
|
|
108
122
|
- spec/word_count_analyzer/hyphenated_word_spec.rb
|
109
123
|
- spec/word_count_analyzer/number_spec.rb
|
110
124
|
- spec/word_count_analyzer/numbered_list_spec.rb
|
125
|
+
- spec/word_count_analyzer/performance_spec.rb
|
111
126
|
- spec/word_count_analyzer/punctuation_spec.rb
|
112
127
|
- spec/word_count_analyzer/slash_spec.rb
|
113
128
|
- spec/word_count_analyzer/xhtml_spec.rb
|
@@ -148,6 +163,7 @@ test_files:
|
|
148
163
|
- spec/word_count_analyzer/hyphenated_word_spec.rb
|
149
164
|
- spec/word_count_analyzer/number_spec.rb
|
150
165
|
- spec/word_count_analyzer/numbered_list_spec.rb
|
166
|
+
- spec/word_count_analyzer/performance_spec.rb
|
151
167
|
- spec/word_count_analyzer/punctuation_spec.rb
|
152
168
|
- spec/word_count_analyzer/slash_spec.rb
|
153
169
|
- spec/word_count_analyzer/xhtml_spec.rb
|