word_count_analyzer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +554 -0
  8. data/Rakefile +2 -0
  9. data/lib/word_count_analyzer.rb +14 -0
  10. data/lib/word_count_analyzer/analyzer.rb +34 -0
  11. data/lib/word_count_analyzer/contraction.rb +176 -0
  12. data/lib/word_count_analyzer/counter.rb +230 -0
  13. data/lib/word_count_analyzer/date.rb +149 -0
  14. data/lib/word_count_analyzer/ellipsis.rb +48 -0
  15. data/lib/word_count_analyzer/hyperlink.rb +53 -0
  16. data/lib/word_count_analyzer/hyphenated_word.rb +23 -0
  17. data/lib/word_count_analyzer/number.rb +23 -0
  18. data/lib/word_count_analyzer/numbered_list.rb +61 -0
  19. data/lib/word_count_analyzer/punctuation.rb +52 -0
  20. data/lib/word_count_analyzer/slash.rb +84 -0
  21. data/lib/word_count_analyzer/version.rb +3 -0
  22. data/lib/word_count_analyzer/xhtml.rb +26 -0
  23. data/spec/spec_helper.rb +1 -0
  24. data/spec/word_count_analyzer/analyzer_spec.rb +11 -0
  25. data/spec/word_count_analyzer/contraction_spec.rb +124 -0
  26. data/spec/word_count_analyzer/counter_spec.rb +647 -0
  27. data/spec/word_count_analyzer/date_spec.rb +257 -0
  28. data/spec/word_count_analyzer/ellipsis_spec.rb +69 -0
  29. data/spec/word_count_analyzer/hyperlink_spec.rb +77 -0
  30. data/spec/word_count_analyzer/hyphenated_word_spec.rb +81 -0
  31. data/spec/word_count_analyzer/number_spec.rb +63 -0
  32. data/spec/word_count_analyzer/numbered_list_spec.rb +69 -0
  33. data/spec/word_count_analyzer/punctuation_spec.rb +91 -0
  34. data/spec/word_count_analyzer/slash_spec.rb +105 -0
  35. data/spec/word_count_analyzer/xhtml_spec.rb +65 -0
  36. data/word_count_analyzer.gemspec +26 -0
  37. metadata +153 -0
@@ -0,0 +1,149 @@
module WordCountAnalyzer
  # Finds dates inside a string and swaps each one for the single placeholder
  # token " wsdateword ".
  #
  # Two families of dates are handled:
  # * "long" dates that contain a month name, optionally preceded by a day
  #   of the week ("Tues. March 3rd, 2011", "2011 March 3", "03-mar-2011")
  # * number-only dates ("15/01/2014", "2014/01/15/", "20140115 ")
  class Date
    DOW = %w(monday tuesday wednesday thursday friday saturday sunday).freeze
    DOW_ABBR = %w(mon tu tue tues wed th thu thur thurs fri sat sun).freeze
    MONTHS = %w(january february march april may june july august september october november december).freeze
    MONTH_ABBR = %w(jan feb mar apr jun jul aug sep sept oct nov dec).freeze

    # Rubular: http://rubular.com/r/73CZ2HU0q6
    DMY_MDY_REGEX = /(\d{1,2}(\/|\.|-)){2}\d{4}\.?/

    # Rubular: http://rubular.com/r/GWbuWXw4t0
    YMD_YDM_REGEX = /\d{4}(\/|\.|-)(\d{1,2}(\/|\.|-)){2}\.?/

    # Rubular: http://rubular.com/r/SRZ27XNlvR
    DIGIT_ONLY_YEAR_FIRST_REGEX = /[12]\d{7}\D\.?/

    # Rubular: http://rubular.com/r/mpVSeaKwdY
    DIGIT_ONLY_YEAR_LAST_REGEX = /\d{4}[12]\d{3}\D\.?/

    # Number-only patterns, in the order they are applied.
    NUMBER_ONLY_REGEXES = [
      DMY_MDY_REGEX,
      YMD_YDM_REGEX,
      DIGIT_ONLY_YEAR_FIRST_REGEX,
      DIGIT_ONLY_YEAR_LAST_REGEX
    ].freeze

    # Full month names followed by abbreviations: the order the month
    # patterns are tried in for every day name.
    ALL_MONTHS = (MONTHS + MONTH_ABBR).freeze

    # Token substituted for every detected date.
    PLACEHOLDER = ' wsdateword '.freeze

    # Regex source for an optional ordinal suffix after the day number.
    # "st" and "nd" are included so that "March 1st, 2011" is consumed as a
    # whole instead of being cut off after "March 1".
    ORDINAL_SUFFIX = '(st|nd|rd|th)*'.freeze

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when the string contains a long or number-only date
    def includes_date?
      long_date || number_only_date
    end

    # Replaces every detected date with the placeholder token.
    #
    # When the text contains the literal substring "day" the abbreviated
    # day-of-week pass runs before the full day-of-week pass; otherwise the
    # passes run in the opposite order. Number-only dates are replaced last.
    #
    # @return [String] a new string; the original is left untouched
    def replace
      long_dates_replaced =
        if string.include?('day')
          replace_full_day_dates(replace_abbreviated_day_dates(string.dup))
        else
          replace_abbreviated_day_dates(replace_full_day_dates(string.dup))
        end
      replace_number_only_dates(long_dates_replaced)
    end

    # @return [Integer] number of placeholder tokens produced by #replace
    def occurences
      replace.scan(/wsdateword/).size
    end

    # Replaces only the number-only dates, leaving month-name dates alone.
    #
    # @return [String] a new string
    def replace_number_only_date
      replace_number_only_dates(string)
    end

    private

    # True when any day/month combination forms a long date in the string.
    def long_date
      return true if DOW.any? { |day| ALL_MONTHS.any? { |month| check_for_matches(day, month) } }

      DOW_ABBR.any? do |day|
        MONTHS.any? { |month| string =~ abbreviated_day_pattern(day, month, month_abbreviated: false) } ||
          MONTH_ABBR.any? { |month| string =~ abbreviated_day_pattern(day, month, month_abbreviated: true) }
      end
    end

    # True when the string matches any of the number-only patterns.
    def number_only_date
      NUMBER_ONLY_REGEXES.any? { |regex| string =~ regex }
    end

    # True when the string matches any long-date pattern for this pair.
    def check_for_matches(day, month)
      full_day_patterns(day, month).any? { |pattern| string =~ pattern }
    end

    # Applies the abbreviated day-of-week patterns for every month.
    def replace_abbreviated_day_dates(text)
      DOW_ABBR.each do |day|
        MONTHS.each do |month|
          text = text.gsub(abbreviated_day_pattern(day, month, month_abbreviated: false), PLACEHOLDER)
        end
        MONTH_ABBR.each do |month|
          text = text.gsub(abbreviated_day_pattern(day, month, month_abbreviated: true), PLACEHOLDER)
        end
      end
      text
    end

    # Applies the six long-date patterns for every full day name and month.
    def replace_full_day_dates(text)
      DOW.each do |day|
        ALL_MONTHS.each do |month|
          full_day_patterns(day, month).each do |pattern|
            text = text.gsub(pattern, PLACEHOLDER)
          end
        end
      end
      text
    end

    # Applies the number-only patterns in their fixed order.
    def replace_number_only_dates(text)
      NUMBER_ONLY_REGEXES.reduce(text) { |result, regex| result.gsub(regex, PLACEHOLDER) }
    end

    # Pattern for "<abbreviated day> <month> <day number>, <year>".
    # An abbreviated month may itself be followed by periods.
    def abbreviated_day_pattern(day, month, month_abbreviated:)
      month_period = month_abbreviated ? '(\.)*' : ''
      /#{Regexp.escape(day)}(\.)*(,)*\s#{Regexp.escape(month)}#{month_period}\s\d+#{ORDINAL_SUFFIX}(,)*\s\d{4}\.?/i
    end

    # The long-date patterns for a full day name and a month, in application
    # order. Only the first pattern involves the day name. The optional
    # trailing period only matters when replacing, not when detecting.
    def full_day_patterns(day, month)
      d = Regexp.escape(day)
      m = Regexp.escape(month)
      [
        /#{d}(,)*\s#{m}(\.)*\s\d+#{ORDINAL_SUFFIX}(,)*\s\d{4}\.?/i,
        /#{m}(\.)*\s\d+#{ORDINAL_SUFFIX}(,)*\s\d{4}\.?/i,
        /\d{4}\.*\s#{m}\s\d+#{ORDINAL_SUFFIX}\.?/i,
        /\d{4}(\.|-|\/)*#{m}(\.|-|\/)*\d+\.?/i,
        /#{m}(\.)*\s\d+#{ORDINAL_SUFFIX}\.?/i,
        /\d{2}(\.|-|\/)*#{m}(\.|-|\/)*(\d{4}|\d{2})\.?/i
      ]
    end
  end
end
@@ -0,0 +1,48 @@
module WordCountAnalyzer
  # Detects ellipses (ASCII period runs and the Unicode "…" character) and
  # swaps each one for the placeholder token " wseword ".
  class Ellipsis
    # Rubular: http://rubular.com/r/i60hCK81fz
    THREE_CONSECUTIVE_REGEX = /\.{3}(?=\s+[A-Z])/

    # Rubular: http://rubular.com/r/mfdtSeuIf2
    FOUR_CONSECUTIVE_REGEX = /(?<=[^\.])\.{3}\.(?=[^\.])/

    # Rubular: http://rubular.com/r/YBG1dIHTRu
    THREE_SPACE_REGEX = /(\s\.){3}\s/

    # Rubular: http://rubular.com/r/2VvZ8wRbd8
    FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/

    # Exactly three periods surrounded by non-period characters. Lookarounds
    # are used so the neighbouring characters are required but not consumed;
    # consuming them would delete them on replace and hide an ellipsis that
    # directly follows (as in "a...b...c").
    OTHER_THREE_PERIOD_REGEX = /(?<=[^\.])\.{3}(?=[^\.])/

    UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/

    # All ellipsis patterns, in the order they are applied by #replace.
    ELLIPSIS_REGEXES = [
      THREE_CONSECUTIVE_REGEX,
      FOUR_CONSECUTIVE_REGEX,
      THREE_SPACE_REGEX,
      FOUR_SPACE_REGEX,
      OTHER_THREE_PERIOD_REGEX,
      UNICODE_ELLIPSIS
    ].freeze

    # Token substituted for every detected ellipsis.
    PLACEHOLDER = ' wseword '.freeze

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when any ellipsis pattern matches the string
    def includes_ellipsis?
      ELLIPSIS_REGEXES.any? { |regex| string =~ regex }
    end

    # @return [String] a new string with every ellipsis replaced by the placeholder
    def replace
      ELLIPSIS_REGEXES.reduce(string) { |text, regex| text.gsub(regex, PLACEHOLDER) }
    end

    # @return [Integer] number of whitespace-separated tokens in the replaced
    #   text that are exactly the placeholder word
    def occurences
      replace.split(' ').count('wseword')
    end
  end
end
@@ -0,0 +1,53 @@
module WordCountAnalyzer
  # Detects hyperlinks in a string and either replaces them with the
  # placeholder token " wslinkword " or splits them at their periods.
  #
  # A piece of text is treated as a hyperlink when it matches the generic
  # URI pattern, is not just "word:" and contains "http", "https" or "www"
  # followed by "." or ":".
  class Hyperlink
    NON_HYPERLINK_REGEX = /\A\w+:$/

    # Rubular: http://rubular.com/r/fXa4lp0gfS
    HYPERLINK_REGEX = /(http|https|www)(\.|:)/

    # Token substituted for every detected hyperlink.
    PLACEHOLDER = ' wslinkword '.freeze

    # Marker that ends an href value inside an anchor tag; only the part of
    # a token before it is treated as the link.
    HREF_END = '">'.freeze

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when the whole string looks like a hyperlink
    def hyperlink?
      hyperlink_text?(string)
    end

    # @return [Integer] number of URI matches that captured more than one
    #   component
    def occurences
      string.scan(uri_regexp).count { |captures| captures.compact.size > 1 }
    end

    # @return [String] a copy of the string with every hyperlink token
    #   replaced by the placeholder
    def replace
      substitute_links { |_link| PLACEHOLDER }
    end

    # @return [String] a copy of the string where the periods inside every
    #   hyperlink token are turned into spaces
    def replace_split_at_period
      substitute_links { |link| link.split('.').join(' ') }
    end

    private

    # Generic URI pattern. URI::DEFAULT_PARSER.make_regexp is the documented
    # replacement for the obsolete URI.regexp and builds the same pattern.
    def uri_regexp
      URI::DEFAULT_PARSER.make_regexp
    end

    # Shared predicate behind #hyperlink? and the per-token checks.
    def hyperlink_text?(text)
      [uri_regexp, HYPERLINK_REGEX].all? { |regex| text =~ regex } && text !~ NON_HYPERLINK_REGEX
    end

    # Walks the whitespace-separated tokens of the original string and, for
    # each token that is a hyperlink, replaces every literal occurrence of
    # the link in the result with whatever the block returns for it.
    def substitute_links
      string.split(/\s+/).reduce(string.dup) do |text, token|
        next text unless hyperlink_text?(token)

        link = token.include?(HREF_END) ? token.split(HREF_END)[0] : token
        # An empty link (token starting with '">') would match between every
        # character and flood the text with replacements.
        next text if link.nil? || link.empty?

        text.gsub(link) { |match| yield(match) }
      end
    end
  end
end
@@ -0,0 +1,23 @@
module WordCountAnalyzer
  # Wraps a single token and answers whether it is a hyphenated word, how
  # many parts it has, and what it looks like with the hyphens turned into
  # spaces.
  class HyphenatedWord
    # Rubular: http://rubular.com/r/RjZ7qi0uFf
    DASHED_LINE_REGEX = /\s-{2,}(\s|$)|\A-{2,}(\s|$)/

    # Characters a token is split on.
    # NOTE(review): the class also contains a comma, so tokens are split on
    # commas as well as hyphens - confirm this is intended.
    SEPARATOR_REGEX = /[﹘,-]/

    attr_reader :token

    # @param token [String] the token to inspect; dashed lines (runs of two
    #   or more hyphens standing alone) are stripped before it is stored
    def initialize(token:)
      @token = token.gsub(DASHED_LINE_REGEX, '')
    end

    # @return [Boolean] true when the token contains "-" or "﹘" and is not
    #   a hyperlink
    def hyphenated_word?
      contains_hyphen = token.include?('-') || token.include?('﹘')
      contains_hyphen && !WordCountAnalyzer::Hyperlink.new(string: token).hyperlink?
    end

    # @return [Integer] number of parts the token splits into
    def count_as_multiple
      token.split(SEPARATOR_REGEX).length
    end

    # @return [String] the token with its separators replaced by spaces
    def replace
      token.split(SEPARATOR_REGEX).join(' ')
    end
  end
end
@@ -0,0 +1,23 @@
module WordCountAnalyzer
  # Detects standalone numbers and swaps each for the placeholder token
  # " wsnumword ". Dates and numbered-list markers are processed out of the
  # text first, via Date#replace and NumberedList#replace.
  class Number
    # Rubular: http://rubular.com/r/OGj82uEu8d
    NUMBER_REGEX = /(?<=\A)\D?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)|(?<=\s)\D?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)/

    # Token substituted for every detected number.
    PLACEHOLDER = ' wsnumword '.freeze

    attr_reader :string

    # @param string [String] the raw text; the stored string is the text
    #   after date replacement followed by numbered-list replacement
    def initialize(string:)
      without_dates = WordCountAnalyzer::Date.new(string: string).replace
      @string = WordCountAnalyzer::NumberedList.new(string: without_dates).replace
    end

    # @return [Boolean] true when the processed string contains a number
    def includes_number?
      !(string !~ NUMBER_REGEX)
    end

    # @return [String] the processed string with numbers replaced by the placeholder
    def replace
      string.gsub(NUMBER_REGEX, PLACEHOLDER)
    end

    # @return [Integer] number of placeholder tokens produced by #replace
    def occurences
      replace.scan(/wsnumword/).size
    end
  end
end
@@ -0,0 +1,61 @@
module WordCountAnalyzer
  # Detects numbered-list markers ("1.", "2.)", ...) and removes the ones
  # that belong to a run of consecutive numbers.
  #
  # A marker counts as a list item when its number continues the sequence
  # with its neighbour: the next marker is one higher, the previous marker
  # is one lower, or the pair wraps between 9 and 0.
  class NumberedList
    # Rubular: http://rubular.com/r/RKmRH9Y4oO
    NUMBERED_LIST_REGEX = /(?<=\s)\d{1,2}\.(?=\s)|^\d{1,2}\.(?=\s)|(?<=\s)\d{1,2}\.\)|^\d{1,2}\.\)/

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when the string has at least two list items
    def includes_numbered_list?
      !(string !~ NUMBERED_LIST_REGEX) && has_at_least_two_items?
    end

    # Removes every marker that is a list item; other matches are kept.
    #
    # gsub visits the matches in the same order as scan, so the n-th match
    # lines up with the n-th flag. This also removes "1.)" style markers,
    # which the regex matches.
    #
    # @return [String] a new string
    def replace
      flags = list_item_flags
      position = -1
      string.gsub(NUMBERED_LIST_REGEX) do |marker|
        position += 1
        flags[position] ? '' : marker
      end
    end

    # @return [Integer] number of markers that are list items
    def occurences
      count_list_items_in_array
    end

    private

    def has_at_least_two_items?
      count_list_items_in_array >= 2
    end

    def count_list_items_in_array
      list_item_flags.count(true)
    end

    # One boolean per regex match, true when that marker is a list item.
    def list_item_flags
      numbers = string.scan(NUMBERED_LIST_REGEX).map(&:to_i)
      numbers.each_with_index.map do |current, i|
        # The first marker has no predecessor; indexing with -1 would wrap
        # around to the last marker and produce false positives.
        previous = i.zero? ? nil : numbers[i - 1]
        following = numbers[i + 1]
        following == current + 1 ||
          previous == current - 1 ||
          (current.zero? && previous == 9) ||
          (current == 9 && following == 0)
      end
    end
  end
end
@@ -0,0 +1,52 @@
module WordCountAnalyzer
  # Counts and removes punctuation that stands on its own in a string:
  # dotted lines, dashed lines, underscore lines and stray punctuation
  # marks surrounded by whitespace.
  class Punctuation
    # Rubular: http://rubular.com/r/ZVBsZVkiqC
    DOTTED_LINE_REGEX = /…{2,}|\.{5,}/

    # Rubular: http://rubular.com/r/RjZ7qi0uFf
    DASHED_LINE_REGEX = /(?<=\s)-{2,}(\s|$)|\A-{2,}(?=(\s|$))/

    # Rubular: http://rubular.com/r/hNofimZwdh
    UNDERSCORE_REGEX = /(?<=\s)_{2,}(\s|$)|\A_{2,}(?=(\s|$))/

    # Rubular: http://rubular.com/r/FexKxGUuIe
    STRAY_PUNCTUATION_REGEX = /(?<=\s)[[:punct:]](?=(\s|$))|(?<=\s)\|(?=(\s|$))/

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Integer] number of dotted lines (2+ "…" or 5+ periods)
    def dotted_line_ocurrances
      count_matches(DOTTED_LINE_REGEX)
    end

    # @return [Integer] number of standalone runs of two or more hyphens
    def dashed_line_ocurrances
      count_matches(DASHED_LINE_REGEX)
    end

    # @return [Integer] number of standalone runs of two or more underscores
    def underscore_ocurrances
      count_matches(UNDERSCORE_REGEX)
    end

    # @return [Integer] number of single punctuation marks surrounded by whitespace
    def stray_punctuation_occurences
      count_matches(STRAY_PUNCTUATION_REGEX)
    end

    # @return [String] a new string with dotted lines removed
    def replace_dotted_line
      strip_matches(DOTTED_LINE_REGEX)
    end

    # @return [String] a new string with dashed lines removed
    def replace_dashed_line
      strip_matches(DASHED_LINE_REGEX)
    end

    # @return [String] a new string with underscore lines removed
    def replace_underscore
      strip_matches(UNDERSCORE_REGEX)
    end

    # @return [String] a new string with stray punctuation removed
    def replace_stray_punctuation
      strip_matches(STRAY_PUNCTUATION_REGEX)
    end

    private

    # Number of times the pattern matches. scan yields one entry per match
    # whether or not the pattern has capture groups.
    def count_matches(regex)
      string.scan(regex).size
    end

    # Copy of the string with every match of the pattern deleted.
    def strip_matches(regex)
      string.gsub(regex, '')
    end
  end
end
@@ -0,0 +1,84 @@
module WordCountAnalyzer
  # Detects tokens joined by forward slashes or backslashes and splits or
  # replaces them.
  #
  # Before any slash handling the input is preprocessed according to the
  # options: hyperlinks, XHTML and dates are each replaced through their own
  # class unless the matching option asks to leave them alone.
  class Slash
    # Rubular: http://rubular.com/r/AqvcH29sgg
    FORWARD_SLASH_REGEX = /(?<=\s)(\S+\/)+\S+|(?<=\A)(\S+\/)+\S+/

    # Rubular: http://rubular.com/r/tuFWtdMs4G
    BACKSLASH_REGEX = /\S+\\\S+/

    # Values of the hyperlink option for which hyperlinks are not replaced
    # during preprocessing.
    HYPERLINK_UNTOUCHED_OPTIONS = %w(no_special_treatment split_at_period).freeze

    attr_reader :string, :processed_string, :date, :xhtml, :hyperlink

    # @param string [String] the text to inspect
    # @param args [Hash] optional :date, :xhtml and :hyperlink settings.
    #   date 'no_special_treatment' skips date replacement, xhtml 'keep'
    #   skips XHTML replacement, and hyperlink 'no_special_treatment' or
    #   'split_at_period' skips hyperlink replacement.
    def initialize(string:, **args)
      @string = string
      @date = args[:date] || nil
      @xhtml = args[:xhtml] || nil
      @hyperlink = args[:hyperlink] || nil
      @processed_string = preprocess(string)
    end

    # @return [Boolean] true when the processed string has a forward-slash token
    def includes_forward_slash?
      !(processed_string !~ FORWARD_SLASH_REGEX)
    end

    # @return [Boolean] true when the processed string has a backslash token
    def includes_backslash?
      !(processed_string !~ BACKSLASH_REGEX)
    end

    # @return [Integer] number of forward-slash tokens
    def forward_slash_occurences
      processed_string.scan(FORWARD_SLASH_REGEX).size
    end

    # Splits every forward-slash token into space-separated parts.
    #
    # @return [String] the split text; neither the input string nor
    #   processed_string is modified
    def replace_forward_slashes
      return processed_string if processed_string !~ FORWARD_SLASH_REGEX

      split_forward_slashes(processed_string)
    end

    # Like #replace_forward_slashes, but number-only dates are replaced
    # with the date placeholder first so their slashes survive.
    #
    # @return [String] always a string, also when the date replacement
    #   leaves no slash token behind
    def replace_forward_slashes_except_dates
      return processed_string if processed_string !~ FORWARD_SLASH_REGEX

      without_dates = WordCountAnalyzer::Date.new(string: processed_string).replace_number_only_date
      split_forward_slashes(without_dates)
    end

    # @return [Integer] number of backslash tokens
    def backslash_occurences
      processed_string.scan(BACKSLASH_REGEX).size
    end

    # Replaces every backslash token with one " word " per part.
    #
    # @return [String] the replaced text; processed_string is not modified
    def replace_backslashes
      return processed_string if processed_string !~ BACKSLASH_REGEX

      processed_string.gsub(BACKSLASH_REGEX) do |match|
        ' word ' * match.split(/\\+/).length
      end
    end

    private

    # Runs the enabled replacements in a fixed order: hyperlinks first,
    # then XHTML, then dates.
    def preprocess(text)
      unless HYPERLINK_UNTOUCHED_OPTIONS.include?(hyperlink)
        text = WordCountAnalyzer::Hyperlink.new(string: text).replace
      end
      text = WordCountAnalyzer::Xhtml.new(string: text).replace unless xhtml.eql?('keep')
      text = WordCountAnalyzer::Date.new(string: text).replace unless date.eql?('no_special_treatment')
      text
    end

    # Non-mutating split, so the caller's string is never changed in place.
    def split_forward_slashes(text)
      text.gsub(FORWARD_SLASH_REGEX) { |match| match.split(/\/+/).join(' ') }
    end
  end
end