word_count_analyzer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +554 -0
  8. data/Rakefile +2 -0
  9. data/lib/word_count_analyzer.rb +14 -0
  10. data/lib/word_count_analyzer/analyzer.rb +34 -0
  11. data/lib/word_count_analyzer/contraction.rb +176 -0
  12. data/lib/word_count_analyzer/counter.rb +230 -0
  13. data/lib/word_count_analyzer/date.rb +149 -0
  14. data/lib/word_count_analyzer/ellipsis.rb +48 -0
  15. data/lib/word_count_analyzer/hyperlink.rb +53 -0
  16. data/lib/word_count_analyzer/hyphenated_word.rb +23 -0
  17. data/lib/word_count_analyzer/number.rb +23 -0
  18. data/lib/word_count_analyzer/numbered_list.rb +61 -0
  19. data/lib/word_count_analyzer/punctuation.rb +52 -0
  20. data/lib/word_count_analyzer/slash.rb +84 -0
  21. data/lib/word_count_analyzer/version.rb +3 -0
  22. data/lib/word_count_analyzer/xhtml.rb +26 -0
  23. data/spec/spec_helper.rb +1 -0
  24. data/spec/word_count_analyzer/analyzer_spec.rb +11 -0
  25. data/spec/word_count_analyzer/contraction_spec.rb +124 -0
  26. data/spec/word_count_analyzer/counter_spec.rb +647 -0
  27. data/spec/word_count_analyzer/date_spec.rb +257 -0
  28. data/spec/word_count_analyzer/ellipsis_spec.rb +69 -0
  29. data/spec/word_count_analyzer/hyperlink_spec.rb +77 -0
  30. data/spec/word_count_analyzer/hyphenated_word_spec.rb +81 -0
  31. data/spec/word_count_analyzer/number_spec.rb +63 -0
  32. data/spec/word_count_analyzer/numbered_list_spec.rb +69 -0
  33. data/spec/word_count_analyzer/punctuation_spec.rb +91 -0
  34. data/spec/word_count_analyzer/slash_spec.rb +105 -0
  35. data/spec/word_count_analyzer/xhtml_spec.rb +65 -0
  36. data/word_count_analyzer.gemspec +26 -0
  37. metadata +153 -0
@@ -0,0 +1,149 @@
1
module WordCountAnalyzer
  # Detects calendar dates in free text and swaps each one for the placeholder
  # token ' wsdateword ', so that a whole date can be treated as a single word.
  #
  # Two families of dates are handled:
  # * "long" dates built from day names, month names and numbers
  #   (e.g. "Monday, April 4th, 2011", "2011 april 4", "04-apr-11");
  # * digit-only dates matched by the *_REGEX constants below.
  class Date
    DOW = %w(monday tuesday wednesday thursday friday saturday sunday).freeze
    DOW_ABBR = %w(mon tu tue tues wed th thu thur thurs fri sat sun).freeze
    MONTHS = %w(january february march april may june july august september october november december).freeze
    MONTH_ABBR = %w(jan feb mar apr jun jul aug sep sept oct nov dec).freeze

    # Rubular: http://rubular.com/r/73CZ2HU0q6
    DMY_MDY_REGEX = /(\d{1,2}(\/|\.|-)){2}\d{4}\.?/

    # Rubular: http://rubular.com/r/GWbuWXw4t0
    YMD_YDM_REGEX = /\d{4}(\/|\.|-)(\d{1,2}(\/|\.|-)){2}\.?/

    # Rubular: http://rubular.com/r/SRZ27XNlvR
    DIGIT_ONLY_YEAR_FIRST_REGEX = /[12]\d{7}\D\.?/

    # Rubular: http://rubular.com/r/mpVSeaKwdY
    DIGIT_ONLY_YEAR_LAST_REGEX = /\d{4}[12]\d{3}\D\.?/

    # Digit-only date patterns, in the order they are applied.
    NUMBER_ONLY_REGEXES = [
      DMY_MDY_REGEX,
      YMD_YDM_REGEX,
      DIGIT_ONLY_YEAR_FIRST_REGEX,
      DIGIT_ONLY_YEAR_LAST_REGEX
    ].freeze

    # Token every recognised date is replaced with.
    PLACEHOLDER = ' wsdateword '.freeze

    # Ordinal endings accepted after a day number. The original patterns only
    # knew "rd" and "th", so "1st" / "2nd" dates were only partially replaced.
    ORDINAL_SUFFIXES = 'st|nd|rd|th'.freeze

    # Builds the six long-date patterns for one full day name / month pair.
    # Only the first pattern depends on +day+; the order is significant
    # because the patterns are applied one after another.
    def self.full_day_regexes(day, month)
      d = Regexp.escape(day)
      m = Regexp.escape(month)
      ord = "(#{ORDINAL_SUFFIXES})*"
      [
        /#{d}(,)*\s#{m}(\.)*\s\d+#{ord}(,)*\s\d{4}\.?/i,
        /#{m}(\.)*\s\d+#{ord}(,)*\s\d{4}\.?/i,
        /\d{4}\.*\s#{m}\s\d+#{ord}\.?/i,
        /\d{4}(\.|-|\/)*#{m}(\.|-|\/)*\d+\.?/i,
        /#{m}(\.)*\s\d+#{ord}\.?/i,
        /\d{2}(\.|-|\/)*#{m}(\.|-|\/)*(\d{4}|\d{2})\.?/i
      ]
    end
    private_class_method :full_day_regexes

    # Builds the single pattern for an abbreviated day name followed by a
    # month, day number and four-digit year. +month_period+ controls whether
    # periods are tolerated after the month (used for abbreviated months).
    def self.abbreviated_day_regex(day, month, month_period)
      d = Regexp.escape(day)
      m = Regexp.escape(month)
      period = month_period ? '(\.)*' : ''
      /#{d}(\.)*(,)*\s#{m}#{period}\s\d+(#{ORDINAL_SUFFIXES})*(,)*\s\d{4}\.?/i
    end
    private_class_method :abbreviated_day_regex

    # Every full-day-name pattern, compiled once. Order: for each day, all
    # full month names, then all abbreviated month names.
    FULL_DAY_REGEXES = DOW.flat_map do |day|
      (MONTHS + MONTH_ABBR).flat_map { |month| full_day_regexes(day, month) }
    end.freeze

    # Every abbreviated-day-name pattern, compiled once, in the same
    # day-major order as FULL_DAY_REGEXES.
    ABBREVIATED_DAY_REGEXES = DOW_ABBR.flat_map do |day|
      MONTHS.map { |month| abbreviated_day_regex(day, month, false) } +
        MONTH_ABBR.map { |month| abbreviated_day_regex(day, month, true) }
    end.freeze

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when the text contains a long or digit-only date
    def includes_date?
      long_date || number_only_date
    end

    # Replaces every recognised date with PLACEHOLDER.
    #
    # When the text contains the literal substring 'day' the abbreviated-day
    # patterns run before the full-day patterns; otherwise the order is
    # reversed. Digit-only patterns always run last.
    #
    # @return [String] a new string; the receiver's +string+ is not modified
    def replace
      long_patterns =
        if string.include?('day')
          ABBREVIATED_DAY_REGEXES + FULL_DAY_REGEXES
        else
          FULL_DAY_REGEXES + ABBREVIATED_DAY_REGEXES
        end
      substitute(string.dup, long_patterns + NUMBER_ONLY_REGEXES)
    end

    # @return [Integer] number of placeholders produced by #replace
    def occurences
      replace.scan(/wsdateword/).size
    end

    # Replaces only digit-only dates (long dates are left untouched).
    #
    # @return [String] a new string
    def replace_number_only_date
      substitute(string, NUMBER_ONLY_REGEXES)
    end

    private

    # True when any long-date pattern matches the text.
    def long_date
      (FULL_DAY_REGEXES + ABBREVIATED_DAY_REGEXES).any? { |regex| string =~ regex }
    end

    # True when any digit-only pattern matches the text.
    def number_only_date
      NUMBER_ONLY_REGEXES.any? { |regex| string =~ regex }
    end

    # Applies each pattern in turn, feeding the result of one substitution
    # into the next.
    def substitute(text, regexes)
      regexes.reduce(text) { |current, regex| current.gsub(regex, PLACEHOLDER) }
    end
  end
end
@@ -0,0 +1,48 @@
1
module WordCountAnalyzer
  # Detects ellipses ("...", "....", ". . .", "…") in a string and swaps each
  # one for the placeholder token ' wseword '.
  class Ellipsis
    # Rubular: http://rubular.com/r/i60hCK81fz
    THREE_CONSECUTIVE_REGEX = /\.{3}(?=\s+[A-Z])/

    # Rubular: http://rubular.com/r/mfdtSeuIf2
    FOUR_CONSECUTIVE_REGEX = /(?<=[^\.])\.{3}\.(?=[^\.])/

    # Rubular: http://rubular.com/r/YBG1dIHTRu
    THREE_SPACE_REGEX = /(\s\.){3}\s/

    # Rubular: http://rubular.com/r/2VvZ8wRbd8
    FOUR_SPACE_REGEX = /(?<=[a-z])(\.\s){3}\.(\z|$|\n)/

    # Exactly three periods with a non-period character on each side. The
    # neighbours are asserted with lookarounds rather than consumed, so the
    # surrounding characters survive replacement and two ellipses separated
    # by a single character ("a...b...c") are both found.
    OTHER_THREE_PERIOD_REGEX = /(?<=[^\.])\.{3}(?=[^\.])/

    UNICODE_ELLIPSIS = /(?<=[^…])…{1}(?=[^…])/

    # All ellipsis patterns, in the order #replace applies them.
    ELLIPSIS_REGEXES = [
      THREE_CONSECUTIVE_REGEX,
      FOUR_CONSECUTIVE_REGEX,
      THREE_SPACE_REGEX,
      FOUR_SPACE_REGEX,
      OTHER_THREE_PERIOD_REGEX,
      UNICODE_ELLIPSIS
    ].freeze

    PLACEHOLDER = ' wseword '.freeze

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when any ellipsis pattern matches the text
    def includes_ellipsis?
      ELLIPSIS_REGEXES.any? { |regex| string =~ regex }
    end

    # Replaces every ellipsis with PLACEHOLDER, applying the patterns one
    # after another.
    #
    # @return [String] a new string
    def replace
      ELLIPSIS_REGEXES.reduce(string) { |text, regex| text.gsub(regex, PLACEHOLDER) }
    end

    # @return [Integer] number of whitespace-separated tokens in the replaced
    #   text that are exactly the placeholder word
    def occurences
      replace.split(' ').count { |token| token.strip == 'wseword' }
    end
  end
end
@@ -0,0 +1,53 @@
1
# URI supplies the generic URI regular expression used below.
require 'uri'

module WordCountAnalyzer
  # Detects hyperlinks in a string and either replaces them with the
  # placeholder token ' wslinkword ' or splits them at periods.
  class Hyperlink
    NON_HYPERLINK_REGEX = /\A\w+:$/

    # Rubular: http://rubular.com/r/fXa4lp0gfS
    HYPERLINK_REGEX = /(http|https|www)(\.|:)/

    # Generic URI pattern, built once. This is what the obsolete URI.regexp
    # returns; caching it avoids rebuilding the expression for every token.
    URI_REGEX = URI::DEFAULT_PARSER.make_regexp

    # Marker that ends an attribute value inside a tag, e.g. href="…">.
    ATTRIBUTE_END = '">'.freeze

    PLACEHOLDER = ' wslinkword '.freeze

    attr_reader :string

    # @param string [String] the text (or single token) to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when the whole string looks like / contains a link
    def hyperlink?
      link?(string)
    end

    # @return [Integer] number of URI matches that captured more than one
    #   component
    def occurences
      string.scan(URI_REGEX).count { |groups| groups.compact.size > 1 }
    end

    # Replaces every link found in the whitespace-separated tokens with
    # PLACEHOLDER.
    #
    # @return [String] a new string
    def replace
      link_targets.reduce(string.dup) do |text, target|
        text.gsub(target, PLACEHOLDER)
      end
    end

    # Replaces the periods inside every link with spaces.
    #
    # @return [String] a new string
    def replace_split_at_period
      link_targets.reduce(string.dup) do |text, target|
        text.gsub(target) { |match| match.split('.').join(' ') }
      end
    end

    private

    # True when +text+ matches the generic URI pattern, is not just "word:",
    # and contains an http/https/www marker.
    def link?(text)
      !(text !~ URI_REGEX) && text !~ NON_HYPERLINK_REGEX && !(text !~ HYPERLINK_REGEX)
    end

    # The literal substrings to act on, one per link token, in token order.
    # For a token containing '">' only the part before the first '">' is
    # used. If that part is empty (the token starts with '">') the whole
    # token is used instead: an empty pattern would match between every
    # character of the text.
    def link_targets
      string.split(/\s+/).select { |token| link?(token) }.map do |token|
        next token unless token.include?(ATTRIBUTE_END)
        prefix = token.split(ATTRIBUTE_END).first
        prefix.nil? || prefix.empty? ? token : prefix
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module WordCountAnalyzer
  # Wraps a single token and answers questions about hyphenation. Dashed
  # lines ("--", "---", …) are removed from the token on construction.
  class HyphenatedWord
    # Rubular: http://rubular.com/r/RjZ7qi0uFf
    DASHED_LINE_REGEX = /\s-{2,}(\s|$)|\A-{2,}(\s|$)/

    attr_reader :token

    # @param token [String] the raw token; stored with dashed lines removed
    def initialize(token:)
      @token = token.gsub(DASHED_LINE_REGEX, '')
    end

    # @return [Boolean] true when the token contains '-' or '﹘' and is not a
    #   hyperlink
    def hyphenated_word?
      return false unless token.include?('-') || token.include?('﹘')
      !WordCountAnalyzer::Hyperlink.new(string: token).hyperlink?
    end

    # @return [Integer] number of parts the token splits into
    def count_as_multiple
      parts.length
    end

    # @return [String] the parts of the token joined by single spaces
    def replace
      parts.join(' ')
    end

    private

    # Splits the token on '﹘', ',' and '-'.
    def parts
      token.split(/[﹘,-]/)
    end
  end
end
@@ -0,0 +1,23 @@
1
module WordCountAnalyzer
  # Finds free-standing numbers in a string and swaps each one for the
  # placeholder token ' wsnumword '. The text is first passed through
  # Date#replace and NumberedList#replace, so it is their output that gets
  # scanned for numbers.
  class Number
    # Rubular: http://rubular.com/r/OGj82uEu8d
    NUMBER_REGEX = /(?<=\A)\D?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)|(?<=\s)\D?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)/

    attr_reader :string

    # @param string [String] the raw text; stored after date and
    #   numbered-list replacement
    def initialize(string:)
      without_dates = WordCountAnalyzer::Date.new(string: string).replace
      @string = WordCountAnalyzer::NumberedList.new(string: without_dates).replace
    end

    # @return [Boolean] true when the pre-processed text contains a number
    def includes_number?
      !(string =~ NUMBER_REGEX).nil?
    end

    # @return [String] a new string with every number replaced
    def replace
      string.gsub(NUMBER_REGEX, ' wsnumword ')
    end

    # @return [Integer] number of placeholders produced by #replace
    def occurences
      replace.scan('wsnumword').length
    end
  end
end
@@ -0,0 +1,61 @@
1
module WordCountAnalyzer
  # Detects numbered-list markers ("1.", "2.", "3.)" …) in a string. A marker
  # only counts as a list item when its number is consecutive with the marker
  # directly before or after it (with 9 -> 0 also treated as consecutive).
  class NumberedList
    # Rubular: http://rubular.com/r/RKmRH9Y4oO
    NUMBERED_LIST_REGEX = /(?<=\s)\d{1,2}\.(?=\s)|^\d{1,2}\.(?=\s)|(?<=\s)\d{1,2}\.\)|^\d{1,2}\.\)/

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Boolean] true when the text has a marker and at least two
    #   markers qualify as list items
    def includes_numbered_list?
      !(string !~ NUMBERED_LIST_REGEX) && has_at_least_two_items?
    end

    # Removes the markers that qualify as list items.
    #
    # @return [String] a new string
    def replace
      new_string = string.dup
      numbers = marker_numbers
      # Markers seen so far that were NOT removed. Removed markers vanish
      # from new_string, so the marker being processed is always the one at
      # position +skips+ among the matches that remain.
      skips = 0
      numbers.each_index do |i|
        unless list_item?(numbers, i)
          skips += 1
          next
        end
        expected = numbers[i].to_s
        new_string.gsub!(NUMBERED_LIST_REGEX).with_index do |match, index|
          index.eql?(skips) && match.chomp('.').eql?(expected) ? '' : match
        end
      end
      new_string
    end

    # @return [Integer] number of markers that qualify as list items
    def occurences
      count_list_items_in_array
    end

    private

    def has_at_least_two_items?
      count_list_items_in_array >= 2
    end

    def count_list_items_in_array
      numbers = marker_numbers
      numbers.each_index.count { |i| list_item?(numbers, i) }
    end

    # Integer value of every marker, in order of appearance.
    def marker_numbers
      string.scan(NUMBERED_LIST_REGEX).map(&:to_i)
    end

    # True when the marker at +index+ is consecutive with a direct neighbour.
    # The first marker has no predecessor: a plain numbers[index - 1] would
    # read numbers[-1] (the LAST marker) there and compare the two ends of
    # the list with each other.
    def list_item?(numbers, index)
      current = numbers[index]
      following = numbers[index + 1]
      preceding = index.zero? ? nil : numbers[index - 1]
      (current + 1).eql?(following) ||
        (current - 1).eql?(preceding) ||
        (current.eql?(0) && preceding.eql?(9)) ||
        (current.eql?(9) && following.eql?(0))
    end
  end
end
@@ -0,0 +1,52 @@
1
module WordCountAnalyzer
  # Counts and removes decorative punctuation in a string: dotted lines,
  # dashed lines, underscore runs and stand-alone punctuation marks.
  class Punctuation
    # Rubular: http://rubular.com/r/ZVBsZVkiqC
    DOTTED_LINE_REGEX = /…{2,}|\.{5,}/

    # Rubular: http://rubular.com/r/RjZ7qi0uFf
    DASHED_LINE_REGEX = /(?<=\s)-{2,}(\s|$)|\A-{2,}(?=(\s|$))/

    # Rubular: http://rubular.com/r/hNofimZwdh
    UNDERSCORE_REGEX = /(?<=\s)_{2,}(\s|$)|\A_{2,}(?=(\s|$))/

    # Rubular: http://rubular.com/r/FexKxGUuIe
    STRAY_PUNCTUATION_REGEX = /(?<=\s)[[:punct:]](?=(\s|$))|(?<=\s)\|(?=(\s|$))/

    attr_reader :string

    # @param string [String] the text to inspect
    def initialize(string:)
      @string = string
    end

    # @return [Integer] number of dotted lines in the text
    def dotted_line_ocurrances
      matches_of(DOTTED_LINE_REGEX)
    end

    # @return [Integer] number of dashed lines in the text
    def dashed_line_ocurrances
      matches_of(DASHED_LINE_REGEX)
    end

    # @return [Integer] number of underscore runs in the text
    def underscore_ocurrances
      matches_of(UNDERSCORE_REGEX)
    end

    # @return [Integer] number of stand-alone punctuation marks in the text
    def stray_punctuation_occurences
      matches_of(STRAY_PUNCTUATION_REGEX)
    end

    # @return [String] a new string with dotted lines removed
    def replace_dotted_line
      stripped_of(DOTTED_LINE_REGEX)
    end

    # @return [String] a new string with dashed lines removed
    def replace_dashed_line
      stripped_of(DASHED_LINE_REGEX)
    end

    # @return [String] a new string with underscore runs removed
    def replace_underscore
      stripped_of(UNDERSCORE_REGEX)
    end

    # @return [String] a new string with stand-alone punctuation removed
    def replace_stray_punctuation
      stripped_of(STRAY_PUNCTUATION_REGEX)
    end

    private

    # How many times +pattern+ matches the text.
    def matches_of(pattern)
      string.scan(pattern).length
    end

    # The text with every match of +pattern+ deleted.
    def stripped_of(pattern)
      string.gsub(pattern, '')
    end
  end
end
@@ -0,0 +1,84 @@
1
module WordCountAnalyzer
  # Detects and rewrites words joined by forward slashes or backslashes.
  #
  # Before any slash handling the text is optionally passed through the
  # Hyperlink, Xhtml and Date replacers (in that order), depending on the
  # +hyperlink+, +xhtml+ and +date+ options; the result is exposed as
  # +processed_string+.
  class Slash
    # Rubular: http://rubular.com/r/AqvcH29sgg
    FORWARD_SLASH_REGEX = /(?<=\s)(\S+\/)+\S+|(?<=\A)(\S+\/)+\S+/

    # Rubular: http://rubular.com/r/tuFWtdMs4G
    BACKSLASH_REGEX = /\S+\\\S+/

    # +hyperlink+ option values for which links are left in the text.
    HYPERLINK_KEPT = %w(no_special_treatment split_at_period).freeze

    attr_reader :string, :processed_string, :date, :xhtml, :hyperlink

    # @param string [String] the text to inspect
    # @param args [Hash] optional :date, :xhtml and :hyperlink settings;
    #   other keys are accepted and ignored
    def initialize(string:, **args)
      @string = string
      @date = args[:date] || nil
      @xhtml = args[:xhtml] || nil
      @hyperlink = args[:hyperlink] || nil
      @processed_string = preprocess(string)
    end

    # @return [Boolean] true when the processed text has a forward-slash word
    def includes_forward_slash?
      !(processed_string !~ FORWARD_SLASH_REGEX)
    end

    # @return [Boolean] true when the processed text has a backslash word
    def includes_backslash?
      !(processed_string !~ BACKSLASH_REGEX)
    end

    # @return [Integer] number of forward-slash words in the processed text
    def forward_slash_occurences
      processed_string.scan(FORWARD_SLASH_REGEX).size
    end

    # Splits every forward-slash word into space-separated parts.
    #
    # @return [String] the rewritten text; +processed_string+ (and therefore
    #   the caller's input) is left unmodified
    def replace_forward_slashes
      return processed_string if processed_string !~ FORWARD_SLASH_REGEX
      split_at_forward_slashes(processed_string)
    end

    # Like #replace_forward_slashes, but digit-only dates are first replaced
    # by Date#replace_number_only_date so their slashes are not split.
    #
    # @return [String] the rewritten text (never nil, even when dates were
    #   the only slash-containing words)
    def replace_forward_slashes_except_dates
      return processed_string if processed_string !~ FORWARD_SLASH_REGEX
      dateless = WordCountAnalyzer::Date.new(string: processed_string).replace_number_only_date
      # Non-destructive gsub: gsub! returns nil when nothing is left to split.
      split_at_forward_slashes(dateless)
    end

    # @return [Integer] number of backslash words in the processed text
    def backslash_occurences
      processed_string.scan(BACKSLASH_REGEX).size
    end

    # Replaces every backslash word with one ' word ' per backslash-separated
    # part.
    #
    # @return [String] the rewritten text; +processed_string+ is left
    #   unmodified
    def replace_backslashes
      return processed_string if processed_string !~ BACKSLASH_REGEX
      processed_string.gsub(BACKSLASH_REGEX) do |match|
        ' word ' * match.split(/\\+/).length
      end
    end

    private

    # Applies the optional replacers innermost-first: Hyperlink, then Xhtml,
    # then Date. With every step switched off the input is returned as is.
    def preprocess(text)
      text = WordCountAnalyzer::Hyperlink.new(string: text).replace unless HYPERLINK_KEPT.include?(hyperlink)
      text = WordCountAnalyzer::Xhtml.new(string: text).replace unless xhtml.eql?('keep')
      text = WordCountAnalyzer::Date.new(string: text).replace unless date.eql?('no_special_treatment')
      text
    end

    # Returns a copy of +text+ with each forward-slash word split on runs of
    # slashes and re-joined with spaces.
    def split_at_forward_slashes(text)
      text.gsub(FORWARD_SLASH_REGEX) { |match| match.split(/\/+/).join(' ') }
    end
  end
end