crm_formatter 1.0.7.pre.rc.1 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ url,act_name,street,city,state,zip,phone
2
+ http://www.courtesyfordsales.com,Courtesy Ford,__����__����____1410 West Pine Street Hattiesburg,Wexford,MS,39401,512-555-1212
@@ -1,38 +1,46 @@
1
- module CRMFormatter
2
- class Phone
1
+ # frozen_string_literal: false
3
2
 
4
- ## Checks every phone number in table to verify that it meets phone criteria, then calls format_phone method to format Valid results. Otherwise destroys Invalid phone fields and associations.
3
+ module CrmFormatter
4
+ class Phone
5
+ ## Checks every phone number in table to verify that it meets phone criteria, then calls format_phone method to wrap Valid results. Otherwise destroys Invalid phone fields and associations.
5
6
 
6
- # Call: Formatter.new.validate_phone(phone)
7
+ # Call: Wrap.new.validate_phone(phone)
7
8
  def validate_phone(phone)
8
- phone_hsh = { phone: phone, valid_phone: nil, phone_edit: false }
9
- if phone.present?
10
- phone = phone&.gsub(/\s/, ' ')&.strip
11
- reg = Regexp.new("[(]?[0-9]{3}[ ]?[)-.]?[ ]?[0-9]{3}[ ]?[-. ][ ]?[0-9]{4}")
12
- return phone_hsh if phone.first == "0" || phone.include?("(0") || !reg.match(phone)
13
- phone_hsh[:valid_phone] = format_phone(phone)
14
- phone_hsh[:phone_edit] = phone_hsh[:phone] != phone_hsh[:valid_phone]
15
- end
9
+ phone_hsh = { phone_status: nil, phone: phone, phone_f: nil }
10
+ return phone_hsh unless phone.present?
11
+ phone = phone&.gsub(/\s/, ' ')&.strip
12
+ reg = Regexp.new('[(]?[0-9]{3}[ ]?[)-.]?[ ]?[0-9]{3}[ ]?[-. ][ ]?[0-9]{4}')
13
+ phone = nil if phone.first == '0' || phone.include?('(0') || !reg.match(phone)
14
+ phone_hsh[:phone_f] = format_phone(phone) if phone.present?
15
+ phone_hsh = check_phone_status(phone_hsh)
16
16
  phone_hsh
17
17
  end
18
18
 
19
+ ####### COMPARE ORIGINAL AND FORMATTED PHONE ######
20
+ def check_phone_status(hsh)
21
+ phone = hsh[:phone]
22
+ phone_f = hsh[:phone_f]
23
+ status = 'invalid'
24
+ status = phone != phone_f ? 'formatted' : 'unchanged' if phone && phone_f
25
+ hsh[:phone_status] = status if status.present?
26
+ hsh
27
+ end
28
+
19
29
  #################################
20
30
  ## FORMATS PHONE AS: (000) 000-0000
21
31
  ## Assumes phone is legitimate, then formats. Not designed to detect Valid phone number.
22
32
 
23
- # Call: Formatter.new.format_phone(phone)
33
+ # Call: Wrap.new.format_phone(phone)
24
34
  def format_phone(phone)
25
- regex = Regexp.new("[A-Z]+[a-z]+")
26
- if !phone.blank? && (phone != "N/A" || phone != "0") && !regex.match(phone)
27
- phone_stripped = phone.gsub(/[^0-9]/, "")
28
- (phone_stripped && phone_stripped[0] == "1") ? phone_step2 = phone_stripped[1..-1] : phone_step2 = phone_stripped
29
-
35
+ regex = Regexp.new('[A-Z]+[a-z]+')
36
+ if !phone.blank? && (phone != 'N/A' || phone != '0') && !regex.match(phone)
37
+ phone_stripped = phone.gsub(/[^0-9]/, '')
38
+ phone_step2 = phone_stripped && phone_stripped[0] == '1' ? phone_stripped[1..-1] : phone_stripped
30
39
  final_phone = !(phone_step2 && phone_step2.length < 10) ? "(#{phone_step2[0..2]}) #{(phone_step2[3..5])}-#{(phone_step2[6..9])}" : phone
31
40
  else
32
41
  final_phone = nil
33
42
  end
34
- return final_phone
43
+ final_phone
35
44
  end
36
-
37
45
  end
38
46
  end
@@ -1,3 +1,5 @@
1
- module CRMFormatter
2
- VERSION = "1.0.7-rc.1"
1
+ # frozen_string_literal: false
2
+
3
+ module CrmFormatter
4
+ VERSION = "2.0"
3
5
  end
@@ -1,64 +1,59 @@
1
+ # frozen_string_literal: false
2
+
3
+ # require 'rubygems'
4
+ # require 'active_support'
1
5
  require 'csv'
2
6
 
3
- module CRMFormatter
7
+ # StartCrm.run_webs
8
+ module CrmFormatter
4
9
  class Web
5
-
6
- def initialize(args={})
7
- @empty_oa = args.empty?
8
- @pos_urls = args.fetch(:pos_urls, [])
9
- @neg_urls = args.fetch(:neg_urls, [])
10
- @pos_links = args.fetch(:pos_links, [])
11
- @neg_links = args.fetch(:neg_links, [])
12
- @pos_hrefs = args.fetch(:pos_hrefs, [])
13
- @neg_hrefs = args.fetch(:neg_hrefs, [])
14
- @pos_exts = args.fetch(:pos_exts, [])
15
- @neg_exts = args.fetch(:neg_exts, [])
16
- @min_length = args.fetch(:min_length, 2)
17
- @max_length = args.fetch(:max_length, 100)
18
- end
19
-
20
- def banned_symbols
21
- banned_symbols = ["!", "$", "%", "'", "(", ")", "*", "+", ",", "<", ">", "@", "[", "]", "^", "{", "}", "~"]
22
- end
23
-
24
- ##Call: StartCrm.run_webs
25
10
  def format_url(url)
26
11
  prep_result = prep_for_uri(url)
27
12
  url_hash = prep_result[:url_hash]
28
13
  url = prep_result[:url]
29
- url = nil if has_errors(url_hash)
30
-
31
- if url.present?
32
- uri_result = run_uri(url_hash, url)
33
- url_hash = uri_result[:url_hash]
34
- url = uri_result[:url]
35
- (url = nil if has_errors(url_hash)) if url.present?
14
+ url = nil if errors?(url_hash)
15
+
16
+ if url&.present?
17
+ url = normalize_url(url)
18
+ ext_result = validate_extension(url_hash, url)
19
+ url_hash = ext_result[:url_hash]
20
+ url = ext_result[:url]
21
+ (url = nil if errors?(url_hash)) if url.present?
36
22
  end
37
23
 
38
- url_hash[:formatted_url] = url
39
- url_hash = check_reformatted_status(url_hash) if url.present?
24
+ url_hash = consolidate_negs(url_hash)
25
+ url_hash[:url_f] = url
26
+ url_hash = extract_path(url_hash) if url.present?
27
+ url_hash = check_web_status(url_hash)
40
28
  url_hash
41
29
  end
42
30
 
31
+ ### COMPARE ORIGINAL AND FORMATTED URL ###
32
+ def check_web_status(hsh)
33
+ status = 'invalid' if hsh[:web_neg]&.include?('error')
43
34
 
44
- def check_reformatted_status(url_hash)
45
- formatted = url_hash[:formatted_url]
46
- if formatted.present?
47
- url_hash[:is_reformatted] = url_hash[:url_path] != formatted
35
+ if hsh[:url] && hsh[:url_f] && status.nil?
36
+ status = hsh[:url] != hsh[:url_f] ? 'formatted' : 'unchanged'
48
37
  end
49
- url_hash
38
+
39
+ hsh[:web_status] = status if status.present?
40
+ hsh
50
41
  end
51
42
 
43
+ def consolidate_negs(hsh)
44
+ neg = hsh[:web_neg].join(', ')
45
+ hsh[:web_neg] = neg.present? ? neg : nil
46
+ hsh
47
+ end
52
48
 
53
- def has_errors(url_hash)
54
- errors = url_hash[:neg].map { |neg| neg.include?('error') }
49
+ def errors?(url_hash)
50
+ errors = url_hash[:web_neg].map { |web_neg| web_neg.include?('error') }
55
51
  errors.any?
56
52
  end
57
53
 
58
-
59
- ##Call: StartCrm.run_webs
60
54
  def prep_for_uri(url)
61
- url_hash = { is_reformatted: false, url_path: url, formatted_url: nil, neg: [], pos: [] }
55
+ url_hash = { web_status: nil, url: url, url_f: nil, url_path: nil, web_neg: [] }
56
+
62
57
  begin
63
58
  url = url&.split('|')&.first
64
59
  url = url&.split('\\')&.first
@@ -74,236 +69,108 @@ module CRMFormatter
74
69
  url = url[0..-2] if url.present? && url[-1] == '/'
75
70
  end
76
71
 
77
- url = nil if url.present? && banned_symbols.any? {|symb| url&.include?(symb) }
78
-
79
- if url.present?
80
- url_hash = compare_criteria(url_hash, url, 'pos_urls', 'include') if !@empty_oa
81
- url_hash = compare_criteria(url_hash, url, 'neg_urls', 'include') if !@empty_oa
82
- else
83
- url_hash[:neg] << "error: syntax"
84
- url_hash[:formatted_url] = url
72
+ banned_symbols = ['!', '$', '%', "'", '(', ')', '*', '+', ',', '<', '>', '@', '[', ']', '^', '{', '}', '~']
73
+ url = nil if url.present? && banned_symbols.any? { |symb| url&.include?(symb) }
74
+ unless url.present?
75
+ url_hash[:web_neg] << 'error: syntax'
76
+ url_hash[:url_f] = url
85
77
  end
86
-
87
- rescue Exception => e
88
- url_hash[:neg] << "error: #{e}"
89
- url = nil
90
- url_hash
91
- end
92
-
93
- prep_result = { url_hash: url_hash, url: url }
94
- end
95
-
96
-
97
- ##Call: StartCrm.run_webs
98
- def run_uri(url_hash, url)
99
- begin
100
- uri = URI(url)
101
- host_parts = uri.host&.split(".")
102
-
103
- url_hash = compare_criteria(url_hash, host_parts, 'pos_exts', 'equal') if !@empty_oa
104
- url_hash = compare_criteria(url_hash, host_parts, 'neg_exts', 'equal') if !@empty_oa
105
-
106
- host = uri.host
107
- scheme = uri.scheme
108
- url = "#{scheme}://#{host}" if host.present? && scheme.present?
109
- url = "http://#{url}" if url[0..3] != "http"
110
- url = url.gsub("//", "//www.") if !url.include?("www.")
111
- samp_url = convert_to_scheme_host(url)
112
-
113
- url = convert_to_scheme_host(url) if url.present?
114
- url_extens_result = check_url_extens(url_hash, url)
115
- url_hash = url_extens_result[:url_hash]
116
- url = url_extens_result[:url]
117
-
118
- rescue Exception => e
119
- url_hash[:neg] << "error: #{e}"
78
+ rescue StandardError => error
79
+ url_hash[:web_neg] << "error: #{error}"
120
80
  url = nil
121
81
  url_hash
122
82
  end
123
-
124
- uri_result = { url_hash: url_hash, url: url }
125
- end
126
-
127
-
128
- #Source: http://www.iana.org/domains/root/db
129
- #Text: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
130
- def check_url_extens(url_hash, url)
131
- if url.present?
132
- url_extens = URI(url).host&.split(".")[2..-1]
133
- if url_extens.count > 1
134
- file_path = "./lib/crm_formatter/extensions.csv"
135
- extens_list = CSV.read(file_path).flatten
136
- valid_url_extens = extens_list & url_extens
137
-
138
- if valid_url_extens.count != 1
139
- extens_str = valid_url_extens.map { |ext| ".#{ext}" }.join(', ')
140
- url_hash[:neg] << "error: exts.count > 1 [#{extens_str}]"
141
- url = nil
142
- end
143
- end
144
- end
145
-
146
- url_hash[:formatted_url] = url
147
- url_extens_result = {url_hash: url_hash, url: url}
148
- end
149
-
150
-
151
- ## This process, compare_criteria only runs if client OA args were passed at initialization.
152
- ## Results listed in url_hash[:neg]/[:pos], and don't impact or hinder final formatted url.
153
- ## Simply adds more details about user's preferences and criteria for the url are.
154
-
155
- def compare_criteria(hash, target, list_name, include_or_equal)
156
- unless @empty_oa
157
- if list_name.present?
158
- criteria_list = instance_variable_get("@#{list_name}")
159
-
160
- if criteria_list.present?
161
- if target.is_a?(::String)
162
- tars = target.split(', ')
163
- else
164
- tars = target
165
- end
166
-
167
- pn_matches = tars.map do |tar|
168
- if criteria_list.present?
169
- if include_or_equal == 'include'
170
- criteria_list.select { |el| el if tar.include?(el) }.join(', ')
171
- elsif include_or_equal == 'equal'
172
- criteria_list.select { |el| el if tar == el }.join(', ')
173
- end
174
- end
175
- end
176
-
177
- pn_match = pn_matches&.uniq&.sort&.join(', ')
178
- if pn_match.present?
179
- if list_name.include?('neg')
180
- hash[:neg] << "#{list_name}: #{pn_match}"
181
- else
182
- hash[:pos] << "#{list_name}: #{pn_match}"
183
- end
184
- end
185
- end
186
-
83
+ hsh = { url_hash: url_hash, url: url }
84
+ hsh
85
+ end
86
+
87
+ def normalize_url(url)
88
+ return unless url.present?
89
+ uri = URI(url)
90
+ scheme = uri&.scheme
91
+ host = uri&.host
92
+ url = "#{scheme}://#{host}" if host.present? && scheme.present?
93
+ url = "http://#{url}" if url[0..3] != 'http'
94
+
95
+ return unless url.present?
96
+ url.gsub!('//', '//www.') unless url.include?('www.')
97
+ url
98
+ end
99
+
100
+ # Source: http://www.iana.org/domains/root/db
101
+ # Text: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
102
+ def validate_extension(url_hash, url)
103
+ return unless url.present?
104
+ uri_parts = URI(url).host&.split('.')
105
+ url_exts = uri_parts[2..-1]
106
+
107
+ ### Finds Errors
108
+ if url_exts.empty? ## Missing ext.
109
+ err_msg = 'error: ext.none'
110
+ else ## Has ext(s), but need to verify validity and count.
111
+ file_path = './lib/crm_formatter/csv/extensions.csv'
112
+ iana_list = CSV.read(file_path).flatten
113
+ matched_exts = iana_list & url_exts
114
+
115
+ if matched_exts.empty? ## Has ext, but not valid.
116
+ err_msg = "error: ext.invalid [#{url_exts.join(', ')}]"
117
+ elsif matched_exts.count > 1 ## Has too many valid exts, Limit 1.
118
+ err_msg = "error: ext.valid > 1 [#{matched_exts.join(', ')}]"
187
119
  end
188
120
  end
189
-
190
- hash
191
- end
192
-
193
- ###### Supporting Methods Below #######
194
-
195
- def extract_link(url_path)
196
- url_hash = format_url(url_path)
197
- url = url_hash[:formatted_url]
198
- link = url_path
199
- link_hsh = {url_path: url_path, url: url, link: nil }
200
- if url.present? && link.present? && link.length > @min_length
201
- url = strip_down_url(url)
202
- link = strip_down_url(link)
203
- link&.gsub!(url, '')
204
- link = link&.split('.net')&.last
205
- link = link&.split('.com')&.last
206
- link = link&.split('.org')&.last
207
- link = "/#{link.split("/").reject(&:empty?).join("/")}" if link.present?
208
- link_hsh[:link] = link if link.present? && link.length > @min_length
209
- end
210
- link_hsh
211
- end
212
121
 
213
-
214
- def strip_down_url(url)
215
- if url.present?
216
- url = url.downcase.strip
217
- url = url.gsub('www.', '')
218
- url = url.split('://')
219
- url = url[-1]
220
- return url
122
+ if err_msg
123
+ url_hash[:web_neg] << err_msg
124
+ url = nil
125
+ url_hash[:url_f] = nil
126
+ return { url_hash: url_hash, url: url }
221
127
  end
222
- end
223
-
224
128
 
225
- def remove_invalid_links(link)
226
- link_hsh = {link: link, valid_link: nil, flags: nil }
227
- if link.present?
228
- @neg_links += get_symbs
229
- flags = @neg_links.select { |red| link&.include?(red) }
230
- flags << "below #{@min_length}" if link.length < @min_length
231
- flags << "over #{@max_length}" if link.length > @max_length
232
- flags = flags.flatten.compact
233
- flags.any? ? valid_link = nil : valid_link = link
234
- link_hsh[:valid_link] = valid_link
235
- link_hsh[:flags] = flags.join(', ')
129
+ ### Only Non-Errors Get Here ###
130
+ ## Has one valid ext, but need to check if original url exts were > 1. Replace if so.
131
+ if url_exts.count > matched_exts.count
132
+ inv_ext = (url_exts - matched_exts).join
133
+ url = url.gsub(".#{inv_ext}", '')
236
134
  end
237
- link_hsh
238
- end
239
-
240
-
241
- def remove_invalid_hrefs(href)
242
- href_hsh = {href: href, valid_href: nil, flags: nil }
243
- if href.present?
244
- @neg_hrefs += get_symbs
245
- href = href.split('|').join(' ')
246
- href = href.split('/').join(' ')
247
- href&.gsub!("(", ' ')
248
- href&.gsub!(")", ' ')
249
- href&.gsub!("[", ' ')
250
- href&.gsub!("]", ' ')
251
- href&.gsub!(",", ' ')
252
- href&.gsub!("'", ' ')
253
135
 
254
- flags = []
255
- flags << "over #{@max_length}" if href.length > @max_length
256
- invalid_text = Regexp.new(/[0-9]/)
257
- flags << invalid_text&.match(href)
258
- href = href&.downcase
259
- href = href&.strip
260
-
261
- flags << @neg_hrefs.select { |red| href&.include?(red) }
262
- flags = flags.flatten.compact.uniq
263
- href_hsh[:valid_href] = href unless flags.any?
264
- href_hsh[:flags] = flags.join(', ')
265
- end
266
- href_hsh
136
+ ext_result = { url_hash: url_hash, url: url }
137
+ ext_result
267
138
  end
268
139
 
140
+ ###### Supporting Methods Below #######
269
141
 
270
- def convert_to_scheme_host(url)
271
- if url.present?
272
- uri = URI(url)
273
- scheme = uri&.scheme
274
- host = uri&.host
275
- url = "#{scheme}://#{host}" if (scheme.present? && host.present?)
276
- return url
142
+ def extract_path(url_hash)
143
+ path_parts = url_hash[:url_f].split('//').last.split('/')[1..-1]
144
+ path = "/#{path_parts.join('/')}"
145
+ if path&.length > 2
146
+ url_hash[:url_path] = path
147
+ url_hash[:url_f] = url_hash[:url_f].gsub(url_hash[:url_path], '')
277
148
  end
149
+ url_hash
278
150
  end
279
151
 
280
-
281
- #CALL: Formatter.new.remove_ww3(url)
152
+ # CALL: Wrap.new.remove_ww3(url)
282
153
  def remove_ww3(url)
283
- if url.present?
284
- url.split('.').map { |part| url.gsub!(part,'www') if part.scan(/ww[0-9]/).any? }
285
- url&.gsub!("www.www", "www")
286
- end
154
+ return unless url.present?
155
+ url.split('.').map { |part| url.gsub!(part, 'www') if part.scan(/ww[0-9]/).any? }
156
+ url&.gsub!('www.www', 'www')
157
+ url
287
158
  end
288
159
 
289
-
290
160
  # For rare cases w/ urls with mistaken double slash twice.
291
161
  def remove_slashes(url)
292
- if url.present? && url.include?('//')
293
- parts = url.split('//')
294
- return parts[0..1].join if parts.length > 2
295
- end
296
- return url
297
- end
298
-
299
- ##Call: StartCrm.run_webs
300
- # def get_ext_list
301
- # # Source: http://www.iana.org/domains/root/db
302
- # # .txt list: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
303
- # file_path = "./lib/crm_formatter/extensions.csv"
304
- # extensions = CSV.read(file_path)
162
+ return url unless url.present? && url.include?('//')
163
+ parts = url.split('//')
164
+ return parts[0..1].join if parts.length > 2
165
+ url
166
+ end
167
+
168
+ # def strip_down_url(url)
169
+ # return unless url.present?
170
+ # url = url.downcase.strip
171
+ # url = url.gsub('www.', '')
172
+ # url = url.split('://')
173
+ # url[-1]
305
174
  # end
306
-
307
-
308
175
  end
309
176
  end