crm_formatter 1.0.7.pre.rc.1 → 2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2 @@
1
+ url,act_name,street,city,state,zip,phone
2
+ http://www.courtesyfordsales.com,Courtesy Ford,__����__����____1410 West Pine Street Hattiesburg,Wexford,MS,39401,512-555-1212
@@ -1,38 +1,46 @@
1
- module CRMFormatter
2
- class Phone
1
+ # frozen_string_literal: false
3
2
 
4
- ## Checks every phone number in table to verify that it meets phone criteria, then calls format_phone method to format Valid results. Otherwise destroys Invalid phone fields and associations.
3
+ module CrmFormatter
4
+ class Phone
5
+ ## Checks a phone number to verify that it meets phone criteria, then calls format_phone method to format valid results. Otherwise leaves phone_f nil and marks phone_status 'invalid'.
5
6
 
6
- # Call: Formatter.new.validate_phone(phone)
7
+ # Call: Wrap.new.validate_phone(phone)
7
8
  def validate_phone(phone)
8
- phone_hsh = { phone: phone, valid_phone: nil, phone_edit: false }
9
- if phone.present?
10
- phone = phone&.gsub(/\s/, ' ')&.strip
11
- reg = Regexp.new("[(]?[0-9]{3}[ ]?[)-.]?[ ]?[0-9]{3}[ ]?[-. ][ ]?[0-9]{4}")
12
- return phone_hsh if phone.first == "0" || phone.include?("(0") || !reg.match(phone)
13
- phone_hsh[:valid_phone] = format_phone(phone)
14
- phone_hsh[:phone_edit] = phone_hsh[:phone] != phone_hsh[:valid_phone]
15
- end
9
+ phone_hsh = { phone_status: nil, phone: phone, phone_f: nil }
10
+ return phone_hsh unless phone.present?
11
+ phone = phone&.gsub(/\s/, ' ')&.strip
12
+ reg = Regexp.new('[(]?[0-9]{3}[ ]?[)-.]?[ ]?[0-9]{3}[ ]?[-. ][ ]?[0-9]{4}')
13
+ phone = nil if phone.first == '0' || phone.include?('(0') || !reg.match(phone)
14
+ phone_hsh[:phone_f] = format_phone(phone) if phone.present?
15
+ phone_hsh = check_phone_status(phone_hsh)
16
16
  phone_hsh
17
17
  end
18
18
 
19
+ ####### COMPARE ORIGINAL AND FORMATTED PHONE ######
20
+ def check_phone_status(hsh)
21
+ phone = hsh[:phone]
22
+ phone_f = hsh[:phone_f]
23
+ status = 'invalid'
24
+ status = phone != phone_f ? 'formatted' : 'unchanged' if phone && phone_f
25
+ hsh[:phone_status] = status if status.present?
26
+ hsh
27
+ end
28
+
19
29
  #################################
20
30
  ## FORMATS PHONE AS: (000) 000-0000
21
31
  ## Assumes the phone number is legitimate and formats it. Not designed to detect whether a phone number is valid.
22
32
 
23
- # Call: Formatter.new.format_phone(phone)
33
+ # Call: Wrap.new.format_phone(phone)
24
34
  def format_phone(phone)
25
- regex = Regexp.new("[A-Z]+[a-z]+")
26
- if !phone.blank? && (phone != "N/A" || phone != "0") && !regex.match(phone)
27
- phone_stripped = phone.gsub(/[^0-9]/, "")
28
- (phone_stripped && phone_stripped[0] == "1") ? phone_step2 = phone_stripped[1..-1] : phone_step2 = phone_stripped
29
-
35
+ regex = Regexp.new('[A-Z]+[a-z]+')
36
+ if !phone.blank? && (phone != 'N/A' || phone != '0') && !regex.match(phone)
37
+ phone_stripped = phone.gsub(/[^0-9]/, '')
38
+ phone_step2 = phone_stripped && phone_stripped[0] == '1' ? phone_stripped[1..-1] : phone_stripped
30
39
  final_phone = !(phone_step2 && phone_step2.length < 10) ? "(#{phone_step2[0..2]}) #{(phone_step2[3..5])}-#{(phone_step2[6..9])}" : phone
31
40
  else
32
41
  final_phone = nil
33
42
  end
34
- return final_phone
43
+ final_phone
35
44
  end
36
-
37
45
  end
38
46
  end
@@ -1,3 +1,5 @@
1
- module CRMFormatter
2
- VERSION = "1.0.7-rc.1"
1
+ # frozen_string_literal: false
2
+
3
+ module CrmFormatter
4
+ VERSION = "2.0"
3
5
  end
@@ -1,64 +1,59 @@
1
+ # frozen_string_literal: false
2
+
3
+ # require 'rubygems'
4
+ # require 'active_support'
1
5
  require 'csv'
2
6
 
3
- module CRMFormatter
7
+ # StartCrm.run_webs
8
+ module CrmFormatter
4
9
  class Web
5
-
6
- def initialize(args={})
7
- @empty_oa = args.empty?
8
- @pos_urls = args.fetch(:pos_urls, [])
9
- @neg_urls = args.fetch(:neg_urls, [])
10
- @pos_links = args.fetch(:pos_links, [])
11
- @neg_links = args.fetch(:neg_links, [])
12
- @pos_hrefs = args.fetch(:pos_hrefs, [])
13
- @neg_hrefs = args.fetch(:neg_hrefs, [])
14
- @pos_exts = args.fetch(:pos_exts, [])
15
- @neg_exts = args.fetch(:neg_exts, [])
16
- @min_length = args.fetch(:min_length, 2)
17
- @max_length = args.fetch(:max_length, 100)
18
- end
19
-
20
- def banned_symbols
21
- banned_symbols = ["!", "$", "%", "'", "(", ")", "*", "+", ",", "<", ">", "@", "[", "]", "^", "{", "}", "~"]
22
- end
23
-
24
- ##Call: StartCrm.run_webs
25
10
  def format_url(url)
26
11
  prep_result = prep_for_uri(url)
27
12
  url_hash = prep_result[:url_hash]
28
13
  url = prep_result[:url]
29
- url = nil if has_errors(url_hash)
30
-
31
- if url.present?
32
- uri_result = run_uri(url_hash, url)
33
- url_hash = uri_result[:url_hash]
34
- url = uri_result[:url]
35
- (url = nil if has_errors(url_hash)) if url.present?
14
+ url = nil if errors?(url_hash)
15
+
16
+ if url&.present?
17
+ url = normalize_url(url)
18
+ ext_result = validate_extension(url_hash, url)
19
+ url_hash = ext_result[:url_hash]
20
+ url = ext_result[:url]
21
+ (url = nil if errors?(url_hash)) if url.present?
36
22
  end
37
23
 
38
- url_hash[:formatted_url] = url
39
- url_hash = check_reformatted_status(url_hash) if url.present?
24
+ url_hash = consolidate_negs(url_hash)
25
+ url_hash[:url_f] = url
26
+ url_hash = extract_path(url_hash) if url.present?
27
+ url_hash = check_web_status(url_hash)
40
28
  url_hash
41
29
  end
42
30
 
31
+ ### COMPARE ORIGINAL AND FORMATTED URL ###
32
+ def check_web_status(hsh)
33
+ status = 'invalid' if hsh[:web_neg]&.include?('error')
43
34
 
44
- def check_reformatted_status(url_hash)
45
- formatted = url_hash[:formatted_url]
46
- if formatted.present?
47
- url_hash[:is_reformatted] = url_hash[:url_path] != formatted
35
+ if hsh[:url] && hsh[:url_f] && status.nil?
36
+ status = hsh[:url] != hsh[:url_f] ? 'formatted' : 'unchanged'
48
37
  end
49
- url_hash
38
+
39
+ hsh[:web_status] = status if status.present?
40
+ hsh
50
41
  end
51
42
 
43
+ def consolidate_negs(hsh)
44
+ neg = hsh[:web_neg].join(', ')
45
+ hsh[:web_neg] = neg.present? ? neg : nil
46
+ hsh
47
+ end
52
48
 
53
- def has_errors(url_hash)
54
- errors = url_hash[:neg].map { |neg| neg.include?('error') }
49
+ def errors?(url_hash)
50
+ errors = url_hash[:web_neg].map { |web_neg| web_neg.include?('error') }
55
51
  errors.any?
56
52
  end
57
53
 
58
-
59
- ##Call: StartCrm.run_webs
60
54
  def prep_for_uri(url)
61
- url_hash = { is_reformatted: false, url_path: url, formatted_url: nil, neg: [], pos: [] }
55
+ url_hash = { web_status: nil, url: url, url_f: nil, url_path: nil, web_neg: [] }
56
+
62
57
  begin
63
58
  url = url&.split('|')&.first
64
59
  url = url&.split('\\')&.first
@@ -74,236 +69,108 @@ module CRMFormatter
74
69
  url = url[0..-2] if url.present? && url[-1] == '/'
75
70
  end
76
71
 
77
- url = nil if url.present? && banned_symbols.any? {|symb| url&.include?(symb) }
78
-
79
- if url.present?
80
- url_hash = compare_criteria(url_hash, url, 'pos_urls', 'include') if !@empty_oa
81
- url_hash = compare_criteria(url_hash, url, 'neg_urls', 'include') if !@empty_oa
82
- else
83
- url_hash[:neg] << "error: syntax"
84
- url_hash[:formatted_url] = url
72
+ banned_symbols = ['!', '$', '%', "'", '(', ')', '*', '+', ',', '<', '>', '@', '[', ']', '^', '{', '}', '~']
73
+ url = nil if url.present? && banned_symbols.any? { |symb| url&.include?(symb) }
74
+ unless url.present?
75
+ url_hash[:web_neg] << 'error: syntax'
76
+ url_hash[:url_f] = url
85
77
  end
86
-
87
- rescue Exception => e
88
- url_hash[:neg] << "error: #{e}"
89
- url = nil
90
- url_hash
91
- end
92
-
93
- prep_result = { url_hash: url_hash, url: url }
94
- end
95
-
96
-
97
- ##Call: StartCrm.run_webs
98
- def run_uri(url_hash, url)
99
- begin
100
- uri = URI(url)
101
- host_parts = uri.host&.split(".")
102
-
103
- url_hash = compare_criteria(url_hash, host_parts, 'pos_exts', 'equal') if !@empty_oa
104
- url_hash = compare_criteria(url_hash, host_parts, 'neg_exts', 'equal') if !@empty_oa
105
-
106
- host = uri.host
107
- scheme = uri.scheme
108
- url = "#{scheme}://#{host}" if host.present? && scheme.present?
109
- url = "http://#{url}" if url[0..3] != "http"
110
- url = url.gsub("//", "//www.") if !url.include?("www.")
111
- samp_url = convert_to_scheme_host(url)
112
-
113
- url = convert_to_scheme_host(url) if url.present?
114
- url_extens_result = check_url_extens(url_hash, url)
115
- url_hash = url_extens_result[:url_hash]
116
- url = url_extens_result[:url]
117
-
118
- rescue Exception => e
119
- url_hash[:neg] << "error: #{e}"
78
+ rescue StandardError => error
79
+ url_hash[:web_neg] << "error: #{error}"
120
80
  url = nil
121
81
  url_hash
122
82
  end
123
-
124
- uri_result = { url_hash: url_hash, url: url }
125
- end
126
-
127
-
128
- #Source: http://www.iana.org/domains/root/db
129
- #Text: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
130
- def check_url_extens(url_hash, url)
131
- if url.present?
132
- url_extens = URI(url).host&.split(".")[2..-1]
133
- if url_extens.count > 1
134
- file_path = "./lib/crm_formatter/extensions.csv"
135
- extens_list = CSV.read(file_path).flatten
136
- valid_url_extens = extens_list & url_extens
137
-
138
- if valid_url_extens.count != 1
139
- extens_str = valid_url_extens.map { |ext| ".#{ext}" }.join(', ')
140
- url_hash[:neg] << "error: exts.count > 1 [#{extens_str}]"
141
- url = nil
142
- end
143
- end
144
- end
145
-
146
- url_hash[:formatted_url] = url
147
- url_extens_result = {url_hash: url_hash, url: url}
148
- end
149
-
150
-
151
- ## This process, compare_criteria only runs if client OA args were passed at initialization.
152
- ## Results listed in url_hash[:neg]/[:pos], and don't impact or hinder final formatted url.
153
- ## Simply adds more details about user's preferences and criteria for the url are.
154
-
155
- def compare_criteria(hash, target, list_name, include_or_equal)
156
- unless @empty_oa
157
- if list_name.present?
158
- criteria_list = instance_variable_get("@#{list_name}")
159
-
160
- if criteria_list.present?
161
- if target.is_a?(::String)
162
- tars = target.split(', ')
163
- else
164
- tars = target
165
- end
166
-
167
- pn_matches = tars.map do |tar|
168
- if criteria_list.present?
169
- if include_or_equal == 'include'
170
- criteria_list.select { |el| el if tar.include?(el) }.join(', ')
171
- elsif include_or_equal == 'equal'
172
- criteria_list.select { |el| el if tar == el }.join(', ')
173
- end
174
- end
175
- end
176
-
177
- pn_match = pn_matches&.uniq&.sort&.join(', ')
178
- if pn_match.present?
179
- if list_name.include?('neg')
180
- hash[:neg] << "#{list_name}: #{pn_match}"
181
- else
182
- hash[:pos] << "#{list_name}: #{pn_match}"
183
- end
184
- end
185
- end
186
-
83
+ hsh = { url_hash: url_hash, url: url }
84
+ hsh
85
+ end
86
+
87
+ def normalize_url(url)
88
+ return unless url.present?
89
+ uri = URI(url)
90
+ scheme = uri&.scheme
91
+ host = uri&.host
92
+ url = "#{scheme}://#{host}" if host.present? && scheme.present?
93
+ url = "http://#{url}" if url[0..3] != 'http'
94
+
95
+ return unless url.present?
96
+ url.gsub!('//', '//www.') unless url.include?('www.')
97
+ url
98
+ end
99
+
100
+ # Source: http://www.iana.org/domains/root/db
101
+ # Text: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
102
+ def validate_extension(url_hash, url)
103
+ return unless url.present?
104
+ uri_parts = URI(url).host&.split('.')
105
+ url_exts = uri_parts[2..-1]
106
+
107
+ ### Finds Errors
108
+ if url_exts.empty? ## Missing ext.
109
+ err_msg = 'error: ext.none'
110
+ else ## Has ext(s), but need to verify validity and count.
111
+ file_path = './lib/crm_formatter/csv/extensions.csv'
112
+ iana_list = CSV.read(file_path).flatten
113
+ matched_exts = iana_list & url_exts
114
+
115
+ if matched_exts.empty? ## Has ext, but not valid.
116
+ err_msg = "error: ext.invalid [#{url_exts.join(', ')}]"
117
+ elsif matched_exts.count > 1 ## Has too many valid exts, Limit 1.
118
+ err_msg = "error: ext.valid > 1 [#{matched_exts.join(', ')}]"
187
119
  end
188
120
  end
189
-
190
- hash
191
- end
192
-
193
- ###### Supporting Methods Below #######
194
-
195
- def extract_link(url_path)
196
- url_hash = format_url(url_path)
197
- url = url_hash[:formatted_url]
198
- link = url_path
199
- link_hsh = {url_path: url_path, url: url, link: nil }
200
- if url.present? && link.present? && link.length > @min_length
201
- url = strip_down_url(url)
202
- link = strip_down_url(link)
203
- link&.gsub!(url, '')
204
- link = link&.split('.net')&.last
205
- link = link&.split('.com')&.last
206
- link = link&.split('.org')&.last
207
- link = "/#{link.split("/").reject(&:empty?).join("/")}" if link.present?
208
- link_hsh[:link] = link if link.present? && link.length > @min_length
209
- end
210
- link_hsh
211
- end
212
121
 
213
-
214
- def strip_down_url(url)
215
- if url.present?
216
- url = url.downcase.strip
217
- url = url.gsub('www.', '')
218
- url = url.split('://')
219
- url = url[-1]
220
- return url
122
+ if err_msg
123
+ url_hash[:web_neg] << err_msg
124
+ url = nil
125
+ url_hash[:url_f] = nil
126
+ return { url_hash: url_hash, url: url }
221
127
  end
222
- end
223
-
224
128
 
225
- def remove_invalid_links(link)
226
- link_hsh = {link: link, valid_link: nil, flags: nil }
227
- if link.present?
228
- @neg_links += get_symbs
229
- flags = @neg_links.select { |red| link&.include?(red) }
230
- flags << "below #{@min_length}" if link.length < @min_length
231
- flags << "over #{@max_length}" if link.length > @max_length
232
- flags = flags.flatten.compact
233
- flags.any? ? valid_link = nil : valid_link = link
234
- link_hsh[:valid_link] = valid_link
235
- link_hsh[:flags] = flags.join(', ')
129
+ ### Only Non-Errors Get Here ###
130
+ ## Has one valid ext; if the original url had more exts than that, remove the invalid ext(s) from the url.
131
+ if url_exts.count > matched_exts.count
132
+ inv_ext = (url_exts - matched_exts).join
133
+ url = url.gsub(".#{inv_ext}", '')
236
134
  end
237
- link_hsh
238
- end
239
-
240
-
241
- def remove_invalid_hrefs(href)
242
- href_hsh = {href: href, valid_href: nil, flags: nil }
243
- if href.present?
244
- @neg_hrefs += get_symbs
245
- href = href.split('|').join(' ')
246
- href = href.split('/').join(' ')
247
- href&.gsub!("(", ' ')
248
- href&.gsub!(")", ' ')
249
- href&.gsub!("[", ' ')
250
- href&.gsub!("]", ' ')
251
- href&.gsub!(",", ' ')
252
- href&.gsub!("'", ' ')
253
135
 
254
- flags = []
255
- flags << "over #{@max_length}" if href.length > @max_length
256
- invalid_text = Regexp.new(/[0-9]/)
257
- flags << invalid_text&.match(href)
258
- href = href&.downcase
259
- href = href&.strip
260
-
261
- flags << @neg_hrefs.select { |red| href&.include?(red) }
262
- flags = flags.flatten.compact.uniq
263
- href_hsh[:valid_href] = href unless flags.any?
264
- href_hsh[:flags] = flags.join(', ')
265
- end
266
- href_hsh
136
+ ext_result = { url_hash: url_hash, url: url }
137
+ ext_result
267
138
  end
268
139
 
140
+ ###### Supporting Methods Below #######
269
141
 
270
- def convert_to_scheme_host(url)
271
- if url.present?
272
- uri = URI(url)
273
- scheme = uri&.scheme
274
- host = uri&.host
275
- url = "#{scheme}://#{host}" if (scheme.present? && host.present?)
276
- return url
142
+ def extract_path(url_hash)
143
+ path_parts = url_hash[:url_f].split('//').last.split('/')[1..-1]
144
+ path = "/#{path_parts.join('/')}"
145
+ if path&.length > 2
146
+ url_hash[:url_path] = path
147
+ url_hash[:url_f] = url_hash[:url_f].gsub(url_hash[:url_path], '')
277
148
  end
149
+ url_hash
278
150
  end
279
151
 
280
-
281
- #CALL: Formatter.new.remove_ww3(url)
152
+ # CALL: Wrap.new.remove_ww3(url)
282
153
  def remove_ww3(url)
283
- if url.present?
284
- url.split('.').map { |part| url.gsub!(part,'www') if part.scan(/ww[0-9]/).any? }
285
- url&.gsub!("www.www", "www")
286
- end
154
+ return unless url.present?
155
+ url.split('.').map { |part| url.gsub!(part, 'www') if part.scan(/ww[0-9]/).any? }
156
+ url&.gsub!('www.www', 'www')
157
+ url
287
158
  end
288
159
 
289
-
290
160
  # For rare cases where a url mistakenly contains a double slash ('//') more than once.
291
161
  def remove_slashes(url)
292
- if url.present? && url.include?('//')
293
- parts = url.split('//')
294
- return parts[0..1].join if parts.length > 2
295
- end
296
- return url
297
- end
298
-
299
- ##Call: StartCrm.run_webs
300
- # def get_ext_list
301
- # # Source: http://www.iana.org/domains/root/db
302
- # # .txt list: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
303
- # file_path = "./lib/crm_formatter/extensions.csv"
304
- # extensions = CSV.read(file_path)
162
+ return url unless url.present? && url.include?('//')
163
+ parts = url.split('//')
164
+ return parts[0..1].join if parts.length > 2
165
+ url
166
+ end
167
+
168
+ # def strip_down_url(url)
169
+ # return unless url.present?
170
+ # url = url.downcase.strip
171
+ # url = url.gsub('www.', '')
172
+ # url = url.split('://')
173
+ # url[-1]
305
174
  # end
306
-
307
-
308
175
  end
309
176
  end