url_verifier 0.0.1.pre.rc.01 → 2.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 173471784a4c797364efaee8b96757c992057df65e62d1b7373b65f4386abfe4
4
- data.tar.gz: 591e35185ed9065cd2386912c04ea45fb891a08b9eda7da1415fb754b8bd4905
3
+ metadata.gz: 8393c5aa642d2da6e0552c011926170c29da0832237d9395632d5d5f6a4ac634
4
+ data.tar.gz: c5bcab090867ae584abc32c15d0c8a990432c94332a2743e7a68155474568369
5
5
  SHA512:
6
- metadata.gz: 3981b75a7351316126d9ba85b6d5a349121a571e1119c45d02669a063ff704cc9d30669dd028884165ac1112b44dcc8d326415f858d44e3a0b256064ae6e7a5f
7
- data.tar.gz: 7a65c63fd747f53357eb9b64903cdfe01b0a649ee8ea2e9a08aae52dba7c9e8ecd50ef5c8f99894fd84aa428e70f81fd28287d9ab572e2e4303f1fb1a5b18950
6
+ metadata.gz: ab1cb8b4a865ba81c12649872f26fe4916c163e511590f8352bd0cb4efbd6b554f0d97e3f2a624e328ef2fdbba7ff29e006b262b253cf2e5e26f9a3c6ee79933
7
+ data.tar.gz: 7184195ca8d85da4d161d769691b20572943f0fa33756a9c7caf06c0998b57769af51cb583b9dd2a492fb9467b46bd1744c3278ae82a704fc2cfb3a780bd0b84
data/.gitignore CHANGED
@@ -1,11 +1,14 @@
1
1
  /.bundle/
2
2
  /.yardoc
3
+ /Gemfile.lock
3
4
  /_yardoc/
4
5
  /coverage/
5
- /doc/
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
9
 
10
- # rspec failure tracking
11
- .rspec_status
10
+ utf8_sanitizer-*.gem
11
+ .DS_Store
12
+ .idea/
13
+ .xlsx
14
+ .txt
data/Gemfile.lock CHANGED
@@ -1,10 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- url_verifier (0.0.1.pre.rc.01)
5
- activesupport (~> 5.2, >= 5.2.0)
6
- crm_formatter (~> 2.4)
7
- utf8_sanitizer (~> 2.0)
4
+ url_verifier (2.10)
5
+ activesupport (~> 5.2)
6
+ crm_formatter (~> 2.62)
7
+ curb (~> 0.9.6)
8
+ net-ping (~> 1.7, >= 1.7.8)
9
+ utf8_sanitizer (~> 2.16)
8
10
 
9
11
  GEM
10
12
  remote: https://rubygems.org/
@@ -14,74 +16,21 @@ GEM
14
16
  i18n (>= 0.7, < 2)
15
17
  minitest (~> 5.1)
16
18
  tzinfo (~> 1.1)
17
- ast (2.4.0)
18
- binding.repl (3.0.0)
19
- byebug (10.0.2)
20
- cd (1.0.1)
21
- class_indexer (0.3.0)
22
- json
23
- parser
24
- clipboard (1.1.2)
25
- code (0.9.2)
26
- coderay (~> 1.1)
27
- method_source (~> 0.9)
28
19
  coderay (1.1.2)
29
20
  concurrent-ruby (1.0.5)
30
- crm_formatter (2.4)
31
- activesupport (~> 5.2, >= 5.2.0)
32
- utf8_sanitizer (~> 2.0)
33
- debugging (1.1.1)
34
- binding.repl (~> 3.0)
35
- paint (>= 0.9, < 3.0)
21
+ crm_formatter (2.62)
22
+ activesupport (~> 5.2)
23
+ utf8_sanitizer (~> 2.16)
24
+ curb (0.9.6)
36
25
  diff-lcs (1.3)
37
- every_day_irb (2.0.0)
38
- cd (~> 1.0)
39
- fancy_irb (1.1.0)
40
- paint (>= 0.9, < 3.0)
41
- unicode-display_width (~> 1.1)
42
- ffi (1.9.25)
43
- hirb (0.7.3)
44
26
  i18n (1.0.1)
45
27
  concurrent-ruby (~> 1.0)
46
- instance (0.2.0)
47
- interactive_editor (0.0.11)
48
- spoon (>= 0.0.1)
49
- irbtools (2.2.1)
50
- binding.repl (~> 3.0)
51
- clipboard (~> 1.1)
52
- code (~> 0.9)
53
- coderay (~> 1.1)
54
- debugging (~> 1.1)
55
- every_day_irb (~> 2.0)
56
- fancy_irb (~> 1.1)
57
- hirb (~> 0.7, >= 0.7.3)
58
- instance (~> 0.2)
59
- interactive_editor (~> 0.0, >= 0.0.10)
60
- method_locator (~> 0.0, >= 0.0.4)
61
- methodfinder (~> 2.0)
62
- ori (~> 0.1.0)
63
- os
64
- paint (>= 0.9, < 3.0)
65
- ruby_engine (~> 1.0)
66
- ruby_info (~> 1.0)
67
- ruby_version (~> 1.0)
68
- wirb (~> 2.0)
69
- json (2.1.0)
70
- method_locator (0.0.4)
71
28
  method_source (0.9.0)
72
- methodfinder (2.2.1)
73
29
  minitest (5.11.3)
74
- ori (0.1.0)
75
- os (1.0.0)
76
- paint (2.0.1)
77
- parallel (1.12.1)
78
- parser (2.5.1.0)
79
- ast (~> 2.4.0)
80
- powerpack (0.1.2)
30
+ net-ping (1.7.8)
81
31
  pry (0.11.3)
82
32
  coderay (~> 1.1.0)
83
33
  method_source (~> 0.9.0)
84
- rainbow (3.0.0)
85
34
  rake (12.3.1)
86
35
  rspec (3.7.0)
87
36
  rspec-core (~> 3.7.0)
@@ -96,42 +45,20 @@ GEM
96
45
  diff-lcs (>= 1.2.0, < 2.0)
97
46
  rspec-support (~> 3.7.0)
98
47
  rspec-support (3.7.1)
99
- rubocop (0.56.0)
100
- parallel (~> 1.10)
101
- parser (>= 2.5)
102
- powerpack (~> 0.1)
103
- rainbow (>= 2.2.2, < 4.0)
104
- ruby-progressbar (~> 1.7)
105
- unicode-display_width (~> 1.0, >= 1.0.1)
106
- ruby-beautify (0.97.4)
107
- ruby-progressbar (1.9.0)
108
- ruby_engine (1.0.1)
109
- ruby_info (1.0.1)
110
- ruby_version (1.0.1)
111
- spoon (0.0.6)
112
- ffi
113
48
  thread_safe (0.3.6)
114
49
  tzinfo (1.2.5)
115
50
  thread_safe (~> 0.1)
116
- unicode-display_width (1.4.0)
117
- utf8_sanitizer (2.0)
118
- activesupport (~> 5.2, >= 5.2.0)
119
- wirb (2.1.2)
120
- paint (>= 0.9, < 3.0)
51
+ utf8_sanitizer (2.16)
52
+ activesupport (~> 5.2)
121
53
 
122
54
  PLATFORMS
123
55
  ruby
124
56
 
125
57
  DEPENDENCIES
126
58
  bundler (~> 1.16, >= 1.16.2)
127
- byebug (~> 10.0, >= 10.0.2)
128
- class_indexer (~> 0.3.0)
129
- irbtools (~> 2.2, >= 2.2.1)
130
59
  pry (~> 0.11.3)
131
60
  rake (~> 12.3, >= 12.3.1)
132
61
  rspec (~> 3.7)
133
- rubocop (~> 0.56.0)
134
- ruby-beautify (~> 0.97.4)
135
62
  url_verifier!
136
63
 
137
64
  BUNDLED WITH
data/README.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # UrlVerifier
2
- ## Verify url validity and follow redirects. Currently in Beta Version while Building.
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/url_verifier.svg)](https://badge.fury.io/rb/url_verifier)
4
+ [![MIT License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+
6
+
7
+ ### Format, Verify & Follow URL redirects with detailed reports.
8
+
9
+ Following URL redirects can sometimes take a few minutes and often raises various exceptions. UrlVerifier is built with exceptional error handling, reformatting, and an optional time limit you can set; the default limit is 60 seconds, but verification typically takes only 5-10 seconds per URL. UrlVerifier has been developed and improved upon for several years in an enterprise-level app and is now available as an open source gem. It is perfect for high-volume, yet smooth, uninterrupted URL formatting and verification.
10
+
11
+ Example:
12
+
13
+ ```
14
+ {
15
+ url: 'blackwellford.com/staff',
16
+ verified_url: 'https://www.blackwellford.com',
17
+ url_path: '/staff',
18
+ response_code:'200',
19
+ url_redirected: true,
20
+ url_sts: 'Valid'
21
+ }
22
+ ```
3
23
 
4
24
  ## Installation
5
25
 
@@ -19,13 +39,137 @@ Or install it yourself as:
19
39
 
20
40
  ## Usage
21
41
 
22
- Coming soon. Currently in Beta Version while Developing.
42
+ ### Available Methods
43
+
44
+ #### 1. Verify one URL as a string:
45
+
46
+ ```
47
+ verifier = UrlVerifier::RunCurler.new
48
+ verified_url_hash = verifier.verify_url('example.com')
49
+ ```
50
+
51
+
52
+ #### 2. Verify Array of URL strings:
53
+
54
+ ```
55
+ verifier = UrlVerifier::RunCurler.new
56
+ verified_url_hashes = verifier.verify_urls(array_of_urls)
57
+ ```
58
+
59
+
60
+ ### Usage Example
61
+
62
+ ```
63
+ array_of_urls = %w[blackwellford.com/staff www.mccrea.subaru.com/inventory www.sofake.sofake https://www.century1chevy.com https://www.mccreasubaru.com]
64
+
65
+ args = { timeout_limit: 30 }
66
+ verifier = UrlVerifier::RunCurler.new(args)
67
+ verified_url_hashes = verifier.verify_urls(array_of_urls)
68
+ ```
69
+ Note: `:timeout_limit` default is 60 seconds per url, but most urls take under 10 sec each. You can override the default by passing in your desired limit. If 60 seconds is fine, no need to pass any args. You could simply instantiate like this:
70
+
71
+ ```
72
+ verifier = UrlVerifier::RunCurler.new
73
+ verified_url_hashes = verifier.verify_urls(array_of_urls)
74
+ ```
75
+
76
+ Returns Data in Hash Format with detailed report:
77
+
78
+ Notice that the URLs in the input array above were NOT uniformly formatted. UrlVerifier leverages the `utf8_sanitizer` gem and the `crm_formatter` gem to pre-format the URLs before verifying them, so the results are uniformly formatted, as shown by `:url_f` in the hashes below.
79
+
80
+ `:verified_url` is the final verified URL. `:url_redirected` indicates that the verified URL is different than `:url_f`.
81
+
82
+ If `url_sts: 'Invalid'`, `:wx_date` will be timestamped, which helps keep track of when the URL became invalid, in case you are running periodic database URL verifications and want to include these details in your reports.
83
+
84
+ `:response_code` in the 200s is ideal. If the URL has recently been forwarded it will be in the 300s, and a code in the 400s indicates an issue with the URL domain or server. Some URLs returning 400s could be re-run later once their issues are resolved, so don't always give up on them.
85
+
86
+ Here is a reference guide: [List of HTTP status codes](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes)
87
+
88
+ ```
89
+ verified_url_hashes = [
90
+ {
91
+ :web_status=>"formatted",
92
+ :url=>"blackwellford.com/staff",
93
+ :url_f=>"http://www.blackwellford.com",
94
+ :url_path=>"/staff",
95
+ :web_neg=>nil,
96
+ :verified_url=>"https://www.blackwellford.com",
97
+ :url_redirected=>true,
98
+ :response_code=>"200",
99
+ :url_sts=>"Valid",
100
+ :url_date=>2018-07-02 09:16:19 -0500,
101
+ :wx_date=>nil,
102
+ :timeout=>0
103
+ },
104
+ {
105
+ :web_status=>"formatted",
106
+ :url=>"www.mccrea.subaru.com/inventory",
107
+ :url_f=>"http://www.mccrea.subaru.com",
108
+ :url_path=>nil,
109
+ :web_neg=>nil,
110
+ :verified_url=>"https://www.mccreasubaru.com",
111
+ :url_redirected=>true,
112
+ :response_code=>"200",
113
+ :url_sts=>"Valid",
114
+ :url_date=>2018-07-02 09:16:38 -0500,
115
+ :wx_date=>nil,
116
+ :timeout=>0
117
+ },
118
+ {
119
+ :web_status=>"invalid",
120
+ :url=>"www.sofake.sofake",
121
+ :url_f=>nil,
122
+ :url_path=>nil,
123
+ :web_neg=>"error: ext.invalid [sofake]",
124
+ :verified_url=>nil,
125
+ :url_redirected=>false,
126
+ :response_code=>nil,
127
+ :url_sts=>"Invalid",
128
+ :url_date=>2018-07-02 09:16:58 -0500,
129
+ :wx_date=>2018-07-02 09:16:58 -0500,
130
+ :timeout=>nil
131
+ },
132
+ {
133
+ :web_status=>"unchanged",
134
+ :url=>"https://www.century1chevy.com",
135
+ :url_f=>"https://www.century1chevy.com",
136
+ :url_path=>nil,
137
+ :web_neg=>nil,
138
+ :verified_url=>"http://www.centurychevy.com",
139
+ :url_redirected=>true,
140
+ :response_code=>"405",
141
+ :url_sts=>"Valid",
142
+ :url_date=>2018-07-02 09:16:58 -0500,
143
+ :wx_date=>nil,
144
+ :timeout=>0
145
+ },
146
+ {
147
+ :web_status=>"unchanged",
148
+ :url=>"https://www.mccreasubaru.com",
149
+ :url_f=>"https://www.mccreasubaru.com",
150
+ :url_path=>nil,
151
+ :web_neg=>nil,
152
+ :verified_url=>"https://www.mccreasubaru.com",
153
+ :url_redirected=>false,
154
+ :response_code=>"200",
155
+ :url_sts=>"Valid",
156
+ :url_date=>2018-07-02 09:16:59 -0500,
157
+ :wx_date=>nil,
158
+ :timeout=>0
159
+ }
160
+ ]
161
+ ```
162
+
163
+ ## Author
164
+
165
+ Adam J Booth - [4rlm](https://github.com/4rlm)
166
+
23
167
 
24
168
  ## Development
25
169
 
26
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
170
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
27
171
 
28
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
172
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
29
173
 
30
174
  ## Contributing
31
175
 
data/Rakefile CHANGED
@@ -1,6 +1,34 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rspec/core/rake_task"
3
+ require 'url_verifier'
4
+
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
6
8
  task :default => :spec
9
+ task :test => :spec
10
+
11
+ ###################
12
+ task :console do
13
+ require 'irb'
14
+ require 'irb/completion'
15
+ require 'url_verifier'
16
+ require "active_support/all"
17
+ ARGV.clear
18
+
19
+ verified_urls = run_verify_urls
20
+ # binding.pry
21
+
22
+ IRB.start
23
+ end
24
+
25
+
26
+ def run_verify_urls
27
+ # urls = %w[https://www.century1chevy.com www.sofake.sofake http://www.mccrea.subaru.com blackwellford.com minooka.subaru.com texarkana.mercedesdealer.com www.bobilya.com hammondautoplex.com www.harbinfordscottsboro.net http://www.lancaster.subaru.com loufusz.subaru.com www.mastro.subaru.com www.muller.subaru.com reinekefamilydealerships.com]
28
+
29
+ array_of_urls = %w[blackwellford.com/staff www.mccrea.subaru.com/inventory www.sofake.sofake https://www.century1chevy.com https://www.mccreasubaru.com]
30
+
31
+ args = { timeout_limit: 60 }
32
+ verifier = UrlVerifier::RunCurler.new(args)
33
+ verified_url_hashes = verifier.verify_urls(array_of_urls)
34
+ end
@@ -0,0 +1,102 @@
1
+ # require 'check_int'
2
+ # require 'timeout'
3
+ # require 'net/ping'
4
+ # gem 'net-ping', '~> 1.7', '>= 1.7.8'
5
+ # gem 'curb', '~> 0.9.3'
6
+
7
+ module UrlVerifier
8
+ class Curler
9
+
10
+ def initialize
11
+ @web_formatter = CrmFormatter::Web.new
12
+ @ran_again = false
13
+ end
14
+
15
+ def start_curl(url, timeout)
16
+ curl_result = { verified_url: nil, response_code: nil, curl_err: nil }
17
+ if url.present?
18
+ result = nil
19
+
20
+ begin # Curl Exception Handling
21
+ begin # Timeout Exception Handling
22
+ Timeout.timeout(timeout) do
23
+ pre_curl_msg(url, timeout)
24
+
25
+ result = Curl::Easy.perform(url) do |curl|
26
+ curl.follow_location = true
27
+ curl.useragent = "curb"
28
+ curl.connect_timeout = timeout
29
+ curl.enable_cookies = true
30
+ curl.head = true #testing - new
31
+ end # result
32
+
33
+ curl_result[:response_code] = result&.response_code.to_s
34
+ web_hsh = @web_formatter.format_url(result&.last_effective_url)
35
+
36
+ url_f = web_hsh[:url_f]
37
+ curl_result[:verified_url] = url_f if url_f.present?
38
+ end
39
+
40
+ rescue Timeout::Error # Timeout Exception Handling
41
+ curl_result[:curl_err] = "Error: Timeout"
42
+ end
43
+
44
+ # rescue LoadError => e # Curl Exception Handling
45
+ rescue StandardError => e
46
+ curl_err = error_parser("Error: #{$!.message}")
47
+ # CheckInt.new.check_int if curl_err.include?('TCP')
48
+ curl_result[:curl_err] = curl_err
49
+ end
50
+ else ## If no url present?
51
+ curl_result[:curl_err] = 'URL Nil'
52
+ end
53
+
54
+ curl_result = run_again(curl_result, url, timeout)
55
+ curl_result
56
+ end
57
+
58
+ def run_again(curl_result, url, timeout)
59
+ if curl_result[:curl_err].present?
60
+ if @ran_again == false
61
+ @ran_again = true
62
+ url = https_to_http(url)
63
+ curl_result = start_curl(url, timeout)
64
+ else
65
+ @ran_again = false
66
+ end
67
+ else
68
+ @ran_again = false
69
+ end
70
+ curl_result
71
+ end
72
+
73
+ def https_to_http(url)
74
+ url = url.gsub('https://', 'http://')
75
+ end
76
+
77
+ def pre_curl_msg(url, timeout)
78
+ puts "\n\n#{'='*40}\nVERIFYING: #{url}\nMax Wait Set: #{timeout} Seconds\n\n"
79
+ end
80
+
81
+ def error_parser(curl_err)
82
+ if curl_err.include?("Couldn't connect to server")
83
+ curl_err = "Error: Expired Url"
84
+ elsif curl_err.include?("SSL connect error")
85
+ curl_err = "Error: SSL"
86
+ elsif curl_err.include?("Couldn't resolve host name")
87
+ curl_err = "Error: Host"
88
+ elsif curl_err.include?("Peer certificate")
89
+ curl_err = "Error: Certificate"
90
+ elsif curl_err.include?("Failure when receiving data")
91
+ curl_err = "Error: Transfer"
92
+ elsif curl_err.include?("TCP connection")
93
+ curl_err = "Error: TCP"
94
+ else
95
+ curl_err = "Error: Undefined"
96
+ end
97
+
98
+ curl_err
99
+ end
100
+
101
+ end
102
+ end
@@ -0,0 +1,47 @@
1
+
2
+
3
+ ## INACTIVE - CONSIDER LATER FOR ADVANCED OPTION ###
4
+ module UrlVerifier
5
+ module Iterate
6
+ #
7
+ # def iterate_query(query)
8
+ # # Call: VerUrl.new.start_ver_url
9
+ # # Delayed::Worker.max_run_time = 2.seconds
10
+ # query.in_groups(@group_count).each do |batch_of_ids|
11
+ # @query_count -= batch_of_ids&.count
12
+ # pause_iteration
13
+ # format_query_results(batch_of_ids)
14
+ # end
15
+ # end
16
+ #
17
+ #
18
+ # def pause_iteration
19
+ # until get_dj_count <= @dj_count_limit
20
+ # puts "\nCurrent Process: #{@current_process}"
21
+ # puts "Waiting on #{get_dj_count} Queued Jobs | Queue Limit: #{@dj_count_limit}"
22
+ # puts "Total Query Count: #{@query_count}, Refresh Rate: #{@dj_refresh_interval} seconds\n\n"
23
+ # sleep(@dj_refresh_interval)
24
+ # end
25
+ # end
26
+ #
27
+ #
28
+ # def get_dj_count
29
+ # Delayed::Job.all.count
30
+ # end
31
+ #
32
+ #
33
+ # def format_query_results(batch_of_ids)
34
+ # batch_of_ids.in_groups(@dj_workers).each do |group_of_ids|
35
+ # standard_iterator(group_of_ids)
36
+ # # delay.standard_iterator(group_of_ids)
37
+ # end
38
+ # end
39
+ #
40
+ #
41
+ # def standard_iterator(ids)
42
+ # # ids.each { |id| template_starter(id) if id }
43
+ # ids.each { |id| delay(priority: 10).template_starter(id) if id }
44
+ # end
45
+
46
+ end
47
+ end
@@ -0,0 +1,130 @@
1
+
2
+ ## INACTIVE - CONSIDER LATER FOR ADVANCED OPTION ###
3
+
4
+ module UrlVerifier
5
+ class Query
6
+ # include Curler
7
+
8
+ # def initialize
9
+ # @dj_on = false
10
+ # @dj_count_limit = 0
11
+ # @dj_workers = 3
12
+ # @obj_in_grp = 10
13
+ # @dj_refresh_interval = 10
14
+ # @db_timeout_limit = 120
15
+ # @cut_off = 10.days.ago
16
+ # @formatter = Formatter.new
17
+ # @mig = Mig.new
18
+ # @current_process = "VerUrl"
19
+ # end
20
+
21
+
22
+ # def get_query
23
+ # err_sts_arr = ['Error: Timeout', 'Error: Host', 'Error: TCP']
24
+ # query = Web.select(:id)
25
+ # .where(url_sts: ['Valid', nil])
26
+ # .where('url_date < ? OR url_date IS NULL', @cut_off)
27
+ # .or(Web.select(:id)
28
+ # .where(url_sts: err_sts_arr)
29
+ # .where('timeout < ?', @db_timeout_limit)
30
+ # ).order("timeout ASC").pluck(:id)
31
+ # end
32
+
33
+
34
+ # def start_ver_url
35
+ # query = get_query[0..20]
36
+ # while query.any?
37
+ # setup_iterator(query)
38
+ # query = get_query[0..20]
39
+ # break unless query.any?
40
+ # end
41
+ # end
42
+
43
+
44
+ # def setup_iterator(query)
45
+ # @query_count = query.count
46
+ # (@query_count & @query_count > @obj_in_grp) ? @group_count = (@query_count / @obj_in_grp) : @group_count = 2
47
+ # @dj_on ? iterate_query(query) : query.each { |id| template_starter(id) }
48
+ # end
49
+
50
+ #
51
+ # #Call: VerUrl.new.start_ver_url
52
+ # def template_starter(id)
53
+ # web = Web.find(id)
54
+ # web_url = web.url
55
+ # db_timeout = web.timeout
56
+ # db_timeout == 0 ? timeout = @dj_refresh_interval : timeout = (db_timeout * 3)
57
+ #
58
+ # begin
59
+ # url_f = @formatter.format_url(web_url)
60
+ # if !url_f.present?
61
+ # web.update!(response_code: nil, url_sts: 'Invalid', url_date: Time.now, wx_date: Time.now, timeout: timeout)
62
+ # elsif url_f != web_url
63
+ # fwd_web_obj = Web.find_by(url: url_f)
64
+ # AssocWeb.transfer_web_associations(web, fwd_web_obj) if fwd_web_obj&.url.present?
65
+ # end
66
+ #
67
+ # ####### CURL-BEGINS - FORMATTED URLS ONLY!! #######
68
+ # #Call: VerUrl.new.start_ver_url
69
+ # if url_f.present?
70
+ # curl_hsh = start_curl(url_f, timeout)
71
+ # curl_err = curl_hsh[:curl_err]
72
+ # if !curl_err.present?
73
+ # update_db(web, curl_hsh)
74
+ # elsif curl_err == "Error: Timeout" || curl_err == "Error: Host"
75
+ # puts "curl_err: #{curl_err}"
76
+ # web.update!(url_sts: curl_err, url_date: Time.now, timeout: timeout)
77
+ # else
78
+ # web.update!(response_code: nil, url_sts: curl_err, url_date: Time.now, timeout: 0)
79
+ # end
80
+ # end
81
+ # rescue
82
+ # web = delete_duplicates(web_url)
83
+ # end
84
+ #
85
+ # end
86
+
87
+
88
+ # def update_db(web, curl_hsh)
89
+ # web_url = web.url
90
+ # response_code = curl_hsh[:response_code]
91
+ # last_effective_url = curl_hsh[:last_effective_url]
92
+ # print_curl_results(web_url, last_effective_url, response_code)
93
+ #
94
+ # begin
95
+ # if !last_effective_url.present?
96
+ # web.update!(url_sts: "Error: Nil", response_code: nil, url_date: Time.now, timeout: 0)
97
+ # elsif last_effective_url.present? && last_effective_url == web_url
98
+ # web.update!(url_sts: 'Valid', response_code: response_code, url_date: Time.now, timeout: 0)
99
+ # elsif last_effective_url.present? && last_effective_url != web_url
100
+ # fwd_web_obj = Web.find_or_create_by(url: last_effective_url)
101
+ # AssocWeb.transfer_web_associations(web, fwd_web_obj) if fwd_web_obj&.url.present?
102
+ # end
103
+ # rescue
104
+ # original_web_obj = delete_duplicates(original_web_obj.url)
105
+ # end
106
+ # end
107
+
108
+
109
+
110
+ # def delete_duplicates(web_url)
111
+ # duplicate_web_objs = Web.where(url: web_url).order("id ASC")
112
+ # duplicate_web_objs.last.destroy if duplicate_web_objs.count > 1
113
+ # non_duplicate_web_obj = duplicate_web_objs.first
114
+ # non_duplicate_web_obj
115
+ # end
116
+
117
+
118
+ # def print_curl_results(web_url, last_effective_url, response_code)
119
+ # puts "=================================="
120
+ # puts "W: #{web_url}"
121
+ # puts "C: #{last_effective_url}"
122
+ # puts "S: #{response_code}\n\n\n"
123
+ # end
124
+
125
+
126
+
127
+
128
+ end
129
+
130
+ end
@@ -0,0 +1,129 @@
1
+
2
+ module UrlVerifier
3
+ class RunCurler
4
+
5
+ def initialize(args={})
6
+ @timeout_limit = args.fetch(:timeout_limit, 60)
7
+ @web_formatter = CrmFormatter::Web.new
8
+ @curler = UrlVerifier::Curler.new
9
+
10
+ # @dj_on = false
11
+ # @dj_count_limit = 0
12
+ # @dj_workers = 3
13
+ # @obj_in_grp = 10
14
+ # @dj_refresh_interval = 10
15
+ # @cut_off = 10.days.ago
16
+ # @current_process = "VerUrl"
17
+ # @url_hash = {}
18
+ end
19
+
20
+ def verify_urls(urls=[])
21
+ url_hashes = urls.map { |url| verify_url(url) }
22
+ end
23
+
24
+ def verify_url(url)
25
+ url_hash = @web_formatter.format_url(url)
26
+ url_hash = merge_url_hash(url_hash)
27
+
28
+ if url_hash[:url_f].present?
29
+ url_hash = send_to_curl(url_hash)
30
+ url_hash = check_for_redirect(url_hash)
31
+ else
32
+ url_hash = evaluate_formatted_url(url_hash)
33
+ end
34
+
35
+ puts url_hash.inspect
36
+ url_hash
37
+ end
38
+
39
+ def merge_url_hash(url_hash)
40
+ url_hash_fields = {
41
+ verified_url: nil,
42
+ url_redirected: false,
43
+ response_code: nil,
44
+ url_sts: nil,
45
+ url_date: Time.now,
46
+ wx_date: nil,
47
+ timeout: 0
48
+ }
49
+ url_hash.merge(url_hash_fields)
50
+ end
51
+
52
+ def evaluate_formatted_url(url_hash)
53
+ url_hash = url_hash.merge({url_sts: 'Invalid', wx_date: Time.now })
54
+ end
55
+
56
+ def check_for_redirect(url_hash)
57
+ ver = url_hash[:verified_url]
58
+ form = url_hash[:url_f]
59
+ url_hash[:url_redirected] = ver.present? && ver != form
60
+ url_hash
61
+ end
62
+
63
+ def send_to_curl(url_hash)
64
+ curl_result = @curler.start_curl(url_hash[:url_f], @timeout_limit)
65
+ curl_err = curl_result[:curl_err]
66
+
67
+ if curl_err.present?
68
+ url_hash = url_hash.merge({ url_sts: curl_err, timeout: evaluate_curl_err(curl_err) })
69
+ else
70
+ url_hash = process_valid_curl_response(url_hash, curl_result)
71
+ end
72
+
73
+ url_hash
74
+ end
75
+
76
+ def evaluate_curl_err(curl_err)
77
+ curl_err == "Error: Timeout" || curl_err == "Error: Host" ? timeout = @timeout_limit : timeout = 0
78
+ end
79
+
80
+ def process_valid_curl_response(url_hash, curl_result)
81
+ curl_result[:verified_url].present? ? url_sts = 'Valid' : url_sts = "Error: Nil"
82
+
83
+ valid_hash = {
84
+ verified_url: curl_result[:verified_url],
85
+ url_sts: url_sts,
86
+ response_code: curl_result[:response_code],
87
+ timeout: 0
88
+ }
89
+ url_hash = url_hash.merge(valid_hash)
90
+ end
91
+
92
+
93
+ ##### ADVANCED USAGE - REVISIT LATER #####
94
+ # def get_query
95
+ # err_sts_arr = ['Error: Timeout', 'Error: Host', 'Error: TCP']
96
+ # query = Web.select(:id)
97
+ # .where(url_sts: ['Valid', nil])
98
+ # .where('url_date < ? OR url_date IS NULL', @cut_off)
99
+ # .or(Web.select(:id)
100
+ # .where(url_sts: err_sts_arr)
101
+ # .where('timeout < ?', @timeout_limit)
102
+ # ).order("timeout ASC").pluck(:id)
103
+ # end
104
+
105
+ # def start_ver_url
106
+ # query = get_query[0..20]
107
+ # while query.any?
108
+ # setup_iterator(query)
109
+ # query = get_query[0..20]
110
+ # break unless query.any?
111
+ # end
112
+ # end
113
+
114
+ # def setup_iterator(query)
115
+ # @query_count = query.count
116
+ # (@query_count & @query_count > @obj_in_grp) ? @group_count = (@query_count / @obj_in_grp) : @group_count = 2
117
+ # @dj_on ? iterate_query(query) : query.each { |id| template_starter(id) }
118
+ # end
119
+
120
+ # def template_starter(url)
121
+ # web = Web.find(id)
122
+ # web_url = web.url
123
+ # db_timeout = web.timeout
124
+ # db_timeout == 0 ? timeout = @dj_refresh_interval : timeout = (db_timeout * 3)
125
+ # end
126
+
127
+ end
128
+
129
+ end
@@ -1,3 +1,3 @@
1
1
  module UrlVerifier
2
- VERSION = "0.0.1.pre.rc.01"
2
+ VERSION = "2.10"
3
3
  end
data/lib/url_verifier.rb CHANGED
@@ -1,4 +1,15 @@
1
- require "url_verifier/version"
1
+ require 'url_verifier/version'
2
+ require 'url_verifier/curler'
3
+ require 'url_verifier/run_curler'
4
+
5
+ require 'pry'
6
+ require 'utf8_sanitizer'
7
+ require 'crm_formatter'
8
+
9
+ require 'timeout'
10
+ require 'net/ping'
11
+ require 'curb'
12
+
2
13
 
3
14
  module UrlVerifier
4
15
  # Your code goes here...
data/url_verifier.gemspec CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Adam Booth"]
10
10
  spec.email = ["4rlm@protonmail.ch"]
11
11
 
12
- spec.summary = %q{Beta Version - Verify url validity and follow redirects. Note: Reserving Namespace for Gem in Development.}
13
- spec.description = %q{Beta Version - Verify url validity and follow redirects. Note: Reserving Namespace for Gem in Development. - Coming Soon.}
12
+ spec.summary = %q{Format, Verify & Follow URL redirects with detailed reports. (url: 'blackwellford.com/staff', verified_url: 'https://www.blackwellford.com', response_code:'200', url_redirected: true) }
13
+ spec.description = %q{Format, Verify & Follow URL redirects with detailed reports. (Ex => url: 'blackwellford.com/staff', verified_url: 'https://www.blackwellford.com', response_code:'200', url_redirected: true, url_sts: 'Valid', url_path: '/staff'). Following url redirects can sometimes take a few minutes and often creates various exceptions. UrlVerifier is built with exceptional error handling, reformatting, and optional time limits you can set; default is set to 60 sec limit, but typically only takes 5-10 seconds per url. UrlVerifier has been developed and improved upon for several years in an enterprise level app and is now available as an open source gem. It is perfect for high-volume, yet smooth, uninterrupted url formatting and verification.}
14
14
  spec.homepage = 'https://github.com/4rlm/url_verifier'
15
15
  spec.license = "MIT"
16
16
 
@@ -36,18 +36,43 @@ Gem::Specification.new do |spec|
36
36
  spec.require_paths = ["lib"]
37
37
 
38
38
  spec.required_ruby_version = '~> 2.5.1'
39
- spec.add_dependency 'activesupport', '~> 5.2', '>= 5.2.0'
39
+ spec.add_dependency 'activesupport', '~> 5.2'
40
+ spec.add_dependency 'utf8_sanitizer', '~> 2.16'
41
+ spec.add_dependency 'crm_formatter', '~> 2.62'
40
42
 
41
- spec.add_dependency "utf8_sanitizer", "~> 2.0"
42
- spec.add_dependency "crm_formatter", "~> 2.4"
43
+ spec.add_dependency 'net-ping', '~> 1.7', '>= 1.7.8'
44
+ spec.add_dependency 'curb', '~> 0.9.6'
43
45
 
46
+ # spec.add_dependency "activesupport-inflector", ['~> 0.1.0']
44
47
  spec.add_development_dependency 'bundler', '~> 1.16', '>= 1.16.2'
45
- spec.add_development_dependency 'byebug', '~> 10.0', '>= 10.0.2'
46
- spec.add_development_dependency 'class_indexer', '~> 0.3.0'
47
- spec.add_development_dependency 'irbtools', '~> 2.2', '>= 2.2.1'
48
48
  spec.add_development_dependency 'pry', '~> 0.11.3'
49
49
  spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.1'
50
50
  spec.add_development_dependency 'rspec', '~> 3.7'
51
- spec.add_development_dependency 'rubocop', '~> 0.56.0'
52
- spec.add_development_dependency 'ruby-beautify', '~> 0.97.4'
51
+ # spec.add_development_dependency 'byebug', '~> 10.0', '>= 10.0.2'
52
+ # spec.add_development_dependency 'class_indexer', '~> 0.3.0'
53
+ # spec.add_development_dependency 'irbtools', '~> 2.2', '>= 2.2.1'
54
+ # spec.add_development_dependency 'rubocop', '~> 0.56.0'
55
+ # spec.add_development_dependency 'ruby-beautify', '~> 0.97.4'
56
+ # spec.add_runtime_dependency 'library', '~> 2.2'
57
+ # spec.add_dependency 'activerecord', '>= 3.0'
58
+ # spec.add_dependency 'actionpack', '>= 3.0'
59
+ # spec.add_dependency 'polyamorous', '~> 1.3.2'
60
+ # spec.add_development_dependency 'machinist', '~> 1.0.6'
61
+ # spec.add_development_dependency 'faker', '~> 0.9.5'
62
+ # spec.add_development_dependency 'sqlite3', '~> 1.3.3'
63
+ # spec.add_development_dependency 'pg', '~> 0.21'
64
+ # spec.add_development_dependency 'mysql2', '0.3.20'
65
+
66
+ # spec.requirements << 'libmagick, v6.0'
67
+ # spec.requirements << 'A good graphics card'
68
+ # # This gem will work with 1.8.6 or greater...
69
+ # spec.required_ruby_version = '>= 1.8.6'
70
+ #
71
+ # # Only with ruby 2.0.x
72
+ # spec.required_ruby_version = '~> 2.0'
73
+ #
74
+ # # Only with ruby between 2.2.0 and 2.2.2
75
+ # spec.required_ruby_version = ['>= 2.2.0', '< 2.2.3']
76
+
77
+
53
78
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_verifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.pre.rc.01
4
+ version: '2.10'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Booth
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-06-27 00:00:00.000000000 Z
11
+ date: 2018-07-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -17,9 +17,6 @@ dependencies:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '5.2'
20
- - - ">="
21
- - !ruby/object:Gem::Version
22
- version: 5.2.0
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
@@ -27,111 +24,88 @@ dependencies:
27
24
  - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: '5.2'
30
- - - ">="
31
- - !ruby/object:Gem::Version
32
- version: 5.2.0
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: utf8_sanitizer
35
29
  requirement: !ruby/object:Gem::Requirement
36
30
  requirements:
37
31
  - - "~>"
38
32
  - !ruby/object:Gem::Version
39
- version: '2.0'
33
+ version: '2.16'
40
34
  type: :runtime
41
35
  prerelease: false
42
36
  version_requirements: !ruby/object:Gem::Requirement
43
37
  requirements:
44
38
  - - "~>"
45
39
  - !ruby/object:Gem::Version
46
- version: '2.0'
40
+ version: '2.16'
47
41
  - !ruby/object:Gem::Dependency
48
42
  name: crm_formatter
49
43
  requirement: !ruby/object:Gem::Requirement
50
44
  requirements:
51
45
  - - "~>"
52
46
  - !ruby/object:Gem::Version
53
- version: '2.4'
47
+ version: '2.62'
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
51
  requirements:
58
52
  - - "~>"
59
53
  - !ruby/object:Gem::Version
60
- version: '2.4'
61
- - !ruby/object:Gem::Dependency
62
- name: bundler
63
- requirement: !ruby/object:Gem::Requirement
64
- requirements:
65
- - - "~>"
66
- - !ruby/object:Gem::Version
67
- version: '1.16'
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- version: 1.16.2
71
- type: :development
72
- prerelease: false
73
- version_requirements: !ruby/object:Gem::Requirement
74
- requirements:
75
- - - "~>"
76
- - !ruby/object:Gem::Version
77
- version: '1.16'
78
- - - ">="
79
- - !ruby/object:Gem::Version
80
- version: 1.16.2
54
+ version: '2.62'
81
55
  - !ruby/object:Gem::Dependency
82
- name: byebug
56
+ name: net-ping
83
57
  requirement: !ruby/object:Gem::Requirement
84
58
  requirements:
85
59
  - - "~>"
86
60
  - !ruby/object:Gem::Version
87
- version: '10.0'
61
+ version: '1.7'
88
62
  - - ">="
89
63
  - !ruby/object:Gem::Version
90
- version: 10.0.2
91
- type: :development
64
+ version: 1.7.8
65
+ type: :runtime
92
66
  prerelease: false
93
67
  version_requirements: !ruby/object:Gem::Requirement
94
68
  requirements:
95
69
  - - "~>"
96
70
  - !ruby/object:Gem::Version
97
- version: '10.0'
71
+ version: '1.7'
98
72
  - - ">="
99
73
  - !ruby/object:Gem::Version
100
- version: 10.0.2
74
+ version: 1.7.8
101
75
  - !ruby/object:Gem::Dependency
102
- name: class_indexer
76
+ name: curb
103
77
  requirement: !ruby/object:Gem::Requirement
104
78
  requirements:
105
79
  - - "~>"
106
80
  - !ruby/object:Gem::Version
107
- version: 0.3.0
108
- type: :development
81
+ version: 0.9.6
82
+ type: :runtime
109
83
  prerelease: false
110
84
  version_requirements: !ruby/object:Gem::Requirement
111
85
  requirements:
112
86
  - - "~>"
113
87
  - !ruby/object:Gem::Version
114
- version: 0.3.0
88
+ version: 0.9.6
115
89
  - !ruby/object:Gem::Dependency
116
- name: irbtools
90
+ name: bundler
117
91
  requirement: !ruby/object:Gem::Requirement
118
92
  requirements:
119
93
  - - "~>"
120
94
  - !ruby/object:Gem::Version
121
- version: '2.2'
95
+ version: '1.16'
122
96
  - - ">="
123
97
  - !ruby/object:Gem::Version
124
- version: 2.2.1
98
+ version: 1.16.2
125
99
  type: :development
126
100
  prerelease: false
127
101
  version_requirements: !ruby/object:Gem::Requirement
128
102
  requirements:
129
103
  - - "~>"
130
104
  - !ruby/object:Gem::Version
131
- version: '2.2'
105
+ version: '1.16'
132
106
  - - ">="
133
107
  - !ruby/object:Gem::Version
134
- version: 2.2.1
108
+ version: 1.16.2
135
109
  - !ruby/object:Gem::Dependency
136
110
  name: pry
137
111
  requirement: !ruby/object:Gem::Requirement
@@ -180,36 +154,16 @@ dependencies:
180
154
  - - "~>"
181
155
  - !ruby/object:Gem::Version
182
156
  version: '3.7'
183
- - !ruby/object:Gem::Dependency
184
- name: rubocop
185
- requirement: !ruby/object:Gem::Requirement
186
- requirements:
187
- - - "~>"
188
- - !ruby/object:Gem::Version
189
- version: 0.56.0
190
- type: :development
191
- prerelease: false
192
- version_requirements: !ruby/object:Gem::Requirement
193
- requirements:
194
- - - "~>"
195
- - !ruby/object:Gem::Version
196
- version: 0.56.0
197
- - !ruby/object:Gem::Dependency
198
- name: ruby-beautify
199
- requirement: !ruby/object:Gem::Requirement
200
- requirements:
201
- - - "~>"
202
- - !ruby/object:Gem::Version
203
- version: 0.97.4
204
- type: :development
205
- prerelease: false
206
- version_requirements: !ruby/object:Gem::Requirement
207
- requirements:
208
- - - "~>"
209
- - !ruby/object:Gem::Version
210
- version: 0.97.4
211
- description: 'Beta Version - Verify url validity and follow redirects. Note: Reserving
212
- Namespace for Gem in Development. - Coming Soon.'
157
+ description: 'Format, Verify & Follow URL redirects with detailed reports. (Ex =>
158
+ url: ''blackwellford.com/staff'', verified_url: ''https://www.blackwellford.com'',
159
+ response_code:''200'', url_redirected: true, url_sts: ''Valid'', url_path: ''/staff'').
160
+ Following url redirects can sometimes take a few minutes and often creates various
161
+ exceptions. UrlVerifier is built with exceptional error handling, reformatting,
162
+ and optional time limits you can set; default is set to 60 sec limit, but typically
163
+ only takes 5-10 seconds per url. UrlVerifier has been developed and improved upon
164
+ for several years in an enterprise level app and is now available as an open source
165
+ gem. It is perfect for high-volume, yet smooth, uninterrupted url formatting and
166
+ verification.'
213
167
  email:
214
168
  - 4rlm@protonmail.ch
215
169
  executables: []
@@ -228,6 +182,10 @@ files:
228
182
  - bin/console
229
183
  - bin/setup
230
184
  - lib/url_verifier.rb
185
+ - lib/url_verifier/curler.rb
186
+ - lib/url_verifier/iterate.rb
187
+ - lib/url_verifier/query.rb
188
+ - lib/url_verifier/run_curler.rb
231
189
  - lib/url_verifier/version.rb
232
190
  - url_verifier.gemspec
233
191
  homepage: https://github.com/4rlm/url_verifier
@@ -246,14 +204,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
246
204
  version: 2.5.1
247
205
  required_rubygems_version: !ruby/object:Gem::Requirement
248
206
  requirements:
249
- - - ">"
207
+ - - ">="
250
208
  - !ruby/object:Gem::Version
251
- version: 1.3.1
209
+ version: '0'
252
210
  requirements: []
253
211
  rubyforge_project:
254
212
  rubygems_version: 2.7.6
255
213
  signing_key:
256
214
  specification_version: 4
257
- summary: 'Beta Version - Verify url validity and follow redirects. Note: Reserving
258
- Namespace for Gem in Development.'
215
+ summary: 'Format, Verify & Follow URL redirects with detailed reports. (url: ''blackwellford.com/staff'',
216
+ verified_url: ''https://www.blackwellford.com'', response_code:''200'', url_redirected:
217
+ true)'
259
218
  test_files: []