deadfinder 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5a9dba525c92f09cb14019fe890f73dc85eaa975179ac19efca534f0fdc76b14
-  data.tar.gz: fee3168a2912dacd9a2740b1aaf2c495b0b68b48b4d024c208523d8a8a5bb5e1
+  metadata.gz: d8c1697ba3269001737ec44650a3f27ffd73cbf5467bd110df7e09cc7fc85ba5
+  data.tar.gz: f7ac73c33b9862cdbaaa661712d4969a999d73e12cdf4e6e725f9280fe67203b
 SHA512:
-  metadata.gz: be806719c1ed5ca905885222d485f376c7e1a61dff0388f812e05d52e279e21315af08fdcfb5b66e9773c88be2f1a5c8c434eba62e6340f91db76ea29f4643db
-  data.tar.gz: d966f9237d2a4f62ba98684346b045c49bd14b024723eda22ac5f0c9c2fb0e62fc358d6607d924c0cdd80cd10e8e5f4eb37a13a067188d8394d142fe0476001b
+  metadata.gz: c59678fbca26d0caa131cc31c931ddb43712bae58f389fb8b5207eb4564904e8ed29b746645ec2802d7379be2f98e502a87207dbcf5d56bb5dfbe5d8b211448d
+  data.tar.gz: ddb8e07bda9415278724e09b559bc3e76d49c56a9bc8143d7a5e4426914c6034884a9b90bccf34982c2097d79e28d80833bf1025beee16b3d8dfe9e1b3a9f2d6
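For context, these digests cover the two archives packed inside the published .gem, which is itself an uncompressed tar archive. A rough verification sketch, assuming a locally downloaded copy named deadfinder-1.6.1.gem (the filename is an assumption):

    # Hash the gem's members and compare against the SHA256 values above.
    require 'digest'
    require 'rubygems/package'

    File.open('deadfinder-1.6.1.gem', 'rb') do |gem_file|
      Gem::Package::TarReader.new(gem_file).each do |entry|
        next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)

        puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
      end
    end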
data/bin/deadfinder CHANGED
@@ -1,4 +1,4 @@
 #!/usr/bin/env ruby
 
 require 'deadfinder'
-DeadFinder.start(ARGV)
+DeadFinder::CLI.start(ARGV)
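The executable's one-line change mirrors the rename in lib/deadfinder.rb below: the Thor entry point is now DeadFinder::CLI rather than a top-level DeadFinder class, so any script that invoked the old constant directly needs the same update. A minimal sketch of driving the new entry point from Ruby (the argument vector is illustrative):

    require 'deadfinder'

    # Thor's .start treats the array like ARGV, so this is equivalent to
    # running `deadfinder version` at the shell.
    DeadFinder::CLI.start(['version'])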
data/lib/deadfinder/version.rb CHANGED
@@ -1,3 +1,3 @@
 # frozen_string_literal: true
 
-VERSION = '1.6.0'
+VERSION = '1.6.1'
data/lib/deadfinder.rb CHANGED
@@ -13,245 +13,240 @@ require 'json'
 require 'yaml'
 require 'csv'
 
-Channel = Concurrent::Channel
-CACHE_SET = Concurrent::Map.new
-CACHE_QUE = Concurrent::Map.new
-OUTPUT = {}
-
-class DeadFinderRunner
-  def default_options
-    {
-      'concurrency' => 50,
-      'timeout' => 10,
-      'output' => '',
-      'output_format' => 'json',
-      'headers' => [],
-      'worker_headers' => [],
-      'silent' => true,
-      'verbose' => false,
-      'include30x' => false
-    }
+module DeadFinder
+  Channel = Concurrent::Channel
+  CACHE_SET = Concurrent::Map.new
+  CACHE_QUE = Concurrent::Map.new
+
+  @output = {}
+  def self.output
+    @output
   end
 
-  def run(target, options)
-    Logger.set_silent if options['silent']
-    headers = options['headers'].each_with_object({}) do |header, hash|
-      kv = header.split(': ')
-      hash[kv[0]] = kv[1]
-    rescue StandardError
+  def self.output=(val)
+    @output = val
+  end
+
+  class Runner
+    def default_options
+      {
+        'concurrency' => 50,
+        'timeout' => 10,
+        'output' => '',
+        'output_format' => 'json',
+        'headers' => [],
+        'worker_headers' => [],
+        'silent' => true,
+        'verbose' => false,
+        'include30x' => false
+      }
     end
-    page = Nokogiri::HTML(URI.open(target, headers))
-    links = extract_links(page)
 
-    total_links_count = links.values.flatten.length
-    # Generate link info string for non-empty link types
-    link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }.compact.join(' / ')
+    def run(target, options)
+      Logger.set_silent if options['silent']
+      headers = options['headers'].each_with_object({}) do |header, hash|
+        kv = header.split(': ')
+        hash[kv[0]] = kv[1]
+      rescue StandardError
+      end
+      page = Nokogiri::HTML(URI.open(target, headers))
+      links = extract_links(page)
 
-    # Log the information if there are any links
-    Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
-    Logger.sub_info 'Checking'
+      total_links_count = links.values.flatten.length
+      link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }
+                       .compact.join(' / ')
+      Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
+      Logger.sub_info 'Checking'
 
-    jobs = Channel.new(buffer: :buffered, capacity: 1000)
-    results = Channel.new(buffer: :buffered, capacity: 1000)
+      jobs = Channel.new(buffer: :buffered, capacity: 1000)
+      results = Channel.new(buffer: :buffered, capacity: 1000)
 
-    (1..options['concurrency']).each do |w|
-      Channel.go { worker(w, jobs, results, target, options) }
-    end
+      (1..options['concurrency']).each do |w|
+        Channel.go { worker(w, jobs, results, target, options) }
+      end
 
-    links.values.flatten.uniq.each do |node|
-      result = generate_url(node, target)
-      jobs << result unless result.nil?
-    end
+      links.values.flatten.uniq.each do |node|
+        result = generate_url(node, target)
+        jobs << result unless result.nil?
+      end
 
-    jobs_size = jobs.size
-    jobs.close
+      jobs_size = jobs.size
+      jobs.close
 
-    (1..jobs_size).each do
-      ~results
+      (1..jobs_size).each { ~results }
+      Logger.sub_done 'Done'
+    rescue StandardError => e
+      Logger.error "[#{e}] #{target}"
     end
-    Logger.sub_done 'Done'
-  rescue StandardError => e
-    Logger.error "[#{e}] #{target}"
-  end
 
-  def worker(_id, jobs, results, target, options)
-    jobs.each do |j|
-      if CACHE_SET[j]
-        Logger.found "[404 Not Found] #{j}" unless CACHE_QUE[j]
-      else
-        CACHE_SET[j] = true
-        begin
-          CACHE_QUE[j] = true
-          uri = URI.parse(j)
-
-          # Create HTTP request with timeout and headers
-          proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
-          http = if proxy_uri
-                   Net::HTTP.new(uri.host, uri.port, proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password)
-                 else
-                   Net::HTTP.new(uri.host, uri.port)
-                 end
-          http.use_ssl = (uri.scheme == 'https')
-          http.read_timeout = options['timeout'].to_i if options['timeout']
-
-          # Set SSL verification mode
-          http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
-
-          request = Net::HTTP::Get.new(uri.request_uri)
-
-          # Add User-Agent header
-          request['User-Agent'] = options['user_agent']
-
-          # Add worker headers if provided
-          options['worker_headers']&.each do |header|
-            key, value = header.split(':', 2)
-            request[key.strip] = value.strip
-          end
+    def worker(_id, jobs, results, target, options)
+      jobs.each do |j|
+        if CACHE_SET[j]
+          Logger.found "[404 Not Found] #{j}" unless CACHE_QUE[j]
+        else
+          CACHE_SET[j] = true
+          begin
+            CACHE_QUE[j] = true
+            uri = URI.parse(j)
+            proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
+            http = if proxy_uri
+                     Net::HTTP.new(uri.host, uri.port,
+                                   proxy_uri.host, proxy_uri.port,
+                                   proxy_uri.user, proxy_uri.password)
+                   else
+                     Net::HTTP.new(uri.host, uri.port)
+                   end
+            http.use_ssl = (uri.scheme == 'https')
+            http.read_timeout = options['timeout'].to_i if options['timeout']
+            http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
+
+            request = Net::HTTP::Get.new(uri.request_uri)
+            request['User-Agent'] = options['user_agent']
+            options['worker_headers']&.each do |header|
+              key, value = header.split(':', 2)
+              request[key.strip] = value.strip
+            end
 
-          response = http.request(request)
-          status_code = response.code.to_i
-          Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
+            response = http.request(request)
+            status_code = response.code.to_i
+            Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
 
-          if status_code >= 400 || (status_code >= 300 && options['include30x'])
-            Logger.found "[#{status_code} #{response.message}] #{j}"
-            CACHE_QUE[j] = false
-            OUTPUT[target] ||= []
-            OUTPUT[target] << j
+            if status_code >= 400 || (status_code >= 300 && options['include30x'])
+              Logger.found "[#{status_code} #{response.message}] #{j}"
+              CACHE_QUE[j] = false
+              DeadFinder.output[target] ||= []
+              DeadFinder.output[target] << j
+            end
+          rescue StandardError => e
+            Logger.verbose "[#{e}] #{j}" if options['verbose']
           end
-        rescue StandardError => e
-          Logger.verbose "[#{e}] #{j}" if options['verbose']
         end
+        results << j
       end
-      results << j
     end
-  end
 
-  private
-
-  def extract_links(page)
-    {
-      anchor: page.css('a').map { |element| element['href'] }.compact,
-      script: page.css('script').map { |element| element['src'] }.compact,
-      link: page.css('link').map { |element| element['href'] }.compact,
-      iframe: page.css('iframe').map { |element| element['src'] }.compact,
-      form: page.css('form').map { |element| element['action'] }.compact,
-      object: page.css('object').map { |element| element['data'] }.compact,
-      embed: page.css('embed').map { |element| element['src'] }.compact
-    }
+    private
+
+    def extract_links(page)
+      {
+        anchor: page.css('a').map { |element| element['href'] }.compact,
+        script: page.css('script').map { |element| element['src'] }.compact,
+        link: page.css('link').map { |element| element['href'] }.compact,
+        iframe: page.css('iframe').map { |element| element['src'] }.compact,
+        form: page.css('form').map { |element| element['action'] }.compact,
+        object: page.css('object').map { |element| element['data'] }.compact,
+        embed: page.css('embed').map { |element| element['src'] }.compact
+      }
+    end
   end
-end
-
-def run_pipe(options)
-  Logger.set_silent if options['silent']
 
-  Logger.info 'Reading from STDIN'
-  app = DeadFinderRunner.new
-  while $stdin.gets
-    target = $LAST_READ_LINE.chomp
-    Logger.target "Checking: #{target}"
-    app.run target, options
+  def self.run_pipe(options)
+    Logger.set_silent if options['silent']
+    Logger.info 'Reading from STDIN'
+    app = Runner.new
+    while $stdin.gets
+      target = $LAST_READ_LINE.chomp
+      Logger.target "Checking: #{target}"
+      app.run target, options
+    end
+    gen_output(options)
   end
-  gen_output(options)
-end
 
-def run_file(filename, options)
-  Logger.set_silent if options['silent']
+  def self.run_file(filename, options)
+    Logger.set_silent if options['silent']
+    Logger.info "Reading: #{filename}"
+    app = Runner.new
+    File.foreach(filename) do |line|
+      target = line.chomp
+      Logger.target "Checking: #{target}"
+      app.run target, options
+    end
+    gen_output(options)
+  end
 
-  Logger.info "Reading: #{filename}"
-  app = DeadFinderRunner.new
-  File.foreach(filename) do |line|
-    target = line.chomp
-    Logger.target "Checking: #{target}"
-    app.run target, options
+  def self.run_url(url, options)
+    Logger.set_silent if options['silent']
+    Logger.target "Checking: #{url}"
+    app = Runner.new
+    app.run url, options
+    gen_output(options)
   end
-  gen_output(options)
-end
 
-def run_url(url, options)
-  Logger.set_silent if options['silent']
+  def self.run_sitemap(sitemap_url, options)
+    Logger.set_silent if options['silent']
+    Logger.info "Parsing sitemap: #{sitemap_url}"
+    app = Runner.new
+    base_uri = URI(sitemap_url)
+    sitemap = SitemapParser.new sitemap_url, { recurse: true }
+    sitemap.to_a.each do |url|
+      turl = generate_url(url, base_uri)
+      Logger.target "Checking: #{turl}"
+      app.run turl, options
+    end
+    gen_output(options)
+  end
 
-  Logger.target "Checking: #{url}"
-  app = DeadFinderRunner.new
-  app.run url, options
-  gen_output(options)
-end
+  def self.gen_output(options)
+    return if options['output'].empty?
 
-def run_sitemap(sitemap_url, options)
-  Logger.set_silent if options['silent']
-  Logger.info "Parsing sitemap: #{sitemap_url}"
-  app = DeadFinderRunner.new
-  base_uri = URI(sitemap_url)
-  sitemap = SitemapParser.new sitemap_url, { recurse: true }
-  sitemap.to_a.each do |url|
-    turl = generate_url url, base_uri
-    Logger.target "Checking: #{turl}"
-    app.run turl, options
-  end
-  gen_output(options)
-end
+    output_data = DeadFinder.output.to_h
+    format = options['output_format'].to_s.downcase
 
-def gen_output(options)
-  return if options['output'].empty?
-
-  output_data = OUTPUT.to_h
-  format = options['output_format'].to_s.downcase
-
-  content = case format
-            when 'yaml', 'yml'
-              output_data.to_yaml
-            when 'csv'
-              CSV.generate do |csv|
-                csv << ['target', 'url']
-                output_data.each do |target, urls|
-                  Array(urls).each do |url|
-                    csv << [target, url]
+    content = case format
+              when 'yaml', 'yml'
+                output_data.to_yaml
+              when 'csv'
+                CSV.generate do |csv|
+                  csv << %w[target url]
+                  output_data.each do |target, urls|
+                    Array(urls).each { |url| csv << [target, url] }
                   end
                 end
+              else
+                JSON.pretty_generate(output_data)
               end
-            else
-              JSON.pretty_generate(output_data)
-            end
 
-  File.write(options['output'], content)
-end
-
-class DeadFinder < Thor
-  class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
-  class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
-  class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
-  class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
-  class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
-  class_option :headers, aliases: :H, default: [], type: :array,
-               desc: 'Custom HTTP headers to send with initial request'
-  class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
-  class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.0;)', type: :string,
-               desc: 'User-Agent string to use for requests'
-  class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
-  class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
-  class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
-
-  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
-  def pipe
-    run_pipe options
+    File.write(options['output'], content)
   end
 
-  desc 'file <FILE>', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
-  def file(filename)
-    run_file filename, options
-  end
+  class CLI < Thor
+    class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
+    class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
+    class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
+    class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
+    class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
+    class_option :headers, aliases: :H, default: [], type: :array,
+                 desc: 'Custom HTTP headers to send with initial request'
+    class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
+    class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.1;)', type: :string,
+                 desc: 'User-Agent string to use for requests'
+    class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
+    class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
+    class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
+
+    desc 'pipe', 'Scan the URLs from STDIN. (e.g., cat urls.txt | deadfinder pipe)'
+    def pipe
+      DeadFinder.run_pipe options
+    end
 
-  desc 'url <URL>', 'Scan the Single URL.'
-  def url(url)
-    run_url url, options
-  end
+    desc 'file <FILE>', 'Scan the URLs from File. (e.g., deadfinder file urls.txt)'
+    def file(filename)
+      DeadFinder.run_file filename, options
+    end
 
-  desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
-  def sitemap(sitemap)
-    run_sitemap sitemap, options
-  end
+    desc 'url <URL>', 'Scan the Single URL.'
+    def url(url)
+      DeadFinder.run_url url, options
+    end
 
-  desc 'version', 'Show version.'
-  def version
-    Logger.info "deadfinder #{VERSION}"
+    desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
+    def sitemap(sitemap)
+      DeadFinder.run_sitemap sitemap, options
+    end
+
+    desc 'version', 'Show version.'
+    def version
+      Logger.info "deadfinder #{VERSION}"
+    end
   end
 end
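The substance of this release is the refactor above: the former top-level constants and the DeadFinderRunner class now live inside module DeadFinder, the mutable OUTPUT hash is replaced by the DeadFinder.output accessor, and the run_* helpers become module methods, which keeps the gem from polluting the global namespace. A minimal sketch of library-style use under those assumptions (the target URL is a placeholder, and note that Runner#default_options does not include every key the CLI layer supplies, e.g. user_agent):

    require 'deadfinder'

    # Start from the Runner defaults and override as needed.
    options = DeadFinder::Runner.new.default_options
    options['silent'] = false

    # Module-level helper introduced by this refactor; findings accumulate
    # in DeadFinder.output, keyed by target.
    DeadFinder.run_url('https://www.example.com', options)
    puts DeadFinder.output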
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: deadfinder
 version: !ruby/object:Gem::Version
-  version: 1.6.0
+  version: 1.6.1
 platform: ruby
 authors:
 - hahwul
@@ -169,6 +169,20 @@ dependencies:
   - - ">="
     - !ruby/object:Gem::Version
       version: 1.2.0
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Find dead-links (broken links). Dead link (broken link) means a link
   within a web page that cannot be connected. These links can have a negative impact
   to SEO and Security. This tool makes it easy to identify and modify.
@@ -183,7 +197,7 @@ files:
 - lib/deadfinder/logger.rb
 - lib/deadfinder/utils.rb
 - lib/deadfinder/version.rb
-homepage: https://www.hahwul.com
+homepage: https://www.hahwul.com/projects/deadfinder/
 licenses:
 - MIT
 metadata: