deadfinder 1.6.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/deadfinder +1 -1
- data/lib/deadfinder/version.rb +1 -1
- data/lib/deadfinder.rb +197 -202
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d8c1697ba3269001737ec44650a3f27ffd73cbf5467bd110df7e09cc7fc85ba5
|
4
|
+
data.tar.gz: f7ac73c33b9862cdbaaa661712d4969a999d73e12cdf4e6e725f9280fe67203b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c59678fbca26d0caa131cc31c931ddb43712bae58f389fb8b5207eb4564904e8ed29b746645ec2802d7379be2f98e502a87207dbcf5d56bb5dfbe5d8b211448d
|
7
|
+
data.tar.gz: ddb8e07bda9415278724e09b559bc3e76d49c56a9bc8143d7a5e4426914c6034884a9b90bccf34982c2097d79e28d80833bf1025beee16b3d8dfe9e1b3a9f2d6
|
data/bin/deadfinder
CHANGED
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -13,245 +13,240 @@ require 'json'
|
|
13
13
|
require 'yaml'
|
14
14
|
require 'csv'
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
def
|
23
|
-
|
24
|
-
'concurrency' => 50,
|
25
|
-
'timeout' => 10,
|
26
|
-
'output' => '',
|
27
|
-
'output_format' => 'json',
|
28
|
-
'headers' => [],
|
29
|
-
'worker_headers' => [],
|
30
|
-
'silent' => true,
|
31
|
-
'verbose' => false,
|
32
|
-
'include30x' => false
|
33
|
-
}
|
16
|
+
module DeadFinder
|
17
|
+
Channel = Concurrent::Channel
|
18
|
+
CACHE_SET = Concurrent::Map.new
|
19
|
+
CACHE_QUE = Concurrent::Map.new
|
20
|
+
|
21
|
+
@output = {}
|
22
|
+
def self.output
|
23
|
+
@output
|
34
24
|
end
|
35
25
|
|
36
|
-
def
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
26
|
+
def self.output=(val)
|
27
|
+
@output = val
|
28
|
+
end
|
29
|
+
|
30
|
+
class Runner
|
31
|
+
def default_options
|
32
|
+
{
|
33
|
+
'concurrency' => 50,
|
34
|
+
'timeout' => 10,
|
35
|
+
'output' => '',
|
36
|
+
'output_format' => 'json',
|
37
|
+
'headers' => [],
|
38
|
+
'worker_headers' => [],
|
39
|
+
'silent' => true,
|
40
|
+
'verbose' => false,
|
41
|
+
'include30x' => false
|
42
|
+
}
|
42
43
|
end
|
43
|
-
page = Nokogiri::HTML(URI.open(target, headers))
|
44
|
-
links = extract_links(page)
|
45
44
|
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
def run(target, options)
|
46
|
+
Logger.set_silent if options['silent']
|
47
|
+
headers = options['headers'].each_with_object({}) do |header, hash|
|
48
|
+
kv = header.split(': ')
|
49
|
+
hash[kv[0]] = kv[1]
|
50
|
+
rescue StandardError
|
51
|
+
end
|
52
|
+
page = Nokogiri::HTML(URI.open(target, headers))
|
53
|
+
links = extract_links(page)
|
49
54
|
|
50
|
-
|
51
|
-
|
52
|
-
|
55
|
+
total_links_count = links.values.flatten.length
|
56
|
+
link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }
|
57
|
+
.compact.join(' / ')
|
58
|
+
Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
|
59
|
+
Logger.sub_info 'Checking'
|
53
60
|
|
54
|
-
|
55
|
-
|
61
|
+
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
62
|
+
results = Channel.new(buffer: :buffered, capacity: 1000)
|
56
63
|
|
57
|
-
|
58
|
-
|
59
|
-
|
64
|
+
(1..options['concurrency']).each do |w|
|
65
|
+
Channel.go { worker(w, jobs, results, target, options) }
|
66
|
+
end
|
60
67
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
68
|
+
links.values.flatten.uniq.each do |node|
|
69
|
+
result = generate_url(node, target)
|
70
|
+
jobs << result unless result.nil?
|
71
|
+
end
|
65
72
|
|
66
|
-
|
67
|
-
|
73
|
+
jobs_size = jobs.size
|
74
|
+
jobs.close
|
68
75
|
|
69
|
-
|
70
|
-
|
76
|
+
(1..jobs_size).each { ~results }
|
77
|
+
Logger.sub_done 'Done'
|
78
|
+
rescue StandardError => e
|
79
|
+
Logger.error "[#{e}] #{target}"
|
71
80
|
end
|
72
|
-
Logger.sub_done 'Done'
|
73
|
-
rescue StandardError => e
|
74
|
-
Logger.error "[#{e}] #{target}"
|
75
|
-
end
|
76
81
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
# Add worker headers if provided
|
106
|
-
options['worker_headers']&.each do |header|
|
107
|
-
key, value = header.split(':', 2)
|
108
|
-
request[key.strip] = value.strip
|
109
|
-
end
|
82
|
+
def worker(_id, jobs, results, target, options)
|
83
|
+
jobs.each do |j|
|
84
|
+
if CACHE_SET[j]
|
85
|
+
Logger.found "[404 Not Found] #{j}" unless CACHE_QUE[j]
|
86
|
+
else
|
87
|
+
CACHE_SET[j] = true
|
88
|
+
begin
|
89
|
+
CACHE_QUE[j] = true
|
90
|
+
uri = URI.parse(j)
|
91
|
+
proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
|
92
|
+
http = if proxy_uri
|
93
|
+
Net::HTTP.new(uri.host, uri.port,
|
94
|
+
proxy_uri.host, proxy_uri.port,
|
95
|
+
proxy_uri.user, proxy_uri.password)
|
96
|
+
else
|
97
|
+
Net::HTTP.new(uri.host, uri.port)
|
98
|
+
end
|
99
|
+
http.use_ssl = (uri.scheme == 'https')
|
100
|
+
http.read_timeout = options['timeout'].to_i if options['timeout']
|
101
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
|
102
|
+
|
103
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
104
|
+
request['User-Agent'] = options['user_agent']
|
105
|
+
options['worker_headers']&.each do |header|
|
106
|
+
key, value = header.split(':', 2)
|
107
|
+
request[key.strip] = value.strip
|
108
|
+
end
|
110
109
|
|
111
|
-
|
112
|
-
|
113
|
-
|
110
|
+
response = http.request(request)
|
111
|
+
status_code = response.code.to_i
|
112
|
+
Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
|
114
113
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
114
|
+
if status_code >= 400 || (status_code >= 300 && options['include30x'])
|
115
|
+
Logger.found "[#{status_code} #{response.message}] #{j}"
|
116
|
+
CACHE_QUE[j] = false
|
117
|
+
DeadFinder.output[target] ||= []
|
118
|
+
DeadFinder.output[target] << j
|
119
|
+
end
|
120
|
+
rescue StandardError => e
|
121
|
+
Logger.verbose "[#{e}] #{j}" if options['verbose']
|
120
122
|
end
|
121
|
-
rescue StandardError => e
|
122
|
-
Logger.verbose "[#{e}] #{j}" if options['verbose']
|
123
123
|
end
|
124
|
+
results << j
|
124
125
|
end
|
125
|
-
results << j
|
126
126
|
end
|
127
|
-
end
|
128
127
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
128
|
+
private
|
129
|
+
|
130
|
+
def extract_links(page)
|
131
|
+
{
|
132
|
+
anchor: page.css('a').map { |element| element['href'] }.compact,
|
133
|
+
script: page.css('script').map { |element| element['src'] }.compact,
|
134
|
+
link: page.css('link').map { |element| element['href'] }.compact,
|
135
|
+
iframe: page.css('iframe').map { |element| element['src'] }.compact,
|
136
|
+
form: page.css('form').map { |element| element['action'] }.compact,
|
137
|
+
object: page.css('object').map { |element| element['data'] }.compact,
|
138
|
+
embed: page.css('embed').map { |element| element['src'] }.compact
|
139
|
+
}
|
140
|
+
end
|
141
141
|
end
|
142
|
-
end
|
143
|
-
|
144
|
-
def run_pipe(options)
|
145
|
-
Logger.set_silent if options['silent']
|
146
142
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
143
|
+
def self.run_pipe(options)
|
144
|
+
Logger.set_silent if options['silent']
|
145
|
+
Logger.info 'Reading from STDIN'
|
146
|
+
app = Runner.new
|
147
|
+
while $stdin.gets
|
148
|
+
target = $LAST_READ_LINE.chomp
|
149
|
+
Logger.target "Checking: #{target}"
|
150
|
+
app.run target, options
|
151
|
+
end
|
152
|
+
gen_output(options)
|
153
153
|
end
|
154
|
-
gen_output(options)
|
155
|
-
end
|
156
154
|
|
157
|
-
def run_file(filename, options)
|
158
|
-
|
155
|
+
def self.run_file(filename, options)
|
156
|
+
Logger.set_silent if options['silent']
|
157
|
+
Logger.info "Reading: #{filename}"
|
158
|
+
app = Runner.new
|
159
|
+
File.foreach(filename) do |line|
|
160
|
+
target = line.chomp
|
161
|
+
Logger.target "Checking: #{target}"
|
162
|
+
app.run target, options
|
163
|
+
end
|
164
|
+
gen_output(options)
|
165
|
+
end
|
159
166
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
167
|
+
def self.run_url(url, options)
|
168
|
+
Logger.set_silent if options['silent']
|
169
|
+
Logger.target "Checking: #{url}"
|
170
|
+
app = Runner.new
|
171
|
+
app.run url, options
|
172
|
+
gen_output(options)
|
166
173
|
end
|
167
|
-
gen_output(options)
|
168
|
-
end
|
169
174
|
|
170
|
-
def
|
171
|
-
|
175
|
+
def self.run_sitemap(sitemap_url, options)
|
176
|
+
Logger.set_silent if options['silent']
|
177
|
+
Logger.info "Parsing sitemap: #{sitemap_url}"
|
178
|
+
app = Runner.new
|
179
|
+
base_uri = URI(sitemap_url)
|
180
|
+
sitemap = SitemapParser.new sitemap_url, { recurse: true }
|
181
|
+
sitemap.to_a.each do |url|
|
182
|
+
turl = generate_url(url, base_uri)
|
183
|
+
Logger.target "Checking: #{turl}"
|
184
|
+
app.run turl, options
|
185
|
+
end
|
186
|
+
gen_output(options)
|
187
|
+
end
|
172
188
|
|
173
|
-
|
174
|
-
|
175
|
-
app.run url, options
|
176
|
-
gen_output(options)
|
177
|
-
end
|
189
|
+
def self.gen_output(options)
|
190
|
+
return if options['output'].empty?
|
178
191
|
|
179
|
-
|
180
|
-
|
181
|
-
Logger.info "Parsing sitemap: #{sitemap_url}"
|
182
|
-
app = DeadFinderRunner.new
|
183
|
-
base_uri = URI(sitemap_url)
|
184
|
-
sitemap = SitemapParser.new sitemap_url, { recurse: true }
|
185
|
-
sitemap.to_a.each do |url|
|
186
|
-
turl = generate_url url, base_uri
|
187
|
-
Logger.target "Checking: #{turl}"
|
188
|
-
app.run turl, options
|
189
|
-
end
|
190
|
-
gen_output(options)
|
191
|
-
end
|
192
|
+
output_data = DeadFinder.output.to_h
|
193
|
+
format = options['output_format'].to_s.downcase
|
192
194
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
output_data.to_yaml
|
202
|
-
when 'csv'
|
203
|
-
CSV.generate do |csv|
|
204
|
-
csv << ['target', 'url']
|
205
|
-
output_data.each do |target, urls|
|
206
|
-
Array(urls).each do |url|
|
207
|
-
csv << [target, url]
|
195
|
+
content = case format
|
196
|
+
when 'yaml', 'yml'
|
197
|
+
output_data.to_yaml
|
198
|
+
when 'csv'
|
199
|
+
CSV.generate do |csv|
|
200
|
+
csv << %w[target url]
|
201
|
+
output_data.each do |target, urls|
|
202
|
+
Array(urls).each { |url| csv << [target, url] }
|
208
203
|
end
|
209
204
|
end
|
205
|
+
else
|
206
|
+
JSON.pretty_generate(output_data)
|
210
207
|
end
|
211
|
-
else
|
212
|
-
JSON.pretty_generate(output_data)
|
213
|
-
end
|
214
208
|
|
215
|
-
|
216
|
-
end
|
217
|
-
|
218
|
-
class DeadFinder < Thor
|
219
|
-
class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
|
220
|
-
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
221
|
-
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
222
|
-
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
|
223
|
-
class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
|
224
|
-
class_option :headers, aliases: :H, default: [], type: :array,
|
225
|
-
desc: 'Custom HTTP headers to send with initial request'
|
226
|
-
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
227
|
-
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.0;)', type: :string,
|
228
|
-
desc: 'User-Agent string to use for requests'
|
229
|
-
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
230
|
-
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
231
|
-
class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
|
232
|
-
|
233
|
-
desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
|
234
|
-
def pipe
|
235
|
-
run_pipe options
|
209
|
+
File.write(options['output'], content)
|
236
210
|
end
|
237
211
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
212
|
+
class CLI < Thor
|
213
|
+
class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
|
214
|
+
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
215
|
+
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
216
|
+
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
|
217
|
+
class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
|
218
|
+
class_option :headers, aliases: :H, default: [], type: :array,
|
219
|
+
desc: 'Custom HTTP headers to send with initial request'
|
220
|
+
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
221
|
+
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.1;)', type: :string,
|
222
|
+
desc: 'User-Agent string to use for requests'
|
223
|
+
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
224
|
+
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
225
|
+
class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
|
226
|
+
|
227
|
+
desc 'pipe', 'Scan the URLs from STDIN. (e.g., cat urls.txt | deadfinder pipe)'
|
228
|
+
def pipe
|
229
|
+
DeadFinder.run_pipe options
|
230
|
+
end
|
242
231
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
232
|
+
desc 'file <FILE>', 'Scan the URLs from File. (e.g., deadfinder file urls.txt)'
|
233
|
+
def file(filename)
|
234
|
+
DeadFinder.run_file filename, options
|
235
|
+
end
|
247
236
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
237
|
+
desc 'url <URL>', 'Scan the Single URL.'
|
238
|
+
def url(url)
|
239
|
+
DeadFinder.run_url url, options
|
240
|
+
end
|
252
241
|
|
253
|
-
|
254
|
-
|
255
|
-
|
242
|
+
desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
|
243
|
+
def sitemap(sitemap)
|
244
|
+
DeadFinder.run_sitemap sitemap, options
|
245
|
+
end
|
246
|
+
|
247
|
+
desc 'version', 'Show version.'
|
248
|
+
def version
|
249
|
+
Logger.info "deadfinder #{VERSION}"
|
250
|
+
end
|
256
251
|
end
|
257
252
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.0
|
4
|
+
version: 1.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
@@ -169,6 +169,20 @@ dependencies:
|
|
169
169
|
- - ">="
|
170
170
|
- !ruby/object:Gem::Version
|
171
171
|
version: 1.2.0
|
172
|
+
- !ruby/object:Gem::Dependency
|
173
|
+
name: rspec
|
174
|
+
requirement: !ruby/object:Gem::Requirement
|
175
|
+
requirements:
|
176
|
+
- - ">="
|
177
|
+
- !ruby/object:Gem::Version
|
178
|
+
version: '0'
|
179
|
+
type: :development
|
180
|
+
prerelease: false
|
181
|
+
version_requirements: !ruby/object:Gem::Requirement
|
182
|
+
requirements:
|
183
|
+
- - ">="
|
184
|
+
- !ruby/object:Gem::Version
|
185
|
+
version: '0'
|
172
186
|
description: Find dead-links (broken links). Dead link (broken link) means a link
|
173
187
|
within a web page that cannot be connected. These links can have a negative impact
|
174
188
|
to SEO and Security. This tool makes it easy to identify and modify.
|
@@ -183,7 +197,7 @@ files:
|
|
183
197
|
- lib/deadfinder/logger.rb
|
184
198
|
- lib/deadfinder/utils.rb
|
185
199
|
- lib/deadfinder/version.rb
|
186
|
-
homepage: https://www.hahwul.com
|
200
|
+
homepage: https://www.hahwul.com/projects/deadfinder/
|
187
201
|
licenses:
|
188
202
|
- MIT
|
189
203
|
metadata:
|