deadfinder 1.5.1 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/deadfinder +1 -1
- data/lib/deadfinder/utils.rb +12 -29
- data/lib/deadfinder/version.rb +1 -1
- data/lib/deadfinder.rb +206 -187
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d8c1697ba3269001737ec44650a3f27ffd73cbf5467bd110df7e09cc7fc85ba5
|
4
|
+
data.tar.gz: f7ac73c33b9862cdbaaa661712d4969a999d73e12cdf4e6e725f9280fe67203b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c59678fbca26d0caa131cc31c931ddb43712bae58f389fb8b5207eb4564904e8ed29b746645ec2802d7379be2f98e502a87207dbcf5d56bb5dfbe5d8b211448d
|
7
|
+
data.tar.gz: ddb8e07bda9415278724e09b559bc3e76d49c56a9bc8143d7a5e4426914c6034884a9b90bccf34982c2097d79e28d80833bf1025beee16b3d8dfe9e1b3a9f2d6
|
data/bin/deadfinder
CHANGED
data/lib/deadfinder/utils.rb
CHANGED
@@ -4,41 +4,24 @@ require 'uri'
|
|
4
4
|
|
5
5
|
def generate_url(text, base_url)
|
6
6
|
node = text.to_s
|
7
|
+
return node if node.start_with?('http://', 'https://')
|
8
|
+
|
7
9
|
begin
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
return "#{extract_directory(uri)}#{node}"
|
18
|
-
end
|
10
|
+
uri = URI(base_url)
|
11
|
+
if node.start_with?('//')
|
12
|
+
"#{uri.scheme}:#{node}"
|
13
|
+
elsif node.start_with?('/')
|
14
|
+
"#{uri.scheme}://#{uri.host}#{node}"
|
15
|
+
elsif ignore_scheme?(node)
|
16
|
+
nil
|
17
|
+
else
|
18
|
+
URI.join(base_url, node).to_s
|
19
19
|
end
|
20
20
|
rescue StandardError
|
21
|
-
|
21
|
+
nil
|
22
22
|
end
|
23
|
-
node
|
24
23
|
end
|
25
24
|
|
26
25
|
def ignore_scheme?(url)
|
27
26
|
url.start_with?('mailto:', 'tel:', 'sms:', 'data:', 'file:')
|
28
27
|
end
|
29
|
-
|
30
|
-
def extract_directory(uri)
|
31
|
-
return "#{uri.scheme}://#{uri.host}#{uri.path}" if uri.path.end_with?('/')
|
32
|
-
|
33
|
-
path_components = uri.path.split('/')
|
34
|
-
path_components.last
|
35
|
-
path_components.pop
|
36
|
-
|
37
|
-
directory_path = path_components.join('/')
|
38
|
-
|
39
|
-
if directory_path.start_with?('/')
|
40
|
-
"#{uri.scheme}://#{uri.host}#{directory_path}/"
|
41
|
-
else
|
42
|
-
"#{uri.scheme}://#{uri.host}/#{directory_path}/"
|
43
|
-
end
|
44
|
-
end
|
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -10,224 +10,243 @@ require 'deadfinder/version'
|
|
10
10
|
require 'concurrent-edge'
|
11
11
|
require 'sitemap-parser'
|
12
12
|
require 'json'
|
13
|
+
require 'yaml'
|
14
|
+
require 'csv'
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
def
|
21
|
-
|
22
|
-
'concurrency' => 50,
|
23
|
-
'timeout' => 10,
|
24
|
-
'output' => '',
|
25
|
-
'headers' => [],
|
26
|
-
'worker_headers' => [],
|
27
|
-
'silent' => true,
|
28
|
-
'verbose' => false,
|
29
|
-
'include30x' => false
|
30
|
-
}
|
16
|
+
module DeadFinder
|
17
|
+
Channel = Concurrent::Channel
|
18
|
+
CACHE_SET = Concurrent::Map.new
|
19
|
+
CACHE_QUE = Concurrent::Map.new
|
20
|
+
|
21
|
+
@output = {}
|
22
|
+
def self.output
|
23
|
+
@output
|
31
24
|
end
|
32
25
|
|
33
|
-
def
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
26
|
+
def self.output=(val)
|
27
|
+
@output = val
|
28
|
+
end
|
29
|
+
|
30
|
+
class Runner
|
31
|
+
def default_options
|
32
|
+
{
|
33
|
+
'concurrency' => 50,
|
34
|
+
'timeout' => 10,
|
35
|
+
'output' => '',
|
36
|
+
'output_format' => 'json',
|
37
|
+
'headers' => [],
|
38
|
+
'worker_headers' => [],
|
39
|
+
'silent' => true,
|
40
|
+
'verbose' => false,
|
41
|
+
'include30x' => false
|
42
|
+
}
|
39
43
|
end
|
40
|
-
page = Nokogiri::HTML(URI.open(target, headers))
|
41
|
-
links = extract_links(page)
|
42
44
|
|
43
|
-
|
44
|
-
|
45
|
-
|
45
|
+
def run(target, options)
|
46
|
+
Logger.set_silent if options['silent']
|
47
|
+
headers = options['headers'].each_with_object({}) do |header, hash|
|
48
|
+
kv = header.split(': ')
|
49
|
+
hash[kv[0]] = kv[1]
|
50
|
+
rescue StandardError
|
51
|
+
end
|
52
|
+
page = Nokogiri::HTML(URI.open(target, headers))
|
53
|
+
links = extract_links(page)
|
46
54
|
|
47
|
-
|
48
|
-
|
49
|
-
|
55
|
+
total_links_count = links.values.flatten.length
|
56
|
+
link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }
|
57
|
+
.compact.join(' / ')
|
58
|
+
Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
|
59
|
+
Logger.sub_info 'Checking'
|
50
60
|
|
51
|
-
|
52
|
-
|
61
|
+
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
62
|
+
results = Channel.new(buffer: :buffered, capacity: 1000)
|
53
63
|
|
54
|
-
|
55
|
-
|
56
|
-
|
64
|
+
(1..options['concurrency']).each do |w|
|
65
|
+
Channel.go { worker(w, jobs, results, target, options) }
|
66
|
+
end
|
57
67
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
68
|
+
links.values.flatten.uniq.each do |node|
|
69
|
+
result = generate_url(node, target)
|
70
|
+
jobs << result unless result.nil?
|
71
|
+
end
|
62
72
|
|
63
|
-
|
64
|
-
|
73
|
+
jobs_size = jobs.size
|
74
|
+
jobs.close
|
65
75
|
|
66
|
-
|
67
|
-
|
76
|
+
(1..jobs_size).each { ~results }
|
77
|
+
Logger.sub_done 'Done'
|
78
|
+
rescue StandardError => e
|
79
|
+
Logger.error "[#{e}] #{target}"
|
68
80
|
end
|
69
|
-
Logger.sub_done 'Done'
|
70
|
-
rescue StandardError => e
|
71
|
-
Logger.error "[#{e}] #{target}"
|
72
|
-
end
|
73
81
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
82
|
+
def worker(_id, jobs, results, target, options)
|
83
|
+
jobs.each do |j|
|
84
|
+
if CACHE_SET[j]
|
85
|
+
Logger.found "[404 Not Found] #{j}" unless CACHE_QUE[j]
|
86
|
+
else
|
87
|
+
CACHE_SET[j] = true
|
88
|
+
begin
|
89
|
+
CACHE_QUE[j] = true
|
90
|
+
uri = URI.parse(j)
|
91
|
+
proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
|
92
|
+
http = if proxy_uri
|
93
|
+
Net::HTTP.new(uri.host, uri.port,
|
94
|
+
proxy_uri.host, proxy_uri.port,
|
95
|
+
proxy_uri.user, proxy_uri.password)
|
96
|
+
else
|
97
|
+
Net::HTTP.new(uri.host, uri.port)
|
98
|
+
end
|
99
|
+
http.use_ssl = (uri.scheme == 'https')
|
100
|
+
http.read_timeout = options['timeout'].to_i if options['timeout']
|
101
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
|
102
|
+
|
103
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
104
|
+
request['User-Agent'] = options['user_agent']
|
105
|
+
options['worker_headers']&.each do |header|
|
106
|
+
key, value = header.split(':', 2)
|
107
|
+
request[key.strip] = value.strip
|
108
|
+
end
|
109
|
+
|
110
|
+
response = http.request(request)
|
111
|
+
status_code = response.code.to_i
|
112
|
+
Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
|
113
|
+
|
114
|
+
if status_code >= 400 || (status_code >= 300 && options['include30x'])
|
115
|
+
Logger.found "[#{status_code} #{response.message}] #{j}"
|
116
|
+
CACHE_QUE[j] = false
|
117
|
+
DeadFinder.output[target] ||= []
|
118
|
+
DeadFinder.output[target] << j
|
119
|
+
end
|
120
|
+
rescue StandardError => e
|
121
|
+
Logger.verbose "[#{e}] #{j}" if options['verbose']
|
106
122
|
end
|
107
|
-
|
108
|
-
response = http.request(request)
|
109
|
-
status_code = response.code.to_i
|
110
|
-
Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
|
111
|
-
|
112
|
-
if status_code >= 400 || (status_code >= 300 && options['include30x'])
|
113
|
-
Logger.found "[#{status_code} #{response.message}] #{j}"
|
114
|
-
CACHE_QUE[j] = false
|
115
|
-
OUTPUT[target] ||= []
|
116
|
-
OUTPUT[target] << j
|
117
|
-
end
|
118
|
-
rescue StandardError => e
|
119
|
-
Logger.verbose "[#{e}] #{j}" if options['verbose']
|
120
123
|
end
|
124
|
+
results << j
|
121
125
|
end
|
122
|
-
results << j
|
123
126
|
end
|
124
|
-
end
|
125
127
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
128
|
+
private
|
129
|
+
|
130
|
+
def extract_links(page)
|
131
|
+
{
|
132
|
+
anchor: page.css('a').map { |element| element['href'] }.compact,
|
133
|
+
script: page.css('script').map { |element| element['src'] }.compact,
|
134
|
+
link: page.css('link').map { |element| element['href'] }.compact,
|
135
|
+
iframe: page.css('iframe').map { |element| element['src'] }.compact,
|
136
|
+
form: page.css('form').map { |element| element['action'] }.compact,
|
137
|
+
object: page.css('object').map { |element| element['data'] }.compact,
|
138
|
+
embed: page.css('embed').map { |element| element['src'] }.compact
|
139
|
+
}
|
140
|
+
end
|
138
141
|
end
|
139
|
-
end
|
140
142
|
|
141
|
-
def run_pipe(options)
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
143
|
+
def self.run_pipe(options)
|
144
|
+
Logger.set_silent if options['silent']
|
145
|
+
Logger.info 'Reading from STDIN'
|
146
|
+
app = Runner.new
|
147
|
+
while $stdin.gets
|
148
|
+
target = $LAST_READ_LINE.chomp
|
149
|
+
Logger.target "Checking: #{target}"
|
150
|
+
app.run target, options
|
151
|
+
end
|
152
|
+
gen_output(options)
|
150
153
|
end
|
151
|
-
gen_output(options)
|
152
|
-
end
|
153
|
-
|
154
|
-
def run_file(filename, options)
|
155
|
-
Logger.set_silent if options['silent']
|
156
154
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
155
|
+
def self.run_file(filename, options)
|
156
|
+
Logger.set_silent if options['silent']
|
157
|
+
Logger.info "Reading: #{filename}"
|
158
|
+
app = Runner.new
|
159
|
+
File.foreach(filename) do |line|
|
160
|
+
target = line.chomp
|
161
|
+
Logger.target "Checking: #{target}"
|
162
|
+
app.run target, options
|
163
|
+
end
|
164
|
+
gen_output(options)
|
163
165
|
end
|
164
|
-
gen_output(options)
|
165
|
-
end
|
166
|
-
|
167
|
-
def run_url(url, options)
|
168
|
-
Logger.set_silent if options['silent']
|
169
|
-
|
170
|
-
Logger.target "Checking: #{url}"
|
171
|
-
app = DeadFinderRunner.new
|
172
|
-
app.run url, options
|
173
|
-
gen_output(options)
|
174
|
-
end
|
175
166
|
|
176
|
-
def
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
sitemap.to_a.each do |url|
|
183
|
-
turl = generate_url url, base_uri
|
184
|
-
Logger.target "Checking: #{turl}"
|
185
|
-
app.run turl, options
|
167
|
+
def self.run_url(url, options)
|
168
|
+
Logger.set_silent if options['silent']
|
169
|
+
Logger.target "Checking: #{url}"
|
170
|
+
app = Runner.new
|
171
|
+
app.run url, options
|
172
|
+
gen_output(options)
|
186
173
|
end
|
187
|
-
gen_output(options)
|
188
|
-
end
|
189
174
|
|
190
|
-
def
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
203
|
-
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.5.1;)', type: :string,
|
204
|
-
desc: 'User-Agent string to use for requests'
|
205
|
-
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
206
|
-
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
207
|
-
class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
|
208
|
-
|
209
|
-
desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
|
210
|
-
def pipe
|
211
|
-
run_pipe options
|
175
|
+
def self.run_sitemap(sitemap_url, options)
|
176
|
+
Logger.set_silent if options['silent']
|
177
|
+
Logger.info "Parsing sitemap: #{sitemap_url}"
|
178
|
+
app = Runner.new
|
179
|
+
base_uri = URI(sitemap_url)
|
180
|
+
sitemap = SitemapParser.new sitemap_url, { recurse: true }
|
181
|
+
sitemap.to_a.each do |url|
|
182
|
+
turl = generate_url(url, base_uri)
|
183
|
+
Logger.target "Checking: #{turl}"
|
184
|
+
app.run turl, options
|
185
|
+
end
|
186
|
+
gen_output(options)
|
212
187
|
end
|
213
188
|
|
214
|
-
|
215
|
-
|
216
|
-
|
189
|
+
def self.gen_output(options)
|
190
|
+
return if options['output'].empty?
|
191
|
+
|
192
|
+
output_data = DeadFinder.output.to_h
|
193
|
+
format = options['output_format'].to_s.downcase
|
194
|
+
|
195
|
+
content = case format
|
196
|
+
when 'yaml', 'yml'
|
197
|
+
output_data.to_yaml
|
198
|
+
when 'csv'
|
199
|
+
CSV.generate do |csv|
|
200
|
+
csv << %w[target url]
|
201
|
+
output_data.each do |target, urls|
|
202
|
+
Array(urls).each { |url| csv << [target, url] }
|
203
|
+
end
|
204
|
+
end
|
205
|
+
else
|
206
|
+
JSON.pretty_generate(output_data)
|
207
|
+
end
|
208
|
+
|
209
|
+
File.write(options['output'], content)
|
217
210
|
end
|
218
211
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
212
|
+
class CLI < Thor
|
213
|
+
class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
|
214
|
+
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
215
|
+
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
216
|
+
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
|
217
|
+
class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
|
218
|
+
class_option :headers, aliases: :H, default: [], type: :array,
|
219
|
+
desc: 'Custom HTTP headers to send with initial request'
|
220
|
+
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
221
|
+
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.1;)', type: :string,
|
222
|
+
desc: 'User-Agent string to use for requests'
|
223
|
+
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
224
|
+
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
225
|
+
class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
|
226
|
+
|
227
|
+
desc 'pipe', 'Scan the URLs from STDIN. (e.g., cat urls.txt | deadfinder pipe)'
|
228
|
+
def pipe
|
229
|
+
DeadFinder.run_pipe options
|
230
|
+
end
|
223
231
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
232
|
+
desc 'file <FILE>', 'Scan the URLs from File. (e.g., deadfinder file urls.txt)'
|
233
|
+
def file(filename)
|
234
|
+
DeadFinder.run_file filename, options
|
235
|
+
end
|
228
236
|
|
229
|
-
|
230
|
-
|
231
|
-
|
237
|
+
desc 'url <URL>', 'Scan the Single URL.'
|
238
|
+
def url(url)
|
239
|
+
DeadFinder.run_url url, options
|
240
|
+
end
|
241
|
+
|
242
|
+
desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
|
243
|
+
def sitemap(sitemap)
|
244
|
+
DeadFinder.run_sitemap sitemap, options
|
245
|
+
end
|
246
|
+
|
247
|
+
desc 'version', 'Show version.'
|
248
|
+
def version
|
249
|
+
Logger.info "deadfinder #{VERSION}"
|
250
|
+
end
|
232
251
|
end
|
233
252
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 2025-02-17 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: colorize
|
@@ -169,6 +169,20 @@ dependencies:
|
|
169
169
|
- - ">="
|
170
170
|
- !ruby/object:Gem::Version
|
171
171
|
version: 1.2.0
|
172
|
+
- !ruby/object:Gem::Dependency
|
173
|
+
name: rspec
|
174
|
+
requirement: !ruby/object:Gem::Requirement
|
175
|
+
requirements:
|
176
|
+
- - ">="
|
177
|
+
- !ruby/object:Gem::Version
|
178
|
+
version: '0'
|
179
|
+
type: :development
|
180
|
+
prerelease: false
|
181
|
+
version_requirements: !ruby/object:Gem::Requirement
|
182
|
+
requirements:
|
183
|
+
- - ">="
|
184
|
+
- !ruby/object:Gem::Version
|
185
|
+
version: '0'
|
172
186
|
description: Find dead-links (broken links). Dead link (broken link) means a link
|
173
187
|
within a web page that cannot be connected. These links can have a negative impact
|
174
188
|
to SEO and Security. This tool makes it easy to identify and modify.
|
@@ -183,7 +197,7 @@ files:
|
|
183
197
|
- lib/deadfinder/logger.rb
|
184
198
|
- lib/deadfinder/utils.rb
|
185
199
|
- lib/deadfinder/version.rb
|
186
|
-
homepage: https://www.hahwul.com
|
200
|
+
homepage: https://www.hahwul.com/projects/deadfinder/
|
187
201
|
licenses:
|
188
202
|
- MIT
|
189
203
|
metadata:
|
@@ -203,7 +217,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
203
217
|
- !ruby/object:Gem::Version
|
204
218
|
version: '0'
|
205
219
|
requirements: []
|
206
|
-
rubygems_version: 3.6.
|
220
|
+
rubygems_version: 3.6.3
|
207
221
|
specification_version: 4
|
208
222
|
summary: Find dead-links (broken links)
|
209
223
|
test_files: []
|