deadfinder 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/deadfinder +1 -1
- data/lib/deadfinder/cli.rb +73 -0
- data/lib/deadfinder/completion.rb +65 -0
- data/lib/deadfinder/http_client.rb +36 -0
- data/lib/deadfinder/logger.rb +85 -36
- data/lib/deadfinder/runner.rb +142 -0
- data/lib/deadfinder/url_pattern_matcher.rb +20 -0
- data/lib/deadfinder/version.rb +3 -1
- data/lib/deadfinder.rb +60 -218
- metadata +22 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04f339906ad4505079153aea6164c57c48ef0d70540002cd49e9f56adddde355
|
4
|
+
data.tar.gz: 2483fc250e5de54e3cd4349c6a91cf305372c4acf7297fb286eb7a9140678d81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f2577176f086c4ad6406b1c60e5e82a4e711e83902fd4088424d993e67de5d0a17fd9fea6e40cd8a8808a5944738393d0fd8ea7d109f19ba939db13155fc9c9
|
7
|
+
data.tar.gz: beb4467d411b85e42b569a552321da6e9138fab877b4ebb2b1f8b852cd2e30b99e57a8bca8e12a5774fd83679c5a50bfdf3c3cdc21f66d716a38dd4e8833fdc7
|
data/bin/deadfinder
CHANGED
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'thor'
|
4
|
+
require 'deadfinder'
|
5
|
+
require 'deadfinder/completion'
|
6
|
+
|
7
|
+
module DeadFinder
  # Thor-based command-line front end. Every scanning subcommand simply
  # forwards the parsed option hash to the matching DeadFinder.run_* method.
  class CLI < Thor
    class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
    class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
    class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
    class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
    class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
    class_option :headers, aliases: :H, default: [], type: :array,
                           desc: 'Custom HTTP headers to send with initial request'
    class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
    class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.7.0;)', type: :string,
                              desc: 'User-Agent string to use for requests'
    class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
    class_option :proxy_auth, default: '', type: :string, desc: 'Proxy server authentication credentials'
    class_option :match, aliases: :m, default: '', type: :string, desc: 'Match the URL with the given pattern'
    class_option :ignore, aliases: :i, default: '', type: :string, desc: 'Ignore the URL with the given pattern'
    class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
    class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
    class_option :debug, default: false, type: :boolean, desc: 'Debug mode'

    # Hook consulted by Thor; always answers true.
    def self.exit_on_failure?
      true
    end

    desc 'pipe', 'Scan the URLs from STDIN. (e.g., cat urls.txt | deadfinder pipe)'
    def pipe
      DeadFinder.run_pipe(options)
    end

    desc 'file <FILE>', 'Scan the URLs from File. (e.g., deadfinder file urls.txt)'
    def file(filename)
      DeadFinder.run_file(filename, options)
    end

    desc 'url <URL>', 'Scan the Single URL.'
    def url(url)
      DeadFinder.run_url(url, options)
    end

    desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
    def sitemap(sitemap)
      DeadFinder.run_sitemap(sitemap, options)
    end

    desc 'completion <SHELL>', 'Generate completion script for shell.'
    # Prints the completion script for bash, zsh or fish; any other shell
    # name is reported through the logger and nothing is printed.
    def completion(shell)
      script =
        case shell
        when 'bash' then DeadFinder::Completion.bash
        when 'zsh' then DeadFinder::Completion.zsh
        when 'fish' then DeadFinder::Completion.fish
        end

      if script.nil?
        DeadFinder::Logger.error "Unsupported shell: #{shell}"
        return
      end

      puts script
    end

    desc 'version', 'Show version.'
    def version
      DeadFinder::Logger.info "deadfinder #{DeadFinder::VERSION}"
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DeadFinder
  # Generators for shell completion scripts. Each method only builds and
  # returns the script text; printing is left to the caller.
  module Completion
    # Bash completion function that offers deadfinder's long options.
    #
    # The current word is passed to compgen quoted so that a partially typed
    # word containing spaces or glob characters is not split or expanded by
    # the shell before compgen sees it.
    #
    # @return [String] bash script text
    def self.bash
      <<~BASH
        _deadfinder_completions()
        {
        local cur opts
        COMPREPLY=()
        cur="${COMP_WORDS[COMP_CWORD]}"
        opts="--include30x --concurrency --timeout --output --output_format --headers --worker_headers --user_agent --proxy --proxy_auth --match --ignore --silent --verbose --debug"

        COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
        return 0
        }
        complete -F _deadfinder_completions deadfinder
      BASH
    end

    # Zsh completion definition built on _arguments.
    #
    # The doubled backslashes below are heredoc escapes; the emitted script
    # ends each continued line with a single backslash.
    #
    # @return [String] zsh script text
    def self.zsh
      <<~ZSH
        #compdef deadfinder

        _arguments \\
        '--include30x[Include 30x redirections]' \\
        '--concurrency[Number of concurrency]:number' \\
        '--timeout[Timeout in seconds]:number' \\
        '--output[File to write result]:file' \\
        '--output_format[Output format]:string' \\
        '--headers[Custom HTTP headers]:array' \\
        '--worker_headers[Custom HTTP headers for workers]:array' \\
        '--user_agent[User-Agent string]:string' \\
        '--proxy[Proxy server]:string' \\
        '--proxy_auth[Proxy server authentication]:string' \\
        '--match[Match URL pattern]:string' \\
        '--ignore[Ignore URL pattern]:string' \\
        '--silent[Silent mode]' \\
        '--verbose[Verbose mode]' \\
        '--debug[Debug mode]'
      ZSH
    end

    # Fish completion definitions, one `complete` line per long option.
    #
    # @return [String] fish script text
    def self.fish
      <<~FISH
        complete -c deadfinder -l include30x -d 'Include 30x redirections'
        complete -c deadfinder -l concurrency -d 'Number of concurrency' -a '(seq 1 100)'
        complete -c deadfinder -l timeout -d 'Timeout in seconds' -a '(seq 1 60)'
        complete -c deadfinder -l output -d 'File to write result' -r
        complete -c deadfinder -l output_format -d 'Output format' -r
        complete -c deadfinder -l headers -d 'Custom HTTP headers' -r
        complete -c deadfinder -l worker_headers -d 'Custom HTTP headers for workers' -r
        complete -c deadfinder -l user_agent -d 'User-Agent string' -r
        complete -c deadfinder -l proxy -d 'Proxy server' -r
        complete -c deadfinder -l proxy_auth -d 'Proxy server authentication' -r
        complete -c deadfinder -l match -d 'Match URL pattern' -r
        complete -c deadfinder -l ignore -d 'Ignore URL pattern' -r
        complete -c deadfinder -l silent -d 'Silent mode'
        complete -c deadfinder -l verbose -d 'Verbose mode'
        complete -c deadfinder -l debug -d 'Debug mode'
      FISH
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
require 'openssl'
|
5
|
+
|
6
|
+
module DeadFinder
  # Factory for Net::HTTP objects configured from the string-keyed option hash.
  module HttpClient
    # Build (without opening) a Net::HTTP object for +uri+.
    #
    # @param uri [URI::Generic] target; its host, port and scheme are read
    # @param options [Hash] 'proxy' (proxy URL), 'proxy_auth' ("user:pass")
    #   and 'timeout' (seconds) are consulted; all are optional
    # @return [Net::HTTP]
    def self.create(uri, options)
      proxy_uri = parse_proxy(options['proxy'])
      http = if proxy_uri
               Net::HTTP.new(uri.host, uri.port,
                             proxy_uri.host, proxy_uri.port,
                             proxy_uri.user, proxy_uri.password)
             else
               Net::HTTP.new(uri.host, uri.port)
             end
      http.use_ssl = (uri.scheme == 'https')

      if options['timeout']
        seconds = options['timeout'].to_i
        # Bound the connect phase as well as the read phase; otherwise an
        # unreachable host is only limited by Net::HTTP's own open timeout.
        http.open_timeout = seconds
        http.read_timeout = seconds
      end

      # NOTE(security): certificate verification is switched off on purpose
      # here, so TLS peers are not authenticated.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?

      # The CLI default for proxy_auth is '' (truthy in Ruby). Only override
      # when a value was really supplied, so credentials embedded in the
      # proxy URL are not replaced by nil.
      proxy_auth = options['proxy_auth'].to_s
      if proxy_uri && !proxy_auth.empty?
        proxy_user, proxy_pass = proxy_auth.split(':', 2)
        http.proxy_user = proxy_user
        http.proxy_pass = proxy_pass
      end

      http
    end

    # Parse the proxy option into a URI.
    #
    # @return [URI::Generic, nil] nil when the option is blank or unparsable
    #   (the latter is also reported through the logger)
    def self.parse_proxy(proxy)
      return nil if proxy.nil? || proxy.empty?

      URI.parse(proxy)
    rescue URI::InvalidURIError => e
      DeadFinder::Logger.error "Invalid proxy URI: #{proxy} - #{e.message}"
      nil
    end
    private_class_method :parse_proxy
  end
end
|
data/lib/deadfinder/logger.rb
CHANGED
@@ -2,53 +2,102 @@
|
|
2
2
|
|
3
3
|
require 'colorize'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
module DeadFinder
  # Console logger. The silent / verbose / debug flags are class-level
  # instance variables; every read or write of a flag goes through @mutex.
  # The actual `puts` happens outside the mutex.
  class Logger
    @silent = false
    @verbose = false
    @debug = false
    @mutex = Mutex.new

    class << self
      # Switch on each flag whose string key is truthy in +options+.
      # Flags are only ever turned on here, never off.
      def apply_options(options)
        %w[silent verbose debug].each do |flag|
          public_send("set_#{flag}") if options[flag]
        end
      end

      # Generates set_silent / unset_silent / silent? and the same trio for
      # verbose and debug.
      %w[silent verbose debug].each do |flag|
        ivar = "@#{flag}"

        define_method("set_#{flag}") do
          @mutex.synchronize { instance_variable_set(ivar, true) }
        end

        define_method("unset_#{flag}") do
          @mutex.synchronize { instance_variable_set(ivar, false) }
        end

        define_method("#{flag}?") do
          @mutex.synchronize { instance_variable_get(ivar) }
        end
      end

      # Print a colorized prefix followed by the text, unless silenced.
      def log(prefix, text, color)
        puts "#{prefix.colorize(color)}#{text}" unless silent?
      end

      # Like #log, but preceded by a tree-branch marker; +is_end+ selects the
      # closing-branch glyph.
      def sub_log(prefix, is_end, text, color)
        return if silent?

        branch = is_end ? ' └── ' : ' ├── '
        puts "#{branch.colorize(color)}#{prefix.colorize(color)}#{text}"
      end

      # Emitted only while the debug flag is set.
      def debug(text)
        return unless debug?

        log('❀ ', text, :yellow)
      end

      def info(text)
        log('ℹ ', text, :blue)
      end

      def error(text)
        log('⚠︎ ', text, :red)
      end

      def target(text)
        log('► ', text, :green)
      end

      def sub_info(text)
        log(' ● ', text, :blue)
      end

      def sub_complete(text)
        sub_log('● ', true, text, :blue)
      end

      def found(text)
        sub_log('✘ ', false, text, :red)
      end

      # Emitted only while the verbose flag is set.
      def verbose(text)
        return unless verbose?

        sub_log('➜ ', false, text, :yellow)
      end

      # Emitted only while the verbose flag is set.
      def verbose_ok(text)
        return unless verbose?

        sub_log('✓ ', false, text, :green)
      end
    end
  end
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'concurrent-edge'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'open-uri'
|
6
|
+
require 'net/http'
|
7
|
+
require 'openssl'
|
8
|
+
require 'deadfinder/logger'
|
9
|
+
require 'deadfinder/http_client'
|
10
|
+
require 'deadfinder/url_pattern_matcher'
|
11
|
+
|
12
|
+
module DeadFinder
  # Fetches one target page, collects the URLs it references and checks each
  # of them concurrently, recording failing URLs in DeadFinder.output.
  class Runner
    # Option defaults for callers that use Runner without the CLI.
    #
    # @return [Hash{String => Object}] a fresh hash on every call
    def default_options
      {
        'concurrency' => 50,
        'timeout' => 10,
        'output' => '',
        'output_format' => 'json',
        'headers' => [],
        'worker_headers' => [],
        'silent' => true,
        'verbose' => false,
        'include30x' => false,
        'proxy' => '',
        'proxy_auth' => '',
        'match' => '',
        'ignore' => ''
      }
    end

    # Scan a single page.
    #
    # Any StandardError raised along the way is logged together with the
    # target instead of propagating.
    #
    # @param target [String] URL of the page whose links are checked
    # @param options [Hash] string-keyed options (see #default_options)
    def run(target, options)
      DeadFinder::Logger.apply_options(options)
      page = Nokogiri::HTML(URI.open(target, parse_headers(options['headers'])))
      links = extract_links(page)

      DeadFinder::Logger.debug "#{CACHE_QUE.size} URLs in queue, #{CACHE_SET.size} URLs in cache"

      filter_links(links, options)

      total_links_count = links.values.flatten.length
      link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }
                       .compact.join(' / ')
      DeadFinder::Logger.sub_info "Discovered #{total_links_count} URLs, currently checking them. [#{link_info}]" unless link_info.empty?

      jobs = Channel.new(buffer: :buffered, capacity: 1000)
      results = Channel.new(buffer: :buffered, capacity: 1000)

      (1..options['concurrency']).each do |w|
        Channel.go { worker(w, jobs, results, target, options) }
      end

      # Count what is actually queued. Workers are already draining `jobs`
      # at this point, so the channel's current size can be smaller than the
      # number of jobs handed out, which would end the wait below too early.
      queued = 0
      links.values.flatten.uniq.each do |node|
        url = generate_url(node, target)
        next if url.nil?

        jobs << url
        queued += 1
      end
      jobs.close

      # Every queued job produces exactly one entry on `results`.
      queued.times { ~results }
      DeadFinder::Logger.sub_complete 'Task completed'
    rescue StandardError => e
      DeadFinder::Logger.error "[#{e}] #{target}"
    end

    # Consume URLs from +jobs+ until it is exhausted, checking each URL that
    # has not been seen before, and push every consumed URL onto +results+.
    def worker(_id, jobs, results, target, options)
      jobs.each do |j|
        unless CACHE_SET[j]
          CACHE_SET[j] = true
          check_url(j, target, options)
        end
        results << j
      end
    end

    private

    # Issue a GET for +url+ and record it under +target+ when the status is
    # >= 400 (or >= 300 with 'include30x'). Errors are swallowed and only
    # reported in verbose mode, so one bad URL cannot stop a worker.
    def check_url(url, target, options)
      CACHE_QUE[url] = true
      uri = URI.parse(url)
      http = HttpClient.create(uri, options)

      request = Net::HTTP::Get.new(uri.request_uri)
      request['User-Agent'] = options['user_agent']
      parse_headers(options['worker_headers']).each { |key, value| request[key] = value }

      response = http.request(request)
      status_code = response.code.to_i

      if status_code >= 400 || (status_code >= 300 && options['include30x'])
        DeadFinder::Logger.found "[#{status_code}] #{url}"
        CACHE_QUE[url] = false
        DeadFinder.output[target] ||= []
        DeadFinder.output[target] << url
      elsif options['verbose']
        DeadFinder::Logger.verbose_ok "[#{status_code}] #{url}"
      end
    rescue StandardError => e
      DeadFinder::Logger.verbose "[#{e}] #{url}" if options['verbose']
    end

    # Turn "Name: value" strings into a header hash. Splits on the first
    # colon only, so values that themselves contain colons stay intact;
    # entries without a colon or with an empty name are ignored.
    def parse_headers(raw_headers)
      Array(raw_headers).each_with_object({}) do |header, parsed|
        name, value = header.to_s.split(':', 2)
        next if value.nil? || name.strip.empty?

        parsed[name.strip] = value.strip
      end
    end

    # Apply the 'match' (keep) and 'ignore' (drop) patterns to +links+ in place.
    def filter_links(links, options)
      apply_pattern(links, options['match'], 'match') do |urls, pattern|
        urls.select { |url| DeadFinder::UrlPatternMatcher.match?(url, pattern) }
      end
      apply_pattern(links, options['ignore'], 'ignore') do |urls, pattern|
        urls.reject { |url| DeadFinder::UrlPatternMatcher.ignore?(url, pattern) }
      end
      links
    end

    # Replace each URL list with the block's result unless +pattern+ is
    # blank (nil counts as blank). An invalid pattern is logged, naming the
    # option it came from, and the scan continues.
    def apply_pattern(links, pattern, label)
      pattern = pattern.to_s
      return if pattern.empty?

      links.each { |type, urls| links[type] = yield(urls, pattern) }
    rescue RegexpError => e
      DeadFinder::Logger.error "Invalid #{label} pattern: #{e.message}"
    end

    # Collect URL-bearing attributes from the parsed page, grouped by the
    # kind of element they came from. Elements lacking the attribute are skipped.
    def extract_links(page)
      {
        anchor: page.css('a').map { |element| element['href'] }.compact,
        script: page.css('script').map { |element| element['src'] }.compact,
        link: page.css('link').map { |element| element['href'] }.compact,
        iframe: page.css('iframe').map { |element| element['src'] }.compact,
        form: page.css('form').map { |element| element['action'] }.compact,
        object: page.css('object').map { |element| element['data'] }.compact,
        embed: page.css('embed').map { |element| element['src'] }.compact
      }
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'timeout'
|
4
|
+
|
5
|
+
module DeadFinder
  # Regex-based URL filters. Each check compiles +pattern+ with Regexp.new
  # and is abandoned (answering false) if it runs longer than one second.
  module UrlPatternMatcher
    class << self
      # @param url [String] URL under test
      # @param pattern [String] regular expression source
      # @return [Boolean] whether the pattern matches; false on timeout
      # @raise [RegexpError] when +pattern+ is not a valid regular expression
      def match?(url, pattern)
        pattern_hit?(url, pattern)
      end

      # Same test as .match?; callers use a true answer to drop the URL.
      def ignore?(url, pattern)
        pattern_hit?(url, pattern)
      end

      private

      # Shared implementation of both predicates.
      def pattern_hit?(url, pattern)
        Timeout.timeout(1) do
          Regexp.new(pattern).match?(url)
        end
      rescue Timeout::Error
        false
      end
    end
  end
end
|
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -6,6 +6,8 @@ require 'open-uri'
|
|
6
6
|
require 'nokogiri'
|
7
7
|
require 'deadfinder/utils'
|
8
8
|
require 'deadfinder/logger'
|
9
|
+
require 'deadfinder/runner'
|
10
|
+
require 'deadfinder/cli'
|
9
11
|
require 'deadfinder/version'
|
10
12
|
require 'concurrent-edge'
|
11
13
|
require 'sitemap-parser'
|
@@ -13,245 +15,85 @@ require 'json'
|
|
13
15
|
require 'yaml'
|
14
16
|
require 'csv'
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
18
|
+
module DeadFinder
|
19
|
+
Channel = Concurrent::Channel
|
20
|
+
CACHE_SET = Concurrent::Map.new
|
21
|
+
CACHE_QUE = Concurrent::Map.new
|
20
22
|
|
21
|
-
|
22
|
-
def
|
23
|
-
|
24
|
-
'concurrency' => 50,
|
25
|
-
'timeout' => 10,
|
26
|
-
'output' => '',
|
27
|
-
'output_format' => 'json',
|
28
|
-
'headers' => [],
|
29
|
-
'worker_headers' => [],
|
30
|
-
'silent' => true,
|
31
|
-
'verbose' => false,
|
32
|
-
'include30x' => false
|
33
|
-
}
|
23
|
+
@output = {}
|
24
|
+
# Reader for the @output instance variable held on the enclosing object.
class << self
  attr_reader :output
end
|
35
27
|
|
36
|
-
def
|
37
|
-
|
38
|
-
headers = options['headers'].each_with_object({}) do |header, hash|
|
39
|
-
kv = header.split(': ')
|
40
|
-
hash[kv[0]] = kv[1]
|
41
|
-
rescue StandardError
|
42
|
-
end
|
43
|
-
page = Nokogiri::HTML(URI.open(target, headers))
|
44
|
-
links = extract_links(page)
|
45
|
-
|
46
|
-
total_links_count = links.values.flatten.length
|
47
|
-
# Generate link info string for non-empty link types
|
48
|
-
link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }.compact.join(' / ')
|
49
|
-
|
50
|
-
# Log the information if there are any links
|
51
|
-
Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
|
52
|
-
Logger.sub_info 'Checking'
|
53
|
-
|
54
|
-
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
55
|
-
results = Channel.new(buffer: :buffered, capacity: 1000)
|
56
|
-
|
57
|
-
(1..options['concurrency']).each do |w|
|
58
|
-
Channel.go { worker(w, jobs, results, target, options) }
|
59
|
-
end
|
60
|
-
|
61
|
-
links.values.flatten.uniq.each do |node|
|
62
|
-
result = generate_url(node, target)
|
63
|
-
jobs << result unless result.nil?
|
64
|
-
end
|
65
|
-
|
66
|
-
jobs_size = jobs.size
|
67
|
-
jobs.close
|
68
|
-
|
69
|
-
(1..jobs_size).each do
|
70
|
-
~results
|
71
|
-
end
|
72
|
-
Logger.sub_done 'Done'
|
73
|
-
rescue StandardError => e
|
74
|
-
Logger.error "[#{e}] #{target}"
|
28
|
+
# Writer that replaces the @output instance variable wholesale.
class << self
  attr_writer :output
end
|
76
31
|
|
77
|
-
def
|
78
|
-
|
79
|
-
if CACHE_SET[j]
|
80
|
-
Logger.found "[404 Not Found] #{j}" unless CACHE_QUE[j]
|
81
|
-
else
|
82
|
-
CACHE_SET[j] = true
|
83
|
-
begin
|
84
|
-
CACHE_QUE[j] = true
|
85
|
-
uri = URI.parse(j)
|
86
|
-
|
87
|
-
# Create HTTP request with timeout and headers
|
88
|
-
proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
|
89
|
-
http = if proxy_uri
|
90
|
-
Net::HTTP.new(uri.host, uri.port, proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password)
|
91
|
-
else
|
92
|
-
Net::HTTP.new(uri.host, uri.port)
|
93
|
-
end
|
94
|
-
http.use_ssl = (uri.scheme == 'https')
|
95
|
-
http.read_timeout = options['timeout'].to_i if options['timeout']
|
96
|
-
|
97
|
-
# Set SSL verification mode
|
98
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
|
99
|
-
|
100
|
-
request = Net::HTTP::Get.new(uri.request_uri)
|
101
|
-
|
102
|
-
# Add User-Agent header
|
103
|
-
request['User-Agent'] = options['user_agent']
|
104
|
-
|
105
|
-
# Add worker headers if provided
|
106
|
-
options['worker_headers']&.each do |header|
|
107
|
-
key, value = header.split(':', 2)
|
108
|
-
request[key.strip] = value.strip
|
109
|
-
end
|
110
|
-
|
111
|
-
response = http.request(request)
|
112
|
-
status_code = response.code.to_i
|
113
|
-
Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
|
114
|
-
|
115
|
-
if status_code >= 400 || (status_code >= 300 && options['include30x'])
|
116
|
-
Logger.found "[#{status_code} #{response.message}] #{j}"
|
117
|
-
CACHE_QUE[j] = false
|
118
|
-
OUTPUT[target] ||= []
|
119
|
-
OUTPUT[target] << j
|
120
|
-
end
|
121
|
-
rescue StandardError => e
|
122
|
-
Logger.verbose "[#{e}] #{j}" if options['verbose']
|
123
|
-
end
|
124
|
-
end
|
125
|
-
results << j
|
126
|
-
end
|
32
|
+
# Scan every URL supplied on STDIN, one per line.
#
# The block handed to run_with_input is evaluated once, so it has to return
# all lines; a single `gets` would scan only the first URL of a piped list.
# Mirrors run_file: each line has its line ending removed.
#
# @param options [Hash] string-keyed options forwarded to run_with_input
def self.run_pipe(options)
  run_with_input(options) { $stdin.each_line.map(&:chomp) }
end
|
128
35
|
|
129
|
-
|
130
|
-
|
131
|
-
def extract_links(page)
|
132
|
-
{
|
133
|
-
anchor: page.css('a').map { |element| element['href'] }.compact,
|
134
|
-
script: page.css('script').map { |element| element['src'] }.compact,
|
135
|
-
link: page.css('link').map { |element| element['href'] }.compact,
|
136
|
-
iframe: page.css('iframe').map { |element| element['src'] }.compact,
|
137
|
-
form: page.css('form').map { |element| element['action'] }.compact,
|
138
|
-
object: page.css('object').map { |element| element['data'] }.compact,
|
139
|
-
embed: page.css('embed').map { |element| element['src'] }.compact
|
140
|
-
}
|
36
|
+
# Scan every URL listed in +filename+, one per line. The file is read
# lazily, inside the block that run_with_input evaluates.
def self.run_file(filename, options)
  run_with_input(options) do
    File.readlines(filename).map(&:chomp)
  end
end
|
142
|
-
end
|
143
39
|
|
144
|
-
def
|
145
|
-
|
146
|
-
|
147
|
-
Logger.info 'Reading from STDIN'
|
148
|
-
app = DeadFinderRunner.new
|
149
|
-
while $stdin.gets
|
150
|
-
target = $LAST_READ_LINE.chomp
|
151
|
-
Logger.target "Checking: #{target}"
|
152
|
-
app.run target, options
|
40
|
+
# Scan a single URL and then write the result file.
#
# gen_output is called here just as in run_sitemap and run_with_input;
# without it the 'output' option has no effect in this mode.
#
# @param url [String] page to scan
# @param options [Hash] string-keyed options
def self.run_url(url, options)
  DeadFinder::Logger.apply_options(options)
  run_with_target(url, options)
  gen_output(options)
end
|
154
|
-
gen_output(options)
|
155
|
-
end
|
156
|
-
|
157
|
-
def run_file(filename, options)
|
158
|
-
Logger.set_silent if options['silent']
|
159
44
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
45
|
+
# Scan every URL listed in a sitemap (nested sitemaps included via
# recurse: true), then write the result file.
#
# @param sitemap_url [String] location of the sitemap
# @param options [Hash] string-keyed options
def self.run_sitemap(sitemap_url, options)
  DeadFinder::Logger.apply_options(options)
  app = Runner.new
  base_uri = URI(sitemap_url)
  sitemap = SitemapParser.new(sitemap_url, recurse: true)

  # Materialize the URL list once and reuse it for both the count and the
  # loop instead of asking the parser twice.
  urls = sitemap.to_a
  DeadFinder::Logger.info "Found #{urls.size} URLs from #{sitemap_url}"
  urls.each do |url|
    turl = generate_url(url, base_uri)
    run_with_target(turl, options, app)
  end
  gen_output(options)
end
|
167
|
-
gen_output(options)
|
168
|
-
end
|
169
|
-
|
170
|
-
def run_url(url, options)
|
171
|
-
Logger.set_silent if options['silent']
|
172
57
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
58
|
+
# Shared driver for list-style inputs. The given block is evaluated once and
# must return the target(s); a single value, an array or nil are all
# accepted because the result is wrapped with Array(). One Runner instance
# is shared by all targets, and the result file is written at the end.
def self.run_with_input(options)
  DeadFinder::Logger.apply_options(options)
  DeadFinder::Logger.info 'Reading input'
  runner = Runner.new
  targets = Array(yield)
  targets.each { |target| run_with_target(target, options, runner) }
  gen_output(options)
end
|
178
67
|
|
179
|
-
def
|
180
|
-
|
181
|
-
|
182
|
-
app = DeadFinderRunner.new
|
183
|
-
base_uri = URI(sitemap_url)
|
184
|
-
sitemap = SitemapParser.new sitemap_url, { recurse: true }
|
185
|
-
sitemap.to_a.each do |url|
|
186
|
-
turl = generate_url url, base_uri
|
187
|
-
Logger.target "Checking: #{turl}"
|
188
|
-
app.run turl, options
|
68
|
+
# Announce +target+ on the logger, then hand it to +app+. A new Runner is
# created only when the caller does not pass one.
#
# @return whatever app.run returns
def self.run_with_target(target, options, app = Runner.new)
  DeadFinder::Logger.target("Fetching #{target}")
  app.run(target, options)
end
|
190
|
-
gen_output(options)
|
191
|
-
end
|
192
72
|
|
193
|
-
def gen_output(options)
|
194
|
-
|
73
|
+
def self.gen_output(options)
|
74
|
+
return if options['output'].empty?
|
195
75
|
|
196
|
-
|
197
|
-
|
76
|
+
output_data = DeadFinder.output.to_h
|
77
|
+
format = options['output_format'].to_s.downcase
|
198
78
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
output_data
|
206
|
-
Array(urls).each do |url|
|
207
|
-
csv << [target, url]
|
208
|
-
end
|
209
|
-
end
|
79
|
+
content = case format
|
80
|
+
when 'yaml', 'yml'
|
81
|
+
output_data.to_yaml
|
82
|
+
when 'csv'
|
83
|
+
generate_csv(output_data)
|
84
|
+
else
|
85
|
+
JSON.pretty_generate(output_data)
|
210
86
|
end
|
211
|
-
else
|
212
|
-
JSON.pretty_generate(output_data)
|
213
|
-
end
|
214
87
|
|
215
|
-
|
216
|
-
end
|
217
|
-
|
218
|
-
class DeadFinder < Thor
|
219
|
-
class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
|
220
|
-
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
221
|
-
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
222
|
-
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
|
223
|
-
class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
|
224
|
-
class_option :headers, aliases: :H, default: [], type: :array,
|
225
|
-
desc: 'Custom HTTP headers to send with initial request'
|
226
|
-
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
227
|
-
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.0;)', type: :string,
|
228
|
-
desc: 'User-Agent string to use for requests'
|
229
|
-
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
230
|
-
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
231
|
-
class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
|
232
|
-
|
233
|
-
desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
|
234
|
-
def pipe
|
235
|
-
run_pipe options
|
236
|
-
end
|
237
|
-
|
238
|
-
desc 'file <FILE>', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
|
239
|
-
def file(filename)
|
240
|
-
run_file filename, options
|
241
|
-
end
|
242
|
-
|
243
|
-
desc 'url <URL>', 'Scan the Single URL.'
|
244
|
-
def url(url)
|
245
|
-
run_url url, options
|
246
|
-
end
|
247
|
-
|
248
|
-
desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
|
249
|
-
def sitemap(sitemap)
|
250
|
-
run_sitemap sitemap, options
|
88
|
+
File.write(options['output'], content)
|
251
89
|
end
|
252
90
|
|
253
|
-
|
254
|
-
|
255
|
-
|
91
|
+
# Render the results as CSV text: a "target,url" header followed by one row
# per (target, url) pair. A target whose value is nil or a single URL is
# handled through Array().
#
# @param output_data [Hash] target => url list
# @return [String] CSV document
def self.generate_csv(output_data)
  rows = output_data.flat_map do |target, urls|
    Array(urls).map { |url| [target, url] }
  end

  CSV.generate do |csv|
    csv << %w[target url]
    rows.each { |row| csv << row }
  end
end
|
257
99
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-03-12 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: colorize
|
@@ -169,6 +169,20 @@ dependencies:
|
|
169
169
|
- - ">="
|
170
170
|
- !ruby/object:Gem::Version
|
171
171
|
version: 1.2.0
|
172
|
+
- !ruby/object:Gem::Dependency
|
173
|
+
name: rspec
|
174
|
+
requirement: !ruby/object:Gem::Requirement
|
175
|
+
requirements:
|
176
|
+
- - ">="
|
177
|
+
- !ruby/object:Gem::Version
|
178
|
+
version: '0'
|
179
|
+
type: :development
|
180
|
+
prerelease: false
|
181
|
+
version_requirements: !ruby/object:Gem::Requirement
|
182
|
+
requirements:
|
183
|
+
- - ">="
|
184
|
+
- !ruby/object:Gem::Version
|
185
|
+
version: '0'
|
172
186
|
description: Find dead-links (broken links). Dead link (broken link) means a link
|
173
187
|
within a web page that cannot be connected. These links can have a negative impact
|
174
188
|
to SEO and Security. This tool makes it easy to identify and modify.
|
@@ -180,10 +194,15 @@ extra_rdoc_files: []
|
|
180
194
|
files:
|
181
195
|
- bin/deadfinder
|
182
196
|
- lib/deadfinder.rb
|
197
|
+
- lib/deadfinder/cli.rb
|
198
|
+
- lib/deadfinder/completion.rb
|
199
|
+
- lib/deadfinder/http_client.rb
|
183
200
|
- lib/deadfinder/logger.rb
|
201
|
+
- lib/deadfinder/runner.rb
|
202
|
+
- lib/deadfinder/url_pattern_matcher.rb
|
184
203
|
- lib/deadfinder/utils.rb
|
185
204
|
- lib/deadfinder/version.rb
|
186
|
-
homepage: https://www.hahwul.com
|
205
|
+
homepage: https://www.hahwul.com/projects/deadfinder/
|
187
206
|
licenses:
|
188
207
|
- MIT
|
189
208
|
metadata:
|