deadfinder 1.6.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/deadfinder/cli.rb +73 -0
- data/lib/deadfinder/completion.rb +65 -0
- data/lib/deadfinder/http_client.rb +36 -0
- data/lib/deadfinder/logger.rb +85 -36
- data/lib/deadfinder/runner.rb +142 -0
- data/lib/deadfinder/url_pattern_matcher.rb +20 -0
- data/lib/deadfinder/version.rb +3 -1
- data/lib/deadfinder.rb +32 -185
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04f339906ad4505079153aea6164c57c48ef0d70540002cd49e9f56adddde355
|
4
|
+
data.tar.gz: 2483fc250e5de54e3cd4349c6a91cf305372c4acf7297fb286eb7a9140678d81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f2577176f086c4ad6406b1c60e5e82a4e711e83902fd4088424d993e67de5d0a17fd9fea6e40cd8a8808a5944738393d0fd8ea7d109f19ba939db13155fc9c9
|
7
|
+
data.tar.gz: beb4467d411b85e42b569a552321da6e9138fab877b4ebb2b1f8b852cd2e30b99e57a8bca8e12a5774fd83679c5a50bfdf3c3cdc21f66d716a38dd4e8833fdc7
|
data/lib/deadfinder/cli.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'thor'
|
4
|
+
require 'deadfinder'
|
5
|
+
require 'deadfinder/completion'
|
6
|
+
|
7
|
+
module DeadFinder
|
8
|
+
# CLI class for handling command-line interactions
|
9
|
+
class CLI < Thor
|
10
|
+
class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
|
11
|
+
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
12
|
+
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
13
|
+
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
|
14
|
+
class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
|
15
|
+
class_option :headers, aliases: :H, default: [], type: :array,
|
16
|
+
desc: 'Custom HTTP headers to send with initial request'
|
17
|
+
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
18
|
+
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.7.0;)', type: :string,
|
19
|
+
desc: 'User-Agent string to use for requests'
|
20
|
+
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
21
|
+
class_option :proxy_auth, default: '', type: :string, desc: 'Proxy server authentication credentials'
|
22
|
+
class_option :match, aliases: :m, default: '', type: :string, desc: 'Match the URL with the given pattern'
|
23
|
+
class_option :ignore, aliases: :i, default: '', type: :string, desc: 'Ignore the URL with the given pattern'
|
24
|
+
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
25
|
+
class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
|
26
|
+
class_option :debug, default: false, type: :boolean, desc: 'Debug mode'
|
27
|
+
|
28
|
+
def self.exit_on_failure?
|
29
|
+
true
|
30
|
+
end
|
31
|
+
|
32
|
+
desc 'pipe', 'Scan the URLs from STDIN. (e.g., cat urls.txt | deadfinder pipe)'
|
33
|
+
def pipe
|
34
|
+
DeadFinder.run_pipe options
|
35
|
+
end
|
36
|
+
|
37
|
+
desc 'file <FILE>', 'Scan the URLs from File. (e.g., deadfinder file urls.txt)'
|
38
|
+
def file(filename)
|
39
|
+
DeadFinder.run_file filename, options
|
40
|
+
end
|
41
|
+
|
42
|
+
desc 'url <URL>', 'Scan the Single URL.'
|
43
|
+
def url(url)
|
44
|
+
DeadFinder.run_url url, options
|
45
|
+
end
|
46
|
+
|
47
|
+
desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
|
48
|
+
def sitemap(sitemap)
|
49
|
+
DeadFinder.run_sitemap sitemap, options
|
50
|
+
end
|
51
|
+
|
52
|
+
desc 'completion <SHELL>', 'Generate completion script for shell.'
|
53
|
+
def completion(shell)
|
54
|
+
unless %w[bash zsh fish].include?(shell)
|
55
|
+
DeadFinder::Logger.error "Unsupported shell: #{shell}"
|
56
|
+
return
|
57
|
+
end
|
58
|
+
case shell
|
59
|
+
when 'bash'
|
60
|
+
puts DeadFinder::Completion.bash
|
61
|
+
when 'zsh'
|
62
|
+
puts DeadFinder::Completion.zsh
|
63
|
+
when 'fish'
|
64
|
+
puts DeadFinder::Completion.fish
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
desc 'version', 'Show version.'
|
69
|
+
def version
|
70
|
+
DeadFinder::Logger.info "deadfinder #{DeadFinder::VERSION}"
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/lib/deadfinder/completion.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DeadFinder
|
4
|
+
# Module for completion script generation
|
5
|
+
module Completion
|
6
|
+
def self.bash
|
7
|
+
<<~BASH
|
8
|
+
_deadfinder_completions()
|
9
|
+
{
|
10
|
+
local cur prev opts
|
11
|
+
COMPREPLY=()
|
12
|
+
cur="${COMP_WORDS[COMP_CWORD]}"
|
13
|
+
opts="--include30x --concurrency --timeout --output --output_format --headers --worker_headers --user_agent --proxy --proxy_auth --match --ignore --silent --verbose --debug"
|
14
|
+
|
15
|
+
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
|
16
|
+
return 0
|
17
|
+
}
|
18
|
+
complete -F _deadfinder_completions deadfinder
|
19
|
+
BASH
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.zsh
|
23
|
+
<<~ZSH
|
24
|
+
#compdef deadfinder
|
25
|
+
|
26
|
+
_arguments \\
|
27
|
+
'--include30x[Include 30x redirections]' \\
|
28
|
+
'--concurrency[Number of concurrency]:number' \\
|
29
|
+
'--timeout[Timeout in seconds]:number' \\
|
30
|
+
'--output[File to write result]:file' \\
|
31
|
+
'--output_format[Output format]:string' \\
|
32
|
+
'--headers[Custom HTTP headers]:array' \\
|
33
|
+
'--worker_headers[Custom HTTP headers for workers]:array' \\
|
34
|
+
'--user_agent[User-Agent string]:string' \\
|
35
|
+
'--proxy[Proxy server]:string' \\
|
36
|
+
'--proxy_auth[Proxy server authentication]:string' \\
|
37
|
+
'--match[Match URL pattern]:string' \\
|
38
|
+
'--ignore[Ignore URL pattern]:string' \\
|
39
|
+
'--silent[Silent mode]' \\
|
40
|
+
'--verbose[Verbose mode]' \\
|
41
|
+
'--debug[Debug mode]'
|
42
|
+
ZSH
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.fish
|
46
|
+
<<~FISH
|
47
|
+
complete -c deadfinder -l include30x -d 'Include 30x redirections'
|
48
|
+
complete -c deadfinder -l concurrency -d 'Number of concurrency' -a '(seq 1 100)'
|
49
|
+
complete -c deadfinder -l timeout -d 'Timeout in seconds' -a '(seq 1 60)'
|
50
|
+
complete -c deadfinder -l output -d 'File to write result' -r
|
51
|
+
complete -c deadfinder -l output_format -d 'Output format' -r
|
52
|
+
complete -c deadfinder -l headers -d 'Custom HTTP headers' -r
|
53
|
+
complete -c deadfinder -l worker_headers -d 'Custom HTTP headers for workers' -r
|
54
|
+
complete -c deadfinder -l user_agent -d 'User-Agent string' -r
|
55
|
+
complete -c deadfinder -l proxy -d 'Proxy server' -r
|
56
|
+
complete -c deadfinder -l proxy_auth -d 'Proxy server authentication' -r
|
57
|
+
complete -c deadfinder -l match -d 'Match URL pattern' -r
|
58
|
+
complete -c deadfinder -l ignore -d 'Ignore URL pattern' -r
|
59
|
+
complete -c deadfinder -l silent -d 'Silent mode'
|
60
|
+
complete -c deadfinder -l verbose -d 'Verbose mode'
|
61
|
+
complete -c deadfinder -l debug -d 'Debug mode'
|
62
|
+
FISH
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
data/lib/deadfinder/http_client.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
require 'openssl'
|
5
|
+
|
6
|
+
module DeadFinder
|
7
|
+
# HTTP client module
|
8
|
+
module HttpClient
|
9
|
+
def self.create(uri, options)
|
10
|
+
begin
|
11
|
+
proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
|
12
|
+
rescue URI::InvalidURIError => e
|
13
|
+
DeadFinder::Logger.error "Invalid proxy URI: #{options['proxy']} - #{e.message}"
|
14
|
+
proxy_uri = nil # or handle the error as appropriate
|
15
|
+
end
|
16
|
+
http = if proxy_uri
|
17
|
+
Net::HTTP.new(uri.host, uri.port,
|
18
|
+
proxy_uri.host, proxy_uri.port,
|
19
|
+
proxy_uri.user, proxy_uri.password)
|
20
|
+
else
|
21
|
+
Net::HTTP.new(uri.host, uri.port)
|
22
|
+
end
|
23
|
+
http.use_ssl = (uri.scheme == 'https')
|
24
|
+
http.read_timeout = options['timeout'].to_i if options['timeout']
|
25
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
|
26
|
+
|
27
|
+
if options['proxy_auth'] && proxy_uri
|
28
|
+
proxy_user, proxy_pass = options['proxy_auth'].split(':', 2)
|
29
|
+
http.proxy_user = proxy_user
|
30
|
+
http.proxy_pass = proxy_pass
|
31
|
+
end
|
32
|
+
|
33
|
+
http
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/deadfinder/logger.rb
CHANGED
@@ -2,53 +2,102 @@
|
|
2
2
|
|
3
3
|
require 'colorize'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
module DeadFinder
|
6
|
+
class Logger
|
7
|
+
@silent = false
|
8
|
+
@verbose = false
|
9
|
+
@debug = false
|
10
|
+
@mutex = Mutex.new
|
8
11
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
+
def self.apply_options(options)
|
13
|
+
set_silent if options['silent']
|
14
|
+
set_verbose if options['verbose']
|
15
|
+
set_debug if options['debug']
|
16
|
+
end
|
12
17
|
|
13
|
-
|
14
|
-
|
15
|
-
|
18
|
+
def self.set_silent
|
19
|
+
@mutex.synchronize { @silent = true }
|
20
|
+
end
|
16
21
|
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
def self.set_verbose
|
23
|
+
@mutex.synchronize { @verbose = true }
|
24
|
+
end
|
20
25
|
|
21
|
-
|
22
|
-
|
26
|
+
def self.set_debug
|
27
|
+
@mutex.synchronize { @debug = true }
|
28
|
+
end
|
23
29
|
|
24
|
-
|
25
|
-
|
30
|
+
def self.unset_debug
|
31
|
+
@mutex.synchronize { @debug = false }
|
32
|
+
end
|
26
33
|
|
27
|
-
|
28
|
-
|
29
|
-
|
34
|
+
def self.unset_verbose
|
35
|
+
@mutex.synchronize { @verbose = false }
|
36
|
+
end
|
30
37
|
|
31
|
-
|
32
|
-
|
33
|
-
|
38
|
+
def self.debug?
|
39
|
+
@mutex.synchronize { @debug }
|
40
|
+
end
|
34
41
|
|
35
|
-
|
36
|
-
|
37
|
-
|
42
|
+
def self.verbose?
|
43
|
+
@mutex.synchronize { @verbose }
|
44
|
+
end
|
38
45
|
|
39
|
-
|
40
|
-
|
41
|
-
|
46
|
+
def self.unset_silent
|
47
|
+
@mutex.synchronize { @silent = false }
|
48
|
+
end
|
42
49
|
|
43
|
-
|
44
|
-
|
45
|
-
|
50
|
+
def self.silent?
|
51
|
+
@mutex.synchronize { @silent }
|
52
|
+
end
|
46
53
|
|
47
|
-
|
48
|
-
|
49
|
-
|
54
|
+
def self.log(prefix, text, color)
|
55
|
+
return if silent?
|
56
|
+
|
57
|
+
puts prefix.colorize(color) + text.to_s
|
58
|
+
end
|
59
|
+
|
60
|
+
def self.sub_log(prefix, is_end, text, color)
|
61
|
+
return if silent?
|
62
|
+
|
63
|
+
indent = is_end ? ' └── ' : ' ├── '
|
64
|
+
puts indent.colorize(color) + prefix.colorize(color) + text.to_s
|
65
|
+
end
|
66
|
+
|
67
|
+
def self.debug(text)
|
68
|
+
log('❀ ', text, :yellow) if debug?
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.info(text)
|
72
|
+
log('ℹ ', text, :blue)
|
73
|
+
end
|
74
|
+
|
75
|
+
def self.error(text)
|
76
|
+
log('⚠︎ ', text, :red)
|
77
|
+
end
|
78
|
+
|
79
|
+
def self.target(text)
|
80
|
+
log('► ', text, :green)
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.sub_info(text)
|
84
|
+
log(' ● ', text, :blue)
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.sub_complete(text)
|
88
|
+
sub_log('● ', true, text, :blue)
|
89
|
+
end
|
90
|
+
|
91
|
+
def self.found(text)
|
92
|
+
sub_log('✘ ', false, text, :red)
|
93
|
+
end
|
94
|
+
|
95
|
+
def self.verbose(text)
|
96
|
+
sub_log('➜ ', false, text, :yellow) if verbose?
|
97
|
+
end
|
50
98
|
|
51
|
-
|
52
|
-
|
99
|
+
def self.verbose_ok(text)
|
100
|
+
sub_log('✓ ', false, text, :green) if verbose?
|
101
|
+
end
|
53
102
|
end
|
54
103
|
end
|
data/lib/deadfinder/runner.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'concurrent-edge'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'open-uri'
|
6
|
+
require 'net/http'
|
7
|
+
require 'openssl'
|
8
|
+
require 'deadfinder/logger'
|
9
|
+
require 'deadfinder/http_client'
|
10
|
+
require 'deadfinder/url_pattern_matcher'
|
11
|
+
|
12
|
+
module DeadFinder
|
13
|
+
# Runner class for executing the main logic
|
14
|
+
class Runner
|
15
|
+
def default_options
|
16
|
+
{
|
17
|
+
'concurrency' => 50,
|
18
|
+
'timeout' => 10,
|
19
|
+
'output' => '',
|
20
|
+
'output_format' => 'json',
|
21
|
+
'headers' => [],
|
22
|
+
'worker_headers' => [],
|
23
|
+
'silent' => true,
|
24
|
+
'verbose' => false,
|
25
|
+
'include30x' => false,
|
26
|
+
'proxy' => '',
|
27
|
+
'proxy_auth' => '',
|
28
|
+
'match' => '',
|
29
|
+
'ignore' => '',
|
30
|
+
}
|
31
|
+
end
|
32
|
+
|
33
|
+
def run(target, options)
|
34
|
+
DeadFinder::Logger.apply_options(options)
|
35
|
+
headers = options['headers'].each_with_object({}) do |header, hash|
|
36
|
+
kv = header.split(': ')
|
37
|
+
hash[kv[0]] = kv[1]
|
38
|
+
rescue StandardError
|
39
|
+
end
|
40
|
+
page = Nokogiri::HTML(URI.open(target, headers))
|
41
|
+
links = extract_links(page)
|
42
|
+
|
43
|
+
DeadFinder::Logger.debug "#{CACHE_QUE.size} URLs in queue, #{CACHE_SET.size} URLs in cache"
|
44
|
+
|
45
|
+
if options['match'] != ''
|
46
|
+
begin
|
47
|
+
links.each do |type, urls|
|
48
|
+
links[type] = urls.select { |url| DeadFinder::UrlPatternMatcher.match?(url, options['match']) }
|
49
|
+
end
|
50
|
+
rescue RegexpError => e
|
51
|
+
DeadFinder::Logger.error "Invalid match pattern: #{e.message}"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
if options['ignore'] != ''
|
56
|
+
begin
|
57
|
+
links.each do |type, urls|
|
58
|
+
links[type] = urls.reject { |url| DeadFinder::UrlPatternMatcher.ignore?(url, options['ignore']) }
|
59
|
+
end
|
60
|
+
rescue RegexpError => e
|
61
|
+
DeadFinder::Logger.error "Invalid match pattern: #{e.message}"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
total_links_count = links.values.flatten.length
|
66
|
+
link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }
|
67
|
+
.compact.join(' / ')
|
68
|
+
DeadFinder::Logger.sub_info "Discovered #{total_links_count} URLs, currently checking them. [#{link_info}]" unless link_info.empty?
|
69
|
+
|
70
|
+
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
71
|
+
results = Channel.new(buffer: :buffered, capacity: 1000)
|
72
|
+
|
73
|
+
(1..options['concurrency']).each do |w|
|
74
|
+
Channel.go { worker(w, jobs, results, target, options) }
|
75
|
+
end
|
76
|
+
|
77
|
+
links.values.flatten.uniq.each do |node|
|
78
|
+
result = generate_url(node, target)
|
79
|
+
jobs << result unless result.nil?
|
80
|
+
end
|
81
|
+
|
82
|
+
jobs_size = jobs.size
|
83
|
+
jobs.close
|
84
|
+
|
85
|
+
(1..jobs_size).each { ~results }
|
86
|
+
DeadFinder::Logger.sub_complete 'Task completed'
|
87
|
+
rescue StandardError => e
|
88
|
+
DeadFinder::Logger.error "[#{e}] #{target}"
|
89
|
+
end
|
90
|
+
|
91
|
+
def worker(_id, jobs, results, target, options)
|
92
|
+
jobs.each do |j|
|
93
|
+
if CACHE_SET[j]
|
94
|
+
# Skip if already cached
|
95
|
+
else
|
96
|
+
CACHE_SET[j] = true
|
97
|
+
begin
|
98
|
+
CACHE_QUE[j] = true
|
99
|
+
uri = URI.parse(j)
|
100
|
+
http = HttpClient.create(uri, options)
|
101
|
+
|
102
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
103
|
+
request['User-Agent'] = options['user_agent']
|
104
|
+
options['worker_headers']&.each do |header|
|
105
|
+
key, value = header.split(':', 2)
|
106
|
+
request[key.strip] = value.strip
|
107
|
+
end
|
108
|
+
|
109
|
+
response = http.request(request)
|
110
|
+
status_code = response.code.to_i
|
111
|
+
|
112
|
+
if status_code >= 400 || (status_code >= 300 && options['include30x'])
|
113
|
+
DeadFinder::Logger.found "[#{status_code}] #{j}"
|
114
|
+
CACHE_QUE[j] = false
|
115
|
+
DeadFinder.output[target] ||= []
|
116
|
+
DeadFinder.output[target] << j
|
117
|
+
else
|
118
|
+
DeadFinder::Logger.verbose_ok "[#{status_code}] #{j}" if options['verbose']
|
119
|
+
end
|
120
|
+
rescue StandardError => e
|
121
|
+
DeadFinder::Logger.verbose "[#{e}] #{j}" if options['verbose']
|
122
|
+
end
|
123
|
+
end
|
124
|
+
results << j
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
def extract_links(page)
|
131
|
+
{
|
132
|
+
anchor: page.css('a').map { |element| element['href'] }.compact,
|
133
|
+
script: page.css('script').map { |element| element['src'] }.compact,
|
134
|
+
link: page.css('link').map { |element| element['href'] }.compact,
|
135
|
+
iframe: page.css('iframe').map { |element| element['src'] }.compact,
|
136
|
+
form: page.css('form').map { |element| element['action'] }.compact,
|
137
|
+
object: page.css('object').map { |element| element['data'] }.compact,
|
138
|
+
embed: page.css('embed').map { |element| element['src'] }.compact
|
139
|
+
}
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
data/lib/deadfinder/url_pattern_matcher.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'timeout'
|
4
|
+
|
5
|
+
module DeadFinder
|
6
|
+
# URL pattern matcher module
|
7
|
+
module UrlPatternMatcher
|
8
|
+
def self.match?(url, pattern)
|
9
|
+
Timeout.timeout(1) { Regexp.new(pattern).match?(url) }
|
10
|
+
rescue Timeout::Error
|
11
|
+
false
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.ignore?(url, pattern)
|
15
|
+
Timeout.timeout(1) { Regexp.new(pattern).match?(url) }
|
16
|
+
rescue Timeout::Error
|
17
|
+
false
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -6,6 +6,8 @@ require 'open-uri'
|
|
6
6
|
require 'nokogiri'
|
7
7
|
require 'deadfinder/utils'
|
8
8
|
require 'deadfinder/logger'
|
9
|
+
require 'deadfinder/runner'
|
10
|
+
require 'deadfinder/cli'
|
9
11
|
require 'deadfinder/version'
|
10
12
|
require 'concurrent-edge'
|
11
13
|
require 'sitemap-parser'
|
@@ -27,165 +29,47 @@ module DeadFinder
|
|
27
29
|
@output = val
|
28
30
|
end
|
29
31
|
|
30
|
-
class Runner
|
31
|
-
def default_options
|
32
|
-
{
|
33
|
-
'concurrency' => 50,
|
34
|
-
'timeout' => 10,
|
35
|
-
'output' => '',
|
36
|
-
'output_format' => 'json',
|
37
|
-
'headers' => [],
|
38
|
-
'worker_headers' => [],
|
39
|
-
'silent' => true,
|
40
|
-
'verbose' => false,
|
41
|
-
'include30x' => false
|
42
|
-
}
|
43
|
-
end
|
44
|
-
|
45
|
-
def run(target, options)
|
46
|
-
Logger.set_silent if options['silent']
|
47
|
-
headers = options['headers'].each_with_object({}) do |header, hash|
|
48
|
-
kv = header.split(': ')
|
49
|
-
hash[kv[0]] = kv[1]
|
50
|
-
rescue StandardError
|
51
|
-
end
|
52
|
-
page = Nokogiri::HTML(URI.open(target, headers))
|
53
|
-
links = extract_links(page)
|
54
|
-
|
55
|
-
total_links_count = links.values.flatten.length
|
56
|
-
link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }
|
57
|
-
.compact.join(' / ')
|
58
|
-
Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
|
59
|
-
Logger.sub_info 'Checking'
|
60
|
-
|
61
|
-
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
62
|
-
results = Channel.new(buffer: :buffered, capacity: 1000)
|
63
|
-
|
64
|
-
(1..options['concurrency']).each do |w|
|
65
|
-
Channel.go { worker(w, jobs, results, target, options) }
|
66
|
-
end
|
67
|
-
|
68
|
-
links.values.flatten.uniq.each do |node|
|
69
|
-
result = generate_url(node, target)
|
70
|
-
jobs << result unless result.nil?
|
71
|
-
end
|
72
|
-
|
73
|
-
jobs_size = jobs.size
|
74
|
-
jobs.close
|
75
|
-
|
76
|
-
(1..jobs_size).each { ~results }
|
77
|
-
Logger.sub_done 'Done'
|
78
|
-
rescue StandardError => e
|
79
|
-
Logger.error "[#{e}] #{target}"
|
80
|
-
end
|
81
|
-
|
82
|
-
def worker(_id, jobs, results, target, options)
|
83
|
-
jobs.each do |j|
|
84
|
-
if CACHE_SET[j]
|
85
|
-
Logger.found "[404 Not Found] #{j}" unless CACHE_QUE[j]
|
86
|
-
else
|
87
|
-
CACHE_SET[j] = true
|
88
|
-
begin
|
89
|
-
CACHE_QUE[j] = true
|
90
|
-
uri = URI.parse(j)
|
91
|
-
proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
|
92
|
-
http = if proxy_uri
|
93
|
-
Net::HTTP.new(uri.host, uri.port,
|
94
|
-
proxy_uri.host, proxy_uri.port,
|
95
|
-
proxy_uri.user, proxy_uri.password)
|
96
|
-
else
|
97
|
-
Net::HTTP.new(uri.host, uri.port)
|
98
|
-
end
|
99
|
-
http.use_ssl = (uri.scheme == 'https')
|
100
|
-
http.read_timeout = options['timeout'].to_i if options['timeout']
|
101
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
|
102
|
-
|
103
|
-
request = Net::HTTP::Get.new(uri.request_uri)
|
104
|
-
request['User-Agent'] = options['user_agent']
|
105
|
-
options['worker_headers']&.each do |header|
|
106
|
-
key, value = header.split(':', 2)
|
107
|
-
request[key.strip] = value.strip
|
108
|
-
end
|
109
|
-
|
110
|
-
response = http.request(request)
|
111
|
-
status_code = response.code.to_i
|
112
|
-
Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
|
113
|
-
|
114
|
-
if status_code >= 400 || (status_code >= 300 && options['include30x'])
|
115
|
-
Logger.found "[#{status_code} #{response.message}] #{j}"
|
116
|
-
CACHE_QUE[j] = false
|
117
|
-
DeadFinder.output[target] ||= []
|
118
|
-
DeadFinder.output[target] << j
|
119
|
-
end
|
120
|
-
rescue StandardError => e
|
121
|
-
Logger.verbose "[#{e}] #{j}" if options['verbose']
|
122
|
-
end
|
123
|
-
end
|
124
|
-
results << j
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
private
|
129
|
-
|
130
|
-
def extract_links(page)
|
131
|
-
{
|
132
|
-
anchor: page.css('a').map { |element| element['href'] }.compact,
|
133
|
-
script: page.css('script').map { |element| element['src'] }.compact,
|
134
|
-
link: page.css('link').map { |element| element['href'] }.compact,
|
135
|
-
iframe: page.css('iframe').map { |element| element['src'] }.compact,
|
136
|
-
form: page.css('form').map { |element| element['action'] }.compact,
|
137
|
-
object: page.css('object').map { |element| element['data'] }.compact,
|
138
|
-
embed: page.css('embed').map { |element| element['src'] }.compact
|
139
|
-
}
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
32
|
def self.run_pipe(options)
|
144
|
-
|
145
|
-
Logger.info 'Reading from STDIN'
|
146
|
-
app = Runner.new
|
147
|
-
while $stdin.gets
|
148
|
-
target = $LAST_READ_LINE.chomp
|
149
|
-
Logger.target "Checking: #{target}"
|
150
|
-
app.run target, options
|
151
|
-
end
|
152
|
-
gen_output(options)
|
33
|
+
run_with_input(options) { $stdin.gets&.chomp }
|
153
34
|
end
|
154
35
|
|
155
36
|
def self.run_file(filename, options)
|
156
|
-
|
157
|
-
Logger.info "Reading: #{filename}"
|
158
|
-
app = Runner.new
|
159
|
-
File.foreach(filename) do |line|
|
160
|
-
target = line.chomp
|
161
|
-
Logger.target "Checking: #{target}"
|
162
|
-
app.run target, options
|
163
|
-
end
|
164
|
-
gen_output(options)
|
37
|
+
run_with_input(options) { File.foreach(filename).map(&:chomp) }
|
165
38
|
end
|
166
39
|
|
167
40
|
def self.run_url(url, options)
|
168
|
-
Logger.
|
169
|
-
|
170
|
-
app = Runner.new
|
171
|
-
app.run url, options
|
172
|
-
gen_output(options)
|
41
|
+
DeadFinder::Logger.apply_options(options)
|
42
|
+
run_with_target(url, options)
|
173
43
|
end
|
174
44
|
|
175
45
|
def self.run_sitemap(sitemap_url, options)
|
176
|
-
Logger.
|
177
|
-
Logger.info "Parsing sitemap: #{sitemap_url}"
|
46
|
+
DeadFinder::Logger.apply_options(options)
|
178
47
|
app = Runner.new
|
179
48
|
base_uri = URI(sitemap_url)
|
180
|
-
sitemap = SitemapParser.new
|
49
|
+
sitemap = SitemapParser.new(sitemap_url, recurse: true)
|
50
|
+
DeadFinder::Logger.info "Found #{sitemap.to_a.size} URLs from #{sitemap_url}"
|
181
51
|
sitemap.to_a.each do |url|
|
182
52
|
turl = generate_url(url, base_uri)
|
183
|
-
|
184
|
-
|
53
|
+
run_with_target(turl, options, app)
|
54
|
+
end
|
55
|
+
gen_output(options)
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.run_with_input(options)
|
59
|
+
DeadFinder::Logger.apply_options(options)
|
60
|
+
DeadFinder::Logger.info 'Reading input'
|
61
|
+
app = Runner.new
|
62
|
+
Array(yield).each do |target|
|
63
|
+
run_with_target(target, options, app)
|
185
64
|
end
|
186
65
|
gen_output(options)
|
187
66
|
end
|
188
67
|
|
68
|
+
def self.run_with_target(target, options, app = Runner.new)
|
69
|
+
DeadFinder::Logger.target "Fetching #{target}"
|
70
|
+
app.run(target, options)
|
71
|
+
end
|
72
|
+
|
189
73
|
def self.gen_output(options)
|
190
74
|
return if options['output'].empty?
|
191
75
|
|
@@ -196,12 +80,7 @@ module DeadFinder
|
|
196
80
|
when 'yaml', 'yml'
|
197
81
|
output_data.to_yaml
|
198
82
|
when 'csv'
|
199
|
-
|
200
|
-
csv << %w[target url]
|
201
|
-
output_data.each do |target, urls|
|
202
|
-
Array(urls).each { |url| csv << [target, url] }
|
203
|
-
end
|
204
|
-
end
|
83
|
+
generate_csv(output_data)
|
205
84
|
else
|
206
85
|
JSON.pretty_generate(output_data)
|
207
86
|
end
|
@@ -209,44 +88,12 @@ module DeadFinder
|
|
209
88
|
File.write(options['output'], content)
|
210
89
|
end
|
211
90
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
class_option :headers, aliases: :H, default: [], type: :array,
|
219
|
-
desc: 'Custom HTTP headers to send with initial request'
|
220
|
-
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
221
|
-
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.1;)', type: :string,
|
222
|
-
desc: 'User-Agent string to use for requests'
|
223
|
-
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
224
|
-
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
225
|
-
class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
|
226
|
-
|
227
|
-
desc 'pipe', 'Scan the URLs from STDIN. (e.g., cat urls.txt | deadfinder pipe)'
|
228
|
-
def pipe
|
229
|
-
DeadFinder.run_pipe options
|
230
|
-
end
|
231
|
-
|
232
|
-
desc 'file <FILE>', 'Scan the URLs from File. (e.g., deadfinder file urls.txt)'
|
233
|
-
def file(filename)
|
234
|
-
DeadFinder.run_file filename, options
|
235
|
-
end
|
236
|
-
|
237
|
-
desc 'url <URL>', 'Scan the Single URL.'
|
238
|
-
def url(url)
|
239
|
-
DeadFinder.run_url url, options
|
240
|
-
end
|
241
|
-
|
242
|
-
desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
|
243
|
-
def sitemap(sitemap)
|
244
|
-
DeadFinder.run_sitemap sitemap, options
|
245
|
-
end
|
246
|
-
|
247
|
-
desc 'version', 'Show version.'
|
248
|
-
def version
|
249
|
-
Logger.info "deadfinder #{VERSION}"
|
91
|
+
def self.generate_csv(output_data)
|
92
|
+
CSV.generate do |csv|
|
93
|
+
csv << %w[target url]
|
94
|
+
output_data.each do |target, urls|
|
95
|
+
Array(urls).each { |url| csv << [target, url] }
|
96
|
+
end
|
250
97
|
end
|
251
98
|
end
|
252
99
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.1
|
4
|
+
version: 1.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-03-12 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: colorize
|
@@ -194,7 +194,12 @@ extra_rdoc_files: []
|
|
194
194
|
files:
|
195
195
|
- bin/deadfinder
|
196
196
|
- lib/deadfinder.rb
|
197
|
+
- lib/deadfinder/cli.rb
|
198
|
+
- lib/deadfinder/completion.rb
|
199
|
+
- lib/deadfinder/http_client.rb
|
197
200
|
- lib/deadfinder/logger.rb
|
201
|
+
- lib/deadfinder/runner.rb
|
202
|
+
- lib/deadfinder/url_pattern_matcher.rb
|
198
203
|
- lib/deadfinder/utils.rb
|
199
204
|
- lib/deadfinder/version.rb
|
200
205
|
homepage: https://www.hahwul.com/projects/deadfinder/
|