deadfinder 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a9dba525c92f09cb14019fe890f73dc85eaa975179ac19efca534f0fdc76b14
4
- data.tar.gz: fee3168a2912dacd9a2740b1aaf2c495b0b68b48b4d024c208523d8a8a5bb5e1
3
+ metadata.gz: 04f339906ad4505079153aea6164c57c48ef0d70540002cd49e9f56adddde355
4
+ data.tar.gz: 2483fc250e5de54e3cd4349c6a91cf305372c4acf7297fb286eb7a9140678d81
5
5
  SHA512:
6
- metadata.gz: be806719c1ed5ca905885222d485f376c7e1a61dff0388f812e05d52e279e21315af08fdcfb5b66e9773c88be2f1a5c8c434eba62e6340f91db76ea29f4643db
7
- data.tar.gz: d966f9237d2a4f62ba98684346b045c49bd14b024723eda22ac5f0c9c2fb0e62fc358d6607d924c0cdd80cd10e8e5f4eb37a13a067188d8394d142fe0476001b
6
+ metadata.gz: 0f2577176f086c4ad6406b1c60e5e82a4e711e83902fd4088424d993e67de5d0a17fd9fea6e40cd8a8808a5944738393d0fd8ea7d109f19ba939db13155fc9c9
7
+ data.tar.gz: beb4467d411b85e42b569a552321da6e9138fab877b4ebb2b1f8b852cd2e30b99e57a8bca8e12a5774fd83679c5a50bfdf3c3cdc21f66d716a38dd4e8833fdc7
data/bin/deadfinder CHANGED
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'deadfinder'
4
- DeadFinder.start(ARGV)
4
+ DeadFinder::CLI.start(ARGV)
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thor'
4
+ require 'deadfinder'
5
+ require 'deadfinder/completion'
6
+
7
+ module DeadFinder
8
+ # CLI class for handling command-line interactions
9
+ class CLI < Thor
10
+ class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
11
+ class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
12
+ class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
13
+ class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
14
+ class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
15
+ class_option :headers, aliases: :H, default: [], type: :array,
16
+ desc: 'Custom HTTP headers to send with initial request'
17
+ class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
18
+ class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.7.0;)', type: :string,
19
+ desc: 'User-Agent string to use for requests'
20
+ class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
21
+ class_option :proxy_auth, default: '', type: :string, desc: 'Proxy server authentication credentials'
22
+ class_option :match, aliases: :m, default: '', type: :string, desc: 'Match the URL with the given pattern'
23
+ class_option :ignore, aliases: :i, default: '', type: :string, desc: 'Ignore the URL with the given pattern'
24
+ class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
25
+ class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
26
+ class_option :debug, default: false, type: :boolean, desc: 'Debug mode'
27
+
28
+ def self.exit_on_failure?
29
+ true
30
+ end
31
+
32
+ desc 'pipe', 'Scan the URLs from STDIN. (e.g., cat urls.txt | deadfinder pipe)'
33
+ def pipe
34
+ DeadFinder.run_pipe options
35
+ end
36
+
37
+ desc 'file <FILE>', 'Scan the URLs from File. (e.g., deadfinder file urls.txt)'
38
+ def file(filename)
39
+ DeadFinder.run_file filename, options
40
+ end
41
+
42
+ desc 'url <URL>', 'Scan the Single URL.'
43
+ def url(url)
44
+ DeadFinder.run_url url, options
45
+ end
46
+
47
+ desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
48
+ def sitemap(sitemap)
49
+ DeadFinder.run_sitemap sitemap, options
50
+ end
51
+
52
+ desc 'completion <SHELL>', 'Generate completion script for shell.'
53
+ def completion(shell)
54
+ unless %w[bash zsh fish].include?(shell)
55
+ DeadFinder::Logger.error "Unsupported shell: #{shell}"
56
+ return
57
+ end
58
+ case shell
59
+ when 'bash'
60
+ puts DeadFinder::Completion.bash
61
+ when 'zsh'
62
+ puts DeadFinder::Completion.zsh
63
+ when 'fish'
64
+ puts DeadFinder::Completion.fish
65
+ end
66
+ end
67
+
68
+ desc 'version', 'Show version.'
69
+ def version
70
+ DeadFinder::Logger.info "deadfinder #{DeadFinder::VERSION}"
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadFinder
4
+ # Module for completion script generation
5
+ module Completion
6
+ def self.bash
7
+ <<~BASH
8
+ _deadfinder_completions()
9
+ {
10
+ local cur prev opts
11
+ COMPREPLY=()
12
+ cur="${COMP_WORDS[COMP_CWORD]}"
13
+ opts="--include30x --concurrency --timeout --output --output_format --headers --worker_headers --user_agent --proxy --proxy_auth --match --ignore --silent --verbose --debug"
14
+
15
+ COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
16
+ return 0
17
+ }
18
+ complete -F _deadfinder_completions deadfinder
19
+ BASH
20
+ end
21
+
22
+ def self.zsh
23
+ <<~ZSH
24
+ #compdef deadfinder
25
+
26
+ _arguments \\
27
+ '--include30x[Include 30x redirections]' \\
28
+ '--concurrency[Number of concurrency]:number' \\
29
+ '--timeout[Timeout in seconds]:number' \\
30
+ '--output[File to write result]:file' \\
31
+ '--output_format[Output format]:string' \\
32
+ '--headers[Custom HTTP headers]:array' \\
33
+ '--worker_headers[Custom HTTP headers for workers]:array' \\
34
+ '--user_agent[User-Agent string]:string' \\
35
+ '--proxy[Proxy server]:string' \\
36
+ '--proxy_auth[Proxy server authentication]:string' \\
37
+ '--match[Match URL pattern]:string' \\
38
+ '--ignore[Ignore URL pattern]:string' \\
39
+ '--silent[Silent mode]' \\
40
+ '--verbose[Verbose mode]' \\
41
+ '--debug[Debug mode]'
42
+ ZSH
43
+ end
44
+
45
+ def self.fish
46
+ <<~FISH
47
+ complete -c deadfinder -l include30x -d 'Include 30x redirections'
48
+ complete -c deadfinder -l concurrency -d 'Number of concurrency' -a '(seq 1 100)'
49
+ complete -c deadfinder -l timeout -d 'Timeout in seconds' -a '(seq 1 60)'
50
+ complete -c deadfinder -l output -d 'File to write result' -r
51
+ complete -c deadfinder -l output_format -d 'Output format' -r
52
+ complete -c deadfinder -l headers -d 'Custom HTTP headers' -r
53
+ complete -c deadfinder -l worker_headers -d 'Custom HTTP headers for workers' -r
54
+ complete -c deadfinder -l user_agent -d 'User-Agent string' -r
55
+ complete -c deadfinder -l proxy -d 'Proxy server' -r
56
+ complete -c deadfinder -l proxy_auth -d 'Proxy server authentication' -r
57
+ complete -c deadfinder -l match -d 'Match URL pattern' -r
58
+ complete -c deadfinder -l ignore -d 'Ignore URL pattern' -r
59
+ complete -c deadfinder -l silent -d 'Silent mode'
60
+ complete -c deadfinder -l verbose -d 'Verbose mode'
61
+ complete -c deadfinder -l debug -d 'Debug mode'
62
+ FISH
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'openssl'
5
+
6
+ module DeadFinder
7
+ # HTTP client module
8
+ module HttpClient
9
+ def self.create(uri, options)
10
+ begin
11
+ proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
12
+ rescue URI::InvalidURIError => e
13
+ DeadFinder::Logger.error "Invalid proxy URI: #{options['proxy']} - #{e.message}"
14
+ proxy_uri = nil # or handle the error as appropriate
15
+ end
16
+ http = if proxy_uri
17
+ Net::HTTP.new(uri.host, uri.port,
18
+ proxy_uri.host, proxy_uri.port,
19
+ proxy_uri.user, proxy_uri.password)
20
+ else
21
+ Net::HTTP.new(uri.host, uri.port)
22
+ end
23
+ http.use_ssl = (uri.scheme == 'https')
24
+ http.read_timeout = options['timeout'].to_i if options['timeout']
25
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
26
+
27
+ if options['proxy_auth'] && proxy_uri
28
+ proxy_user, proxy_pass = options['proxy_auth'].split(':', 2)
29
+ http.proxy_user = proxy_user
30
+ http.proxy_pass = proxy_pass
31
+ end
32
+
33
+ http
34
+ end
35
+ end
36
+ end
@@ -2,53 +2,102 @@
2
2
 
3
3
  require 'colorize'
4
4
 
5
- class Logger
6
- @silent = false
7
- @mutex = Mutex.new
5
+ module DeadFinder
6
+ class Logger
7
+ @silent = false
8
+ @verbose = false
9
+ @debug = false
10
+ @mutex = Mutex.new
8
11
 
9
- def self.set_silent
10
- @mutex.synchronize { @silent = true }
11
- end
12
+ def self.apply_options(options)
13
+ set_silent if options['silent']
14
+ set_verbose if options['verbose']
15
+ set_debug if options['debug']
16
+ end
12
17
 
13
- def self.unset_silent
14
- @mutex.synchronize { @silent = false }
15
- end
18
+ def self.set_silent
19
+ @mutex.synchronize { @silent = true }
20
+ end
16
21
 
17
- def self.silent?
18
- @mutex.synchronize { @silent }
19
- end
22
+ def self.set_verbose
23
+ @mutex.synchronize { @verbose = true }
24
+ end
20
25
 
21
- def self.log(prefix, text, color)
22
- return if silent?
26
+ def self.set_debug
27
+ @mutex.synchronize { @debug = true }
28
+ end
23
29
 
24
- puts prefix.colorize(color) + text.to_s.colorize(:"light_#{color}")
25
- end
30
+ def self.unset_debug
31
+ @mutex.synchronize { @debug = false }
32
+ end
26
33
 
27
- def self.info(text)
28
- log('ℹ ', text, :blue)
29
- end
34
+ def self.unset_verbose
35
+ @mutex.synchronize { @verbose = false }
36
+ end
30
37
 
31
- def self.error(text)
32
- log('⚠︎ ', text, :red)
33
- end
38
+ def self.debug?
39
+ @mutex.synchronize { @debug }
40
+ end
34
41
 
35
- def self.target(text)
36
- log('► ', text, :green)
37
- end
42
+ def self.verbose?
43
+ @mutex.synchronize { @verbose }
44
+ end
38
45
 
39
- def self.sub_info(text)
40
- log(' ● ', text, :blue)
41
- end
46
+ def self.unset_silent
47
+ @mutex.synchronize { @silent = false }
48
+ end
42
49
 
43
- def self.sub_done(text)
44
- log(' ✓ ', text, :blue)
45
- end
50
+ def self.silent?
51
+ @mutex.synchronize { @silent }
52
+ end
46
53
 
47
- def self.found(text)
48
- log(' ✘ ', text, :red)
49
- end
54
+ def self.log(prefix, text, color)
55
+ return if silent?
56
+
57
+ puts prefix.colorize(color) + text.to_s
58
+ end
59
+
60
+ def self.sub_log(prefix, is_end, text, color)
61
+ return if silent?
62
+
63
+ indent = is_end ? ' └── ' : ' ├── '
64
+ puts indent.colorize(color) + prefix.colorize(color) + text.to_s
65
+ end
66
+
67
+ def self.debug(text)
68
+ log('❀ ', text, :yellow) if debug?
69
+ end
70
+
71
+ def self.info(text)
72
+ log('ℹ ', text, :blue)
73
+ end
74
+
75
+ def self.error(text)
76
+ log('⚠︎ ', text, :red)
77
+ end
78
+
79
+ def self.target(text)
80
+ log('► ', text, :green)
81
+ end
82
+
83
+ def self.sub_info(text)
84
+ log(' ● ', text, :blue)
85
+ end
86
+
87
+ def self.sub_complete(text)
88
+ sub_log('● ', true, text, :blue)
89
+ end
90
+
91
+ def self.found(text)
92
+ sub_log('✘ ', false, text, :red)
93
+ end
94
+
95
+ def self.verbose(text)
96
+ sub_log('➜ ', false, text, :yellow) if verbose?
97
+ end
50
98
 
51
- def self.verbose(text)
52
- log(' ', text, :yellow)
99
+ def self.verbose_ok(text)
100
+ sub_log(' ', false, text, :green) if verbose?
101
+ end
53
102
  end
54
103
  end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'concurrent-edge'
4
+ require 'nokogiri'
5
+ require 'open-uri'
6
+ require 'net/http'
7
+ require 'openssl'
8
+ require 'deadfinder/logger'
9
+ require 'deadfinder/http_client'
10
+ require 'deadfinder/url_pattern_matcher'
11
+
12
+ module DeadFinder
13
+ # Runner class for executing the main logic
14
+ class Runner
15
+ def default_options
16
+ {
17
+ 'concurrency' => 50,
18
+ 'timeout' => 10,
19
+ 'output' => '',
20
+ 'output_format' => 'json',
21
+ 'headers' => [],
22
+ 'worker_headers' => [],
23
+ 'silent' => true,
24
+ 'verbose' => false,
25
+ 'include30x' => false,
26
+ 'proxy' => '',
27
+ 'proxy_auth' => '',
28
+ 'match' => '',
29
+ 'ignore' => '',
30
+ }
31
+ end
32
+
33
+ def run(target, options)
34
+ DeadFinder::Logger.apply_options(options)
35
+ headers = options['headers'].each_with_object({}) do |header, hash|
36
+ kv = header.split(': ')
37
+ hash[kv[0]] = kv[1]
38
+ rescue StandardError
39
+ end
40
+ page = Nokogiri::HTML(URI.open(target, headers))
41
+ links = extract_links(page)
42
+
43
+ DeadFinder::Logger.debug "#{CACHE_QUE.size} URLs in queue, #{CACHE_SET.size} URLs in cache"
44
+
45
+ if options['match'] != ''
46
+ begin
47
+ links.each do |type, urls|
48
+ links[type] = urls.select { |url| DeadFinder::UrlPatternMatcher.match?(url, options['match']) }
49
+ end
50
+ rescue RegexpError => e
51
+ DeadFinder::Logger.error "Invalid match pattern: #{e.message}"
52
+ end
53
+ end
54
+
55
+ if options['ignore'] != ''
56
+ begin
57
+ links.each do |type, urls|
58
+ links[type] = urls.reject { |url| DeadFinder::UrlPatternMatcher.ignore?(url, options['ignore']) }
59
+ end
60
+ rescue RegexpError => e
61
+ DeadFinder::Logger.error "Invalid match pattern: #{e.message}"
62
+ end
63
+ end
64
+
65
+ total_links_count = links.values.flatten.length
66
+ link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }
67
+ .compact.join(' / ')
68
+ DeadFinder::Logger.sub_info "Discovered #{total_links_count} URLs, currently checking them. [#{link_info}]" unless link_info.empty?
69
+
70
+ jobs = Channel.new(buffer: :buffered, capacity: 1000)
71
+ results = Channel.new(buffer: :buffered, capacity: 1000)
72
+
73
+ (1..options['concurrency']).each do |w|
74
+ Channel.go { worker(w, jobs, results, target, options) }
75
+ end
76
+
77
+ links.values.flatten.uniq.each do |node|
78
+ result = generate_url(node, target)
79
+ jobs << result unless result.nil?
80
+ end
81
+
82
+ jobs_size = jobs.size
83
+ jobs.close
84
+
85
+ (1..jobs_size).each { ~results }
86
+ DeadFinder::Logger.sub_complete 'Task completed'
87
+ rescue StandardError => e
88
+ DeadFinder::Logger.error "[#{e}] #{target}"
89
+ end
90
+
91
+ def worker(_id, jobs, results, target, options)
92
+ jobs.each do |j|
93
+ if CACHE_SET[j]
94
+ # Skip if already cached
95
+ else
96
+ CACHE_SET[j] = true
97
+ begin
98
+ CACHE_QUE[j] = true
99
+ uri = URI.parse(j)
100
+ http = HttpClient.create(uri, options)
101
+
102
+ request = Net::HTTP::Get.new(uri.request_uri)
103
+ request['User-Agent'] = options['user_agent']
104
+ options['worker_headers']&.each do |header|
105
+ key, value = header.split(':', 2)
106
+ request[key.strip] = value.strip
107
+ end
108
+
109
+ response = http.request(request)
110
+ status_code = response.code.to_i
111
+
112
+ if status_code >= 400 || (status_code >= 300 && options['include30x'])
113
+ DeadFinder::Logger.found "[#{status_code}] #{j}"
114
+ CACHE_QUE[j] = false
115
+ DeadFinder.output[target] ||= []
116
+ DeadFinder.output[target] << j
117
+ else
118
+ DeadFinder::Logger.verbose_ok "[#{status_code}] #{j}" if options['verbose']
119
+ end
120
+ rescue StandardError => e
121
+ DeadFinder::Logger.verbose "[#{e}] #{j}" if options['verbose']
122
+ end
123
+ end
124
+ results << j
125
+ end
126
+ end
127
+
128
+ private
129
+
130
+ def extract_links(page)
131
+ {
132
+ anchor: page.css('a').map { |element| element['href'] }.compact,
133
+ script: page.css('script').map { |element| element['src'] }.compact,
134
+ link: page.css('link').map { |element| element['href'] }.compact,
135
+ iframe: page.css('iframe').map { |element| element['src'] }.compact,
136
+ form: page.css('form').map { |element| element['action'] }.compact,
137
+ object: page.css('object').map { |element| element['data'] }.compact,
138
+ embed: page.css('embed').map { |element| element['src'] }.compact
139
+ }
140
+ end
141
+ end
142
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'timeout'
4
+
5
+ module DeadFinder
6
+ # URL pattern matcher module
7
+ module UrlPatternMatcher
8
+ def self.match?(url, pattern)
9
+ Timeout.timeout(1) { Regexp.new(pattern).match?(url) }
10
+ rescue Timeout::Error
11
+ false
12
+ end
13
+
14
+ def self.ignore?(url, pattern)
15
+ Timeout.timeout(1) { Regexp.new(pattern).match?(url) }
16
+ rescue Timeout::Error
17
+ false
18
+ end
19
+ end
20
+ end
@@ -1,3 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- VERSION = '1.6.0'
3
+ module DeadFinder
4
+ VERSION = '1.7.0'
5
+ end
data/lib/deadfinder.rb CHANGED
@@ -6,6 +6,8 @@ require 'open-uri'
6
6
  require 'nokogiri'
7
7
  require 'deadfinder/utils'
8
8
  require 'deadfinder/logger'
9
+ require 'deadfinder/runner'
10
+ require 'deadfinder/cli'
9
11
  require 'deadfinder/version'
10
12
  require 'concurrent-edge'
11
13
  require 'sitemap-parser'
@@ -13,245 +15,85 @@ require 'json'
13
15
  require 'yaml'
14
16
  require 'csv'
15
17
 
16
- Channel = Concurrent::Channel
17
- CACHE_SET = Concurrent::Map.new
18
- CACHE_QUE = Concurrent::Map.new
19
- OUTPUT = {}
18
+ module DeadFinder
19
+ Channel = Concurrent::Channel
20
+ CACHE_SET = Concurrent::Map.new
21
+ CACHE_QUE = Concurrent::Map.new
20
22
 
21
- class DeadFinderRunner
22
- def default_options
23
- {
24
- 'concurrency' => 50,
25
- 'timeout' => 10,
26
- 'output' => '',
27
- 'output_format' => 'json',
28
- 'headers' => [],
29
- 'worker_headers' => [],
30
- 'silent' => true,
31
- 'verbose' => false,
32
- 'include30x' => false
33
- }
23
+ @output = {}
24
+ def self.output
25
+ @output
34
26
  end
35
27
 
36
- def run(target, options)
37
- Logger.set_silent if options['silent']
38
- headers = options['headers'].each_with_object({}) do |header, hash|
39
- kv = header.split(': ')
40
- hash[kv[0]] = kv[1]
41
- rescue StandardError
42
- end
43
- page = Nokogiri::HTML(URI.open(target, headers))
44
- links = extract_links(page)
45
-
46
- total_links_count = links.values.flatten.length
47
- # Generate link info string for non-empty link types
48
- link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }.compact.join(' / ')
49
-
50
- # Log the information if there are any links
51
- Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
52
- Logger.sub_info 'Checking'
53
-
54
- jobs = Channel.new(buffer: :buffered, capacity: 1000)
55
- results = Channel.new(buffer: :buffered, capacity: 1000)
56
-
57
- (1..options['concurrency']).each do |w|
58
- Channel.go { worker(w, jobs, results, target, options) }
59
- end
60
-
61
- links.values.flatten.uniq.each do |node|
62
- result = generate_url(node, target)
63
- jobs << result unless result.nil?
64
- end
65
-
66
- jobs_size = jobs.size
67
- jobs.close
68
-
69
- (1..jobs_size).each do
70
- ~results
71
- end
72
- Logger.sub_done 'Done'
73
- rescue StandardError => e
74
- Logger.error "[#{e}] #{target}"
28
+ def self.output=(val)
29
+ @output = val
75
30
  end
76
31
 
77
- def worker(_id, jobs, results, target, options)
78
- jobs.each do |j|
79
- if CACHE_SET[j]
80
- Logger.found "[404 Not Found] #{j}" unless CACHE_QUE[j]
81
- else
82
- CACHE_SET[j] = true
83
- begin
84
- CACHE_QUE[j] = true
85
- uri = URI.parse(j)
86
-
87
- # Create HTTP request with timeout and headers
88
- proxy_uri = URI.parse(options['proxy']) if options['proxy'] && !options['proxy'].empty?
89
- http = if proxy_uri
90
- Net::HTTP.new(uri.host, uri.port, proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password)
91
- else
92
- Net::HTTP.new(uri.host, uri.port)
93
- end
94
- http.use_ssl = (uri.scheme == 'https')
95
- http.read_timeout = options['timeout'].to_i if options['timeout']
96
-
97
- # Set SSL verification mode
98
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
99
-
100
- request = Net::HTTP::Get.new(uri.request_uri)
101
-
102
- # Add User-Agent header
103
- request['User-Agent'] = options['user_agent']
104
-
105
- # Add worker headers if provided
106
- options['worker_headers']&.each do |header|
107
- key, value = header.split(':', 2)
108
- request[key.strip] = value.strip
109
- end
110
-
111
- response = http.request(request)
112
- status_code = response.code.to_i
113
- Logger.verbose "Status Code: #{status_code} for #{j}" if options['verbose']
114
-
115
- if status_code >= 400 || (status_code >= 300 && options['include30x'])
116
- Logger.found "[#{status_code} #{response.message}] #{j}"
117
- CACHE_QUE[j] = false
118
- OUTPUT[target] ||= []
119
- OUTPUT[target] << j
120
- end
121
- rescue StandardError => e
122
- Logger.verbose "[#{e}] #{j}" if options['verbose']
123
- end
124
- end
125
- results << j
126
- end
32
+ def self.run_pipe(options)
33
+ run_with_input(options) { $stdin.gets&.chomp }
127
34
  end
128
35
 
129
- private
130
-
131
- def extract_links(page)
132
- {
133
- anchor: page.css('a').map { |element| element['href'] }.compact,
134
- script: page.css('script').map { |element| element['src'] }.compact,
135
- link: page.css('link').map { |element| element['href'] }.compact,
136
- iframe: page.css('iframe').map { |element| element['src'] }.compact,
137
- form: page.css('form').map { |element| element['action'] }.compact,
138
- object: page.css('object').map { |element| element['data'] }.compact,
139
- embed: page.css('embed').map { |element| element['src'] }.compact
140
- }
36
+ def self.run_file(filename, options)
37
+ run_with_input(options) { File.foreach(filename).map(&:chomp) }
141
38
  end
142
- end
143
39
 
144
- def run_pipe(options)
145
- Logger.set_silent if options['silent']
146
-
147
- Logger.info 'Reading from STDIN'
148
- app = DeadFinderRunner.new
149
- while $stdin.gets
150
- target = $LAST_READ_LINE.chomp
151
- Logger.target "Checking: #{target}"
152
- app.run target, options
40
+ def self.run_url(url, options)
41
+ DeadFinder::Logger.apply_options(options)
42
+ run_with_target(url, options)
153
43
  end
154
- gen_output(options)
155
- end
156
-
157
- def run_file(filename, options)
158
- Logger.set_silent if options['silent']
159
44
 
160
- Logger.info "Reading: #{filename}"
161
- app = DeadFinderRunner.new
162
- File.foreach(filename) do |line|
163
- target = line.chomp
164
- Logger.target "Checking: #{target}"
165
- app.run target, options
45
+ def self.run_sitemap(sitemap_url, options)
46
+ DeadFinder::Logger.apply_options(options)
47
+ app = Runner.new
48
+ base_uri = URI(sitemap_url)
49
+ sitemap = SitemapParser.new(sitemap_url, recurse: true)
50
+ DeadFinder::Logger.info "Found #{sitemap.to_a.size} URLs from #{sitemap_url}"
51
+ sitemap.to_a.each do |url|
52
+ turl = generate_url(url, base_uri)
53
+ run_with_target(turl, options, app)
54
+ end
55
+ gen_output(options)
166
56
  end
167
- gen_output(options)
168
- end
169
-
170
- def run_url(url, options)
171
- Logger.set_silent if options['silent']
172
57
 
173
- Logger.target "Checking: #{url}"
174
- app = DeadFinderRunner.new
175
- app.run url, options
176
- gen_output(options)
177
- end
58
+ def self.run_with_input(options)
59
+ DeadFinder::Logger.apply_options(options)
60
+ DeadFinder::Logger.info 'Reading input'
61
+ app = Runner.new
62
+ Array(yield).each do |target|
63
+ run_with_target(target, options, app)
64
+ end
65
+ gen_output(options)
66
+ end
178
67
 
179
- def run_sitemap(sitemap_url, options)
180
- Logger.set_silent if options['silent']
181
- Logger.info "Parsing sitemap: #{sitemap_url}"
182
- app = DeadFinderRunner.new
183
- base_uri = URI(sitemap_url)
184
- sitemap = SitemapParser.new sitemap_url, { recurse: true }
185
- sitemap.to_a.each do |url|
186
- turl = generate_url url, base_uri
187
- Logger.target "Checking: #{turl}"
188
- app.run turl, options
68
+ def self.run_with_target(target, options, app = Runner.new)
69
+ DeadFinder::Logger.target "Fetching #{target}"
70
+ app.run(target, options)
189
71
  end
190
- gen_output(options)
191
- end
192
72
 
193
- def gen_output(options)
194
- return if options['output'].empty?
73
+ def self.gen_output(options)
74
+ return if options['output'].empty?
195
75
 
196
- output_data = OUTPUT.to_h
197
- format = options['output_format'].to_s.downcase
76
+ output_data = DeadFinder.output.to_h
77
+ format = options['output_format'].to_s.downcase
198
78
 
199
- content = case format
200
- when 'yaml', 'yml'
201
- output_data.to_yaml
202
- when 'csv'
203
- CSV.generate do |csv|
204
- csv << ['target', 'url']
205
- output_data.each do |target, urls|
206
- Array(urls).each do |url|
207
- csv << [target, url]
208
- end
209
- end
79
+ content = case format
80
+ when 'yaml', 'yml'
81
+ output_data.to_yaml
82
+ when 'csv'
83
+ generate_csv(output_data)
84
+ else
85
+ JSON.pretty_generate(output_data)
210
86
  end
211
- else
212
- JSON.pretty_generate(output_data)
213
- end
214
87
 
215
- File.write(options['output'], content)
216
- end
217
-
218
- class DeadFinder < Thor
219
- class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
220
- class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
221
- class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
222
- class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
223
- class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
224
- class_option :headers, aliases: :H, default: [], type: :array,
225
- desc: 'Custom HTTP headers to send with initial request'
226
- class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
227
- class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.0;)', type: :string,
228
- desc: 'User-Agent string to use for requests'
229
- class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
230
- class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
231
- class_option :verbose, aliases: :v, default: false, type: :boolean, desc: 'Verbose mode'
232
-
233
- desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
234
- def pipe
235
- run_pipe options
236
- end
237
-
238
- desc 'file <FILE>', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
239
- def file(filename)
240
- run_file filename, options
241
- end
242
-
243
- desc 'url <URL>', 'Scan the Single URL.'
244
- def url(url)
245
- run_url url, options
246
- end
247
-
248
- desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
249
- def sitemap(sitemap)
250
- run_sitemap sitemap, options
88
+ File.write(options['output'], content)
251
89
  end
252
90
 
253
- desc 'version', 'Show version.'
254
- def version
255
- Logger.info "deadfinder #{VERSION}"
91
+ def self.generate_csv(output_data)
92
+ CSV.generate do |csv|
93
+ csv << %w[target url]
94
+ output_data.each do |target, urls|
95
+ Array(urls).each { |url| csv << [target, url] }
96
+ end
97
+ end
256
98
  end
257
99
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deadfinder
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahwul
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-02-17 00:00:00.000000000 Z
10
+ date: 2025-03-12 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: colorize
@@ -169,6 +169,20 @@ dependencies:
169
169
  - - ">="
170
170
  - !ruby/object:Gem::Version
171
171
  version: 1.2.0
172
+ - !ruby/object:Gem::Dependency
173
+ name: rspec
174
+ requirement: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">="
177
+ - !ruby/object:Gem::Version
178
+ version: '0'
179
+ type: :development
180
+ prerelease: false
181
+ version_requirements: !ruby/object:Gem::Requirement
182
+ requirements:
183
+ - - ">="
184
+ - !ruby/object:Gem::Version
185
+ version: '0'
172
186
  description: Find dead-links (broken links). Dead link (broken link) means a link
173
187
  within a web page that cannot be connected. These links can have a negative impact
174
188
  to SEO and Security. This tool makes it easy to identify and modify.
@@ -180,10 +194,15 @@ extra_rdoc_files: []
180
194
  files:
181
195
  - bin/deadfinder
182
196
  - lib/deadfinder.rb
197
+ - lib/deadfinder/cli.rb
198
+ - lib/deadfinder/completion.rb
199
+ - lib/deadfinder/http_client.rb
183
200
  - lib/deadfinder/logger.rb
201
+ - lib/deadfinder/runner.rb
202
+ - lib/deadfinder/url_pattern_matcher.rb
184
203
  - lib/deadfinder/utils.rb
185
204
  - lib/deadfinder/version.rb
186
- homepage: https://www.hahwul.com
205
+ homepage: https://www.hahwul.com/projects/deadfinder/
187
206
  licenses:
188
207
  - MIT
189
208
  metadata: