deadfinder 1.3.4 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/deadfinder/logger.rb +16 -6
- data/lib/deadfinder/utils.rb +2 -4
- data/lib/deadfinder/version.rb +1 -1
- data/lib/deadfinder.rb +74 -50
- metadata +52 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c209ef1731010129da940409a782e283301d1b0c80c61b38367fec3cdc73365
|
4
|
+
data.tar.gz: 1d68364a7cba83bb2e651a91344ca2e3b1a75731c7acd5fae6ac0be51e35310d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87fbf3c5bd6a2c786211449b0dd51ca5c428ab791649440f3601afef3c632d969dc1495e6d7c5d2bc141cab4bb07d1fc1293a67cddc03e22d64b3a223ab05837
|
7
|
+
data.tar.gz: 10028b1b527f28359e4dc8288f01389218e73aade4855caa857716436b8f7dd33daa40b7708461fd82b2d17c3b9dff84dd12eea0cbbd3129bfabbb76f39224ef
|
data/lib/deadfinder/logger.rb
CHANGED
@@ -3,27 +3,37 @@
|
|
3
3
|
require 'colorize'
|
4
4
|
|
5
5
|
class Logger
|
6
|
+
@silent = false
|
7
|
+
|
8
|
+
def self.set_silent
|
9
|
+
@silent = true
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.silent?
|
13
|
+
@silent
|
14
|
+
end
|
15
|
+
|
6
16
|
def self.info(text)
|
7
|
-
puts 'ℹ '.colorize(:blue) + text.to_s.colorize(:light_blue)
|
17
|
+
puts 'ℹ '.colorize(:blue) + text.to_s.colorize(:light_blue) unless silent?
|
8
18
|
end
|
9
19
|
|
10
20
|
def self.error(text)
|
11
|
-
puts '⚠︎ '.colorize(:red) + text.to_s
|
21
|
+
puts '⚠︎ '.colorize(:red) + text.to_s unless silent?
|
12
22
|
end
|
13
23
|
|
14
24
|
def self.target(text)
|
15
|
-
puts '► '.colorize(:green) + text.to_s.colorize(:light_green)
|
25
|
+
puts '► '.colorize(:green) + text.to_s.colorize(:light_green) unless silent?
|
16
26
|
end
|
17
27
|
|
18
28
|
def self.sub_info(text)
|
19
|
-
puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue)
|
29
|
+
puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue) unless silent?
|
20
30
|
end
|
21
31
|
|
22
32
|
def self.sub_done(text)
|
23
|
-
puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue)
|
33
|
+
puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue) unless silent?
|
24
34
|
end
|
25
35
|
|
26
36
|
def self.found(text)
|
27
|
-
puts " ✘ #{text}".colorize(:red)
|
37
|
+
puts " ✘ #{text}".colorize(:red) unless silent?
|
28
38
|
end
|
29
39
|
end
|
data/lib/deadfinder/utils.rb
CHANGED
@@ -28,12 +28,10 @@ def ignore_scheme?(url)
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def extract_directory(uri)
|
31
|
-
if uri.path.end_with?('/')
|
32
|
-
return "#{uri.scheme}://#{uri.host}#{uri.path}"
|
33
|
-
end
|
31
|
+
return "#{uri.scheme}://#{uri.host}#{uri.path}" if uri.path.end_with?('/')
|
34
32
|
|
35
33
|
path_components = uri.path.split('/')
|
36
|
-
|
34
|
+
path_components.last
|
37
35
|
path_components.pop
|
38
36
|
|
39
37
|
directory_path = path_components.join('/')
|
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -13,49 +13,48 @@ require 'set'
|
|
13
13
|
require 'json'
|
14
14
|
|
15
15
|
Channel = Concurrent::Channel
|
16
|
-
CacheSet =
|
17
|
-
CacheQue =
|
18
|
-
Output =
|
16
|
+
CacheSet = Concurrent::Map.new
|
17
|
+
CacheQue = Concurrent::Map.new
|
18
|
+
Output = Concurrent::Map.new
|
19
19
|
|
20
20
|
class DeadFinderRunner
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
end
|
21
|
+
def default_options
|
22
|
+
{
|
23
|
+
'concurrency' => 50,
|
24
|
+
'timeout' => 10,
|
25
|
+
'output' => '',
|
26
|
+
'headers' => [],
|
27
|
+
'silent' => true
|
28
|
+
}
|
29
|
+
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
def run(target, options)
|
32
|
+
Logger.set_silent if options['silent']
|
33
|
+
headers = options['headers'].each_with_object({}) do |header, hash|
|
34
|
+
kv = header.split(': ')
|
35
|
+
hash[kv[0]] = kv[1]
|
36
|
+
rescue StandardError
|
35
37
|
end
|
38
|
+
page = Nokogiri::HTML(URI.open(target, headers))
|
39
|
+
links = extract_links(page)
|
36
40
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
link_script = nodeset_script.map { |element| element['src'] }.compact
|
41
|
-
nodeset_link = page.css('link')
|
42
|
-
link_link = nodeset_link.map { |element| element['href'] }.compact
|
43
|
-
|
44
|
-
link_merged = []
|
45
|
-
link_merged.concat link_a, link_script, link_link
|
41
|
+
total_links_count = links.values.flatten.length
|
42
|
+
# Generate link info string for non-empty link types
|
43
|
+
link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }.compact.join(' / ')
|
46
44
|
|
47
|
-
|
48
|
-
Logger.sub_info "Found #{
|
45
|
+
# Log the information if there are any links
|
46
|
+
Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
|
49
47
|
Logger.sub_info 'Checking'
|
50
|
-
|
48
|
+
|
49
|
+
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
51
50
|
results = Channel.new(buffer: :buffered, capacity: 1000)
|
52
51
|
|
53
52
|
(1..options['concurrency']).each do |w|
|
54
53
|
Channel.go { worker(w, jobs, results, target, options) }
|
55
54
|
end
|
56
55
|
|
57
|
-
|
58
|
-
result = generate_url
|
56
|
+
links.values.flatten.uniq.each do |node|
|
57
|
+
result = generate_url(node, target)
|
59
58
|
jobs << result unless result.nil?
|
60
59
|
end
|
61
60
|
|
@@ -72,8 +71,10 @@ class DeadFinderRunner
|
|
72
71
|
|
73
72
|
def worker(_id, jobs, results, target, options)
|
74
73
|
jobs.each do |j|
|
75
|
-
if
|
76
|
-
|
74
|
+
if CacheSet[j]
|
75
|
+
Logger.found "[404 Not Found] #{j}" unless CacheQue[j]
|
76
|
+
else
|
77
|
+
CacheSet[j] = true
|
77
78
|
begin
|
78
79
|
CacheQue[j] = true
|
79
80
|
URI.open(j, read_timeout: options['timeout'])
|
@@ -81,84 +82,107 @@ class DeadFinderRunner
|
|
81
82
|
if e.to_s.include? '404 Not Found'
|
82
83
|
Logger.found "[#{e}] #{j}"
|
83
84
|
CacheQue[j] = false
|
84
|
-
Output[target]
|
85
|
-
Output[target]
|
85
|
+
Output[target] ||= []
|
86
|
+
Output[target] << j
|
86
87
|
end
|
87
88
|
end
|
88
|
-
elsif !CacheQue[j]
|
89
|
-
Logger.found "[404 Not Found] #{j}"
|
90
89
|
end
|
91
90
|
results << j
|
92
91
|
end
|
93
92
|
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def extract_links(page)
|
97
|
+
{
|
98
|
+
anchor: page.css('a').map { |element| element['href'] }.compact,
|
99
|
+
script: page.css('script').map { |element| element['src'] }.compact,
|
100
|
+
link: page.css('link').map { |element| element['href'] }.compact,
|
101
|
+
iframe: page.css('iframe').map { |element| element['src'] }.compact,
|
102
|
+
form: page.css('form').map { |element| element['action'] }.compact,
|
103
|
+
object: page.css('object').map { |element| element['data'] }.compact,
|
104
|
+
embed: page.css('embed').map { |element| element['src'] }.compact
|
105
|
+
}
|
106
|
+
end
|
94
107
|
end
|
95
108
|
|
96
109
|
def run_pipe(options)
|
110
|
+
Logger.set_silent if options['silent']
|
111
|
+
|
112
|
+
Logger.info 'Reading from STDIN'
|
97
113
|
app = DeadFinderRunner.new
|
98
114
|
while $stdin.gets
|
99
|
-
target = $LAST_READ_LINE.
|
115
|
+
target = $LAST_READ_LINE.chomp
|
116
|
+
Logger.target "Checking: #{target}"
|
100
117
|
app.run target, options
|
101
118
|
end
|
102
|
-
gen_output
|
119
|
+
gen_output(options)
|
103
120
|
end
|
104
121
|
|
105
122
|
def run_file(filename, options)
|
123
|
+
Logger.set_silent if options['silent']
|
124
|
+
|
125
|
+
Logger.info "Reading: #{filename}"
|
106
126
|
app = DeadFinderRunner.new
|
107
|
-
File.
|
108
|
-
target = line.
|
127
|
+
File.foreach(filename) do |line|
|
128
|
+
target = line.chomp
|
129
|
+
Logger.target "Checking: #{target}"
|
109
130
|
app.run target, options
|
110
131
|
end
|
111
|
-
gen_output
|
132
|
+
gen_output(options)
|
112
133
|
end
|
113
134
|
|
114
135
|
def run_url(url, options)
|
136
|
+
Logger.set_silent if options['silent']
|
137
|
+
|
138
|
+
Logger.target "Checking: #{url}"
|
115
139
|
app = DeadFinderRunner.new
|
116
140
|
app.run url, options
|
117
|
-
gen_output
|
141
|
+
gen_output(options)
|
118
142
|
end
|
119
143
|
|
120
144
|
def run_sitemap(sitemap_url, options)
|
145
|
+
Logger.set_silent if options['silent']
|
146
|
+
Logger.info "Parsing sitemap: #{sitemap_url}"
|
121
147
|
app = DeadFinderRunner.new
|
122
148
|
base_uri = URI(sitemap_url)
|
123
149
|
sitemap = SitemapParser.new sitemap_url, { recurse: true }
|
124
150
|
sitemap.to_a.each do |url|
|
125
151
|
turl = generate_url url, base_uri
|
152
|
+
Logger.target "Checking: #{turl}"
|
126
153
|
app.run turl, options
|
127
154
|
end
|
128
|
-
gen_output
|
155
|
+
gen_output(options)
|
129
156
|
end
|
130
157
|
|
131
|
-
def gen_output
|
132
|
-
File.write
|
158
|
+
def gen_output(options)
|
159
|
+
File.write(options['output'], Output.to_json) unless options['output'].empty?
|
133
160
|
end
|
134
161
|
|
135
162
|
class DeadFinder < Thor
|
136
|
-
class_option :concurrency, aliases: :c, default:
|
163
|
+
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
137
164
|
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
138
165
|
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write JSON result'
|
139
166
|
class_option :headers, aliases: :H, default: [], type: :array, desc: 'Custom HTTP headers to send with request'
|
167
|
+
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
140
168
|
|
141
169
|
desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
|
142
170
|
def pipe
|
143
|
-
Logger.info 'Pipe mode'
|
144
171
|
run_pipe options
|
145
172
|
end
|
146
173
|
|
147
174
|
desc 'file <FILE>', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
|
148
175
|
def file(filename)
|
149
|
-
Logger.info 'File mode'
|
150
176
|
run_file filename, options
|
151
177
|
end
|
152
178
|
|
153
179
|
desc 'url <URL>', 'Scan the Single URL.'
|
154
180
|
def url(url)
|
155
|
-
Logger.info 'Single URL mode'
|
156
181
|
run_url url, options
|
157
182
|
end
|
158
183
|
|
159
184
|
desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
|
160
185
|
def sitemap(sitemap)
|
161
|
-
Logger.info 'Sitemap mode'
|
162
186
|
run_sitemap sitemap, options
|
163
187
|
end
|
164
188
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -17,6 +17,9 @@ dependencies:
|
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.8.0
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.8.0
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -24,6 +27,9 @@ dependencies:
|
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: 0.8.0
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.8.0
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: concurrent-ruby-edge
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -31,6 +37,9 @@ dependencies:
|
|
31
37
|
- - "~>"
|
32
38
|
- !ruby/object:Gem::Version
|
33
39
|
version: 0.6.0
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.6.0
|
34
43
|
type: :runtime
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -38,6 +47,9 @@ dependencies:
|
|
38
47
|
- - "~>"
|
39
48
|
- !ruby/object:Gem::Version
|
40
49
|
version: 0.6.0
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.6.0
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: json
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,6 +57,9 @@ dependencies:
|
|
45
57
|
- - "~>"
|
46
58
|
- !ruby/object:Gem::Version
|
47
59
|
version: 2.6.0
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 2.6.0
|
48
63
|
type: :runtime
|
49
64
|
prerelease: false
|
50
65
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -52,6 +67,9 @@ dependencies:
|
|
52
67
|
- - "~>"
|
53
68
|
- !ruby/object:Gem::Version
|
54
69
|
version: 2.6.0
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 2.6.0
|
55
73
|
- !ruby/object:Gem::Dependency
|
56
74
|
name: nokogiri
|
57
75
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,6 +77,9 @@ dependencies:
|
|
59
77
|
- - "~>"
|
60
78
|
- !ruby/object:Gem::Version
|
61
79
|
version: 1.13.0
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.13.0
|
62
83
|
type: :runtime
|
63
84
|
prerelease: false
|
64
85
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -66,6 +87,9 @@ dependencies:
|
|
66
87
|
- - "~>"
|
67
88
|
- !ruby/object:Gem::Version
|
68
89
|
version: 1.13.0
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.13.0
|
69
93
|
- !ruby/object:Gem::Dependency
|
70
94
|
name: open-uri
|
71
95
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,6 +97,9 @@ dependencies:
|
|
73
97
|
- - "~>"
|
74
98
|
- !ruby/object:Gem::Version
|
75
99
|
version: 0.2.0
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 0.2.0
|
76
103
|
type: :runtime
|
77
104
|
prerelease: false
|
78
105
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -80,20 +107,29 @@ dependencies:
|
|
80
107
|
- - "~>"
|
81
108
|
- !ruby/object:Gem::Version
|
82
109
|
version: 0.2.0
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 0.2.0
|
83
113
|
- !ruby/object:Gem::Dependency
|
84
114
|
name: set
|
85
115
|
requirement: !ruby/object:Gem::Requirement
|
86
116
|
requirements:
|
87
117
|
- - "~>"
|
88
118
|
- !ruby/object:Gem::Version
|
89
|
-
version: 1.
|
119
|
+
version: 1.1.0
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 1.1.0
|
90
123
|
type: :runtime
|
91
124
|
prerelease: false
|
92
125
|
version_requirements: !ruby/object:Gem::Requirement
|
93
126
|
requirements:
|
94
127
|
- - "~>"
|
95
128
|
- !ruby/object:Gem::Version
|
96
|
-
version: 1.
|
129
|
+
version: 1.1.0
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 1.1.0
|
97
133
|
- !ruby/object:Gem::Dependency
|
98
134
|
name: sitemap-parser
|
99
135
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +137,9 @@ dependencies:
|
|
101
137
|
- - "~>"
|
102
138
|
- !ruby/object:Gem::Version
|
103
139
|
version: 0.5.0
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: 0.5.0
|
104
143
|
type: :runtime
|
105
144
|
prerelease: false
|
106
145
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -108,6 +147,9 @@ dependencies:
|
|
108
147
|
- - "~>"
|
109
148
|
- !ruby/object:Gem::Version
|
110
149
|
version: 0.5.0
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 0.5.0
|
111
153
|
- !ruby/object:Gem::Dependency
|
112
154
|
name: thor
|
113
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -115,6 +157,9 @@ dependencies:
|
|
115
157
|
- - "~>"
|
116
158
|
- !ruby/object:Gem::Version
|
117
159
|
version: 1.2.0
|
160
|
+
- - ">="
|
161
|
+
- !ruby/object:Gem::Version
|
162
|
+
version: 1.2.0
|
118
163
|
type: :runtime
|
119
164
|
prerelease: false
|
120
165
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -122,6 +167,9 @@ dependencies:
|
|
122
167
|
- - "~>"
|
123
168
|
- !ruby/object:Gem::Version
|
124
169
|
version: 1.2.0
|
170
|
+
- - ">="
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: 1.2.0
|
125
173
|
description: Find dead-links (broken links). Dead link (broken link) means a link
|
126
174
|
within a web page that cannot be connected. These links can have a negative impact
|
127
175
|
to SEO and Security. This tool makes it easy to identify and modify.
|