deadfinder 1.5.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/deadfinder/utils.rb +12 -29
- data/lib/deadfinder/version.rb +1 -1
- data/lib/deadfinder.rb +27 -3
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a9dba525c92f09cb14019fe890f73dc85eaa975179ac19efca534f0fdc76b14
|
4
|
+
data.tar.gz: fee3168a2912dacd9a2740b1aaf2c495b0b68b48b4d024c208523d8a8a5bb5e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be806719c1ed5ca905885222d485f376c7e1a61dff0388f812e05d52e279e21315af08fdcfb5b66e9773c88be2f1a5c8c434eba62e6340f91db76ea29f4643db
|
7
|
+
data.tar.gz: d966f9237d2a4f62ba98684346b045c49bd14b024723eda22ac5f0c9c2fb0e62fc358d6607d924c0cdd80cd10e8e5f4eb37a13a067188d8394d142fe0476001b
|
data/lib/deadfinder/utils.rb
CHANGED
@@ -4,41 +4,24 @@ require 'uri'
|
|
4
4
|
|
5
5
|
def generate_url(text, base_url)
|
6
6
|
node = text.to_s
|
7
|
+
return node if node.start_with?('http://', 'https://')
|
8
|
+
|
7
9
|
begin
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
return "#{extract_directory(uri)}#{node}"
|
18
|
-
end
|
10
|
+
uri = URI(base_url)
|
11
|
+
if node.start_with?('//')
|
12
|
+
"#{uri.scheme}:#{node}"
|
13
|
+
elsif node.start_with?('/')
|
14
|
+
"#{uri.scheme}://#{uri.host}#{node}"
|
15
|
+
elsif ignore_scheme?(node)
|
16
|
+
nil
|
17
|
+
else
|
18
|
+
URI.join(base_url, node).to_s
|
19
19
|
end
|
20
20
|
rescue StandardError
|
21
|
-
|
21
|
+
nil
|
22
22
|
end
|
23
|
-
node
|
24
23
|
end
|
25
24
|
|
26
25
|
def ignore_scheme?(url)
|
27
26
|
url.start_with?('mailto:', 'tel:', 'sms:', 'data:', 'file:')
|
28
27
|
end
|
29
|
-
|
30
|
-
def extract_directory(uri)
|
31
|
-
return "#{uri.scheme}://#{uri.host}#{uri.path}" if uri.path.end_with?('/')
|
32
|
-
|
33
|
-
path_components = uri.path.split('/')
|
34
|
-
path_components.last
|
35
|
-
path_components.pop
|
36
|
-
|
37
|
-
directory_path = path_components.join('/')
|
38
|
-
|
39
|
-
if directory_path.start_with?('/')
|
40
|
-
"#{uri.scheme}://#{uri.host}#{directory_path}/"
|
41
|
-
else
|
42
|
-
"#{uri.scheme}://#{uri.host}/#{directory_path}/"
|
43
|
-
end
|
44
|
-
end
|
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -10,6 +10,8 @@ require 'deadfinder/version'
|
|
10
10
|
require 'concurrent-edge'
|
11
11
|
require 'sitemap-parser'
|
12
12
|
require 'json'
|
13
|
+
require 'yaml'
|
14
|
+
require 'csv'
|
13
15
|
|
14
16
|
Channel = Concurrent::Channel
|
15
17
|
CACHE_SET = Concurrent::Map.new
|
@@ -22,6 +24,7 @@ class DeadFinderRunner
|
|
22
24
|
'concurrency' => 50,
|
23
25
|
'timeout' => 10,
|
24
26
|
'output' => '',
|
27
|
+
'output_format' => 'json',
|
25
28
|
'headers' => [],
|
26
29
|
'worker_headers' => [],
|
27
30
|
'silent' => true,
|
@@ -188,19 +191,40 @@ def run_sitemap(sitemap_url, options)
|
|
188
191
|
end
|
189
192
|
|
190
193
|
def gen_output(options)
|
194
|
+
return if options['output'].empty?
|
195
|
+
|
191
196
|
output_data = OUTPUT.to_h
|
192
|
-
|
197
|
+
format = options['output_format'].to_s.downcase
|
198
|
+
|
199
|
+
content = case format
|
200
|
+
when 'yaml', 'yml'
|
201
|
+
output_data.to_yaml
|
202
|
+
when 'csv'
|
203
|
+
CSV.generate do |csv|
|
204
|
+
csv << ['target', 'url']
|
205
|
+
output_data.each do |target, urls|
|
206
|
+
Array(urls).each do |url|
|
207
|
+
csv << [target, url]
|
208
|
+
end
|
209
|
+
end
|
210
|
+
end
|
211
|
+
else
|
212
|
+
JSON.pretty_generate(output_data)
|
213
|
+
end
|
214
|
+
|
215
|
+
File.write(options['output'], content)
|
193
216
|
end
|
194
217
|
|
195
218
|
class DeadFinder < Thor
|
196
219
|
class_option :include30x, aliases: :r, default: false, type: :boolean, desc: 'Include 30x redirections'
|
197
220
|
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
198
221
|
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
199
|
-
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write
|
222
|
+
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write result (e.g., json, yaml, csv)'
|
223
|
+
class_option :output_format, aliases: :f, default: 'json', type: :string, desc: 'Output format'
|
200
224
|
class_option :headers, aliases: :H, default: [], type: :array,
|
201
225
|
desc: 'Custom HTTP headers to send with initial request'
|
202
226
|
class_option :worker_headers, default: [], type: :array, desc: 'Custom HTTP headers to send with worker requests'
|
203
|
-
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.
|
227
|
+
class_option :user_agent, default: 'Mozilla/5.0 (compatible; DeadFinder/1.6.0;)', type: :string,
|
204
228
|
desc: 'User-Agent string to use for requests'
|
205
229
|
class_option :proxy, aliases: :p, default: '', type: :string, desc: 'Proxy server to use for requests'
|
206
230
|
class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 2025-02-17 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: colorize
|
@@ -203,7 +203,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
203
203
|
- !ruby/object:Gem::Version
|
204
204
|
version: '0'
|
205
205
|
requirements: []
|
206
|
-
rubygems_version: 3.6.
|
206
|
+
rubygems_version: 3.6.3
|
207
207
|
specification_version: 4
|
208
208
|
summary: Find dead-links (broken links)
|
209
209
|
test_files: []
|