deadfinder 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/deadfinder/utils.rb +2 -4
- data/lib/deadfinder/version.rb +1 -1
- data/lib/deadfinder.rb +48 -46
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4cee202abc3ad85072d0b9cfd7d7b2029eb539236cffb1d3b2fd220136186c3
|
4
|
+
data.tar.gz: 3013f0ed97adfd00061325405141f6bba346957063d6ef7813408aa331cf3232
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69bf37ab49d7464b5345fd8833fca11a3efd003958faa2f5406fc46b00ce17557b19b3c497086304070beb13cf22eeaf18974651cb88c77329ac056a7969bf6a
|
7
|
+
data.tar.gz: 51435a7e833e6ee91d2f986265c4f885419ce59864f0f994f9deb28d384ce95595e97060837232115f1a5bdc9cb00961f9e93d792fafd11213d03050b29819f1
|
data/lib/deadfinder/utils.rb
CHANGED
@@ -28,12 +28,10 @@ def ignore_scheme?(url)
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def extract_directory(uri)
|
31
|
-
if uri.path.end_with?('/')
|
32
|
-
return "#{uri.scheme}://#{uri.host}#{uri.path}"
|
33
|
-
end
|
31
|
+
return "#{uri.scheme}://#{uri.host}#{uri.path}" if uri.path.end_with?('/')
|
34
32
|
|
35
33
|
path_components = uri.path.split('/')
|
36
|
-
|
34
|
+
path_components.last
|
37
35
|
path_components.pop
|
38
36
|
|
39
37
|
directory_path = path_components.join('/')
|
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -13,49 +13,37 @@ require 'set'
|
|
13
13
|
require 'json'
|
14
14
|
|
15
15
|
Channel = Concurrent::Channel
|
16
|
-
CacheSet =
|
17
|
-
CacheQue =
|
18
|
-
Output =
|
16
|
+
CacheSet = Concurrent::Map.new
|
17
|
+
CacheQue = Concurrent::Map.new
|
18
|
+
Output = Concurrent::Map.new
|
19
19
|
|
20
20
|
class DeadFinderRunner
|
21
21
|
def run(target, options)
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
options['headers'].each do |header|
|
27
|
-
kv = header.split ': '
|
28
|
-
headers[kv[0]] = kv[1]
|
29
|
-
rescue StandardError
|
30
|
-
end
|
31
|
-
|
32
|
-
page = Nokogiri::HTML(URI.open(target, headers))
|
33
|
-
else
|
34
|
-
page = Nokogiri::HTML(URI.open(target))
|
22
|
+
headers = options['headers'].each_with_object({}) do |header, hash|
|
23
|
+
kv = header.split(': ')
|
24
|
+
hash[kv[0]] = kv[1]
|
25
|
+
rescue StandardError
|
35
26
|
end
|
27
|
+
page = Nokogiri::HTML(URI.open(target, headers))
|
28
|
+
links = extract_links(page)
|
36
29
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
link_script = nodeset_script.map { |element| element['src'] }.compact
|
41
|
-
nodeset_link = page.css('link')
|
42
|
-
link_link = nodeset_link.map { |element| element['href'] }.compact
|
43
|
-
|
44
|
-
link_merged = []
|
45
|
-
link_merged.concat link_a, link_script, link_link
|
30
|
+
total_links_count = links.values.flatten.length
|
31
|
+
# Generate link info string for non-empty link types
|
32
|
+
link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }.compact.join(' / ')
|
46
33
|
|
47
|
-
|
48
|
-
Logger.sub_info "Found #{
|
34
|
+
# Log the information if there are any links
|
35
|
+
Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
|
49
36
|
Logger.sub_info 'Checking'
|
50
|
-
|
37
|
+
|
38
|
+
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
51
39
|
results = Channel.new(buffer: :buffered, capacity: 1000)
|
52
40
|
|
53
41
|
(1..options['concurrency']).each do |w|
|
54
42
|
Channel.go { worker(w, jobs, results, target, options) }
|
55
43
|
end
|
56
44
|
|
57
|
-
|
58
|
-
result = generate_url
|
45
|
+
links.values.flatten.uniq.each do |node|
|
46
|
+
result = generate_url(node, target)
|
59
47
|
jobs << result unless result.nil?
|
60
48
|
end
|
61
49
|
|
@@ -72,8 +60,10 @@ class DeadFinderRunner
|
|
72
60
|
|
73
61
|
def worker(_id, jobs, results, target, options)
|
74
62
|
jobs.each do |j|
|
75
|
-
if
|
76
|
-
|
63
|
+
if CacheSet[j]
|
64
|
+
Logger.found "[404 Not Found] #{j}" unless CacheQue[j]
|
65
|
+
else
|
66
|
+
CacheSet[j] = true
|
77
67
|
begin
|
78
68
|
CacheQue[j] = true
|
79
69
|
URI.open(j, read_timeout: options['timeout'])
|
@@ -81,40 +71,52 @@ class DeadFinderRunner
|
|
81
71
|
if e.to_s.include? '404 Not Found'
|
82
72
|
Logger.found "[#{e}] #{j}"
|
83
73
|
CacheQue[j] = false
|
84
|
-
Output[target]
|
85
|
-
Output[target]
|
74
|
+
Output[target] ||= []
|
75
|
+
Output[target] << j
|
86
76
|
end
|
87
77
|
end
|
88
|
-
elsif !CacheQue[j]
|
89
|
-
Logger.found "[404 Not Found] #{j}"
|
90
78
|
end
|
91
79
|
results << j
|
92
80
|
end
|
93
81
|
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
def extract_links(page)
|
86
|
+
{
|
87
|
+
anchor: page.css('a').map { |element| element['href'] }.compact,
|
88
|
+
script: page.css('script').map { |element| element['src'] }.compact,
|
89
|
+
link: page.css('link').map { |element| element['href'] }.compact,
|
90
|
+
iframe: page.css('iframe').map { |element| element['src'] }.compact,
|
91
|
+
form: page.css('form').map { |element| element['action'] }.compact,
|
92
|
+
object: page.css('object').map { |element| element['data'] }.compact,
|
93
|
+
embed: page.css('embed').map { |element| element['src'] }.compact
|
94
|
+
}
|
95
|
+
end
|
94
96
|
end
|
95
97
|
|
96
98
|
def run_pipe(options)
|
97
99
|
app = DeadFinderRunner.new
|
98
100
|
while $stdin.gets
|
99
|
-
target = $LAST_READ_LINE.
|
101
|
+
target = $LAST_READ_LINE.chomp
|
100
102
|
app.run target, options
|
101
103
|
end
|
102
|
-
gen_output
|
104
|
+
gen_output(options)
|
103
105
|
end
|
104
106
|
|
105
107
|
def run_file(filename, options)
|
106
108
|
app = DeadFinderRunner.new
|
107
|
-
File.
|
108
|
-
target = line.
|
109
|
+
File.foreach(filename) do |line|
|
110
|
+
target = line.chomp
|
109
111
|
app.run target, options
|
110
112
|
end
|
111
|
-
gen_output
|
113
|
+
gen_output(options)
|
112
114
|
end
|
113
115
|
|
114
116
|
def run_url(url, options)
|
115
117
|
app = DeadFinderRunner.new
|
116
118
|
app.run url, options
|
117
|
-
gen_output
|
119
|
+
gen_output(options)
|
118
120
|
end
|
119
121
|
|
120
122
|
def run_sitemap(sitemap_url, options)
|
@@ -125,15 +127,15 @@ def run_sitemap(sitemap_url, options)
|
|
125
127
|
turl = generate_url url, base_uri
|
126
128
|
app.run turl, options
|
127
129
|
end
|
128
|
-
gen_output
|
130
|
+
gen_output(options)
|
129
131
|
end
|
130
132
|
|
131
|
-
def gen_output
|
132
|
-
File.write
|
133
|
+
def gen_output(options)
|
134
|
+
File.write(options['output'], Output.to_json) unless options['output'].empty?
|
133
135
|
end
|
134
136
|
|
135
137
|
class DeadFinder < Thor
|
136
|
-
class_option :concurrency, aliases: :c, default:
|
138
|
+
class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
|
137
139
|
class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
|
138
140
|
class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write JSON result'
|
139
141
|
class_option :headers, aliases: :H, default: [], type: :array, desc: 'Custom HTTP headers to send with request'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.3
|
4
|
+
version: 1.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -157,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
157
157
|
- !ruby/object:Gem::Version
|
158
158
|
version: '0'
|
159
159
|
requirements: []
|
160
|
-
rubygems_version: 3.
|
160
|
+
rubygems_version: 3.5.3
|
161
161
|
signing_key:
|
162
162
|
specification_version: 4
|
163
163
|
summary: Find dead-links (broken links)
|