deadfinder 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/deadfinder/logger.rb +4 -0
- data/lib/deadfinder/version.rb +1 -1
- data/lib/deadfinder.rb +36 -13
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e08daa4368e4c3e041e642f1708619637ea618c2937c6e36f2aba841f152ec4
|
4
|
+
data.tar.gz: 5162fb9eb973b7638ece6bcac4e6b2c731f3a6bd654f842f1aeeeed286cee4ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa82c3b87b81f8f5293819dbd0e242e57e7fda14fe8820197aa4e32d6c774ea0c379adda7ef49e88865fc00f32cb5cce36c6fae711ac019e2d5be921e1747d65
|
7
|
+
data.tar.gz: 952de90ed7b150e106fe33ec203140c162f2f74c941400328fc3da9c41866111f940043b53d9560903fdb822337ff8973bfc70740ad937cdc04428e5598e37fb
|
data/lib/deadfinder/logger.rb
CHANGED
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -9,22 +9,34 @@ require 'deadfinder/logger'
|
|
9
9
|
require 'deadfinder/version'
|
10
10
|
require 'concurrent-edge'
|
11
11
|
require 'sitemap-parser'
|
12
|
+
require 'set'
|
12
13
|
|
13
14
|
Channel = Concurrent::Channel
|
15
|
+
CacheSet = Set.new
|
16
|
+
CacheQue = {}
|
14
17
|
|
15
18
|
class DeadFinderRunner
|
16
19
|
def run(target, options)
|
17
20
|
page = Nokogiri::HTML(URI.open(target))
|
18
|
-
|
19
|
-
|
21
|
+
|
22
|
+
nodeset_a = page.css('a')
|
23
|
+
link_a = nodeset_a.map { |element| element['href'] }.compact
|
24
|
+
nodeset_script = page.css('script')
|
25
|
+
link_script = nodeset_script.map { |element| element['src'] }.compact
|
26
|
+
nodeset_link = page.css('link')
|
27
|
+
link_link = nodeset_link.map { |element| element['href'] }.compact
|
28
|
+
|
29
|
+
link_merged = []
|
30
|
+
link_merged = link_merged.concat link_a, link_script, link_link
|
31
|
+
|
20
32
|
Logger.target target
|
21
|
-
Logger.sub_info "Found #{
|
33
|
+
Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
|
22
34
|
Logger.sub_info 'Checking'
|
23
|
-
jobs = Channel.new(buffer: :buffered, capacity:
|
24
|
-
results = Channel.new(buffer: :buffered, capacity:
|
35
|
+
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
36
|
+
results = Channel.new(buffer: :buffered, capacity: 1000)
|
25
37
|
|
26
38
|
(1..options['concurrency']).each do |w|
|
27
|
-
Channel.go { worker(w, jobs, results) }
|
39
|
+
Channel.go { worker(w, jobs, results, options) }
|
28
40
|
end
|
29
41
|
|
30
42
|
link_a.uniq.each do |node|
|
@@ -36,17 +48,27 @@ class DeadFinderRunner
|
|
36
48
|
(1..link_a.uniq.length).each do
|
37
49
|
~results
|
38
50
|
end
|
39
|
-
Logger.
|
51
|
+
Logger.sub_done 'Done'
|
40
52
|
end
|
41
53
|
|
42
|
-
def worker(_id, jobs, results)
|
54
|
+
def worker(_id, jobs, results, options)
|
43
55
|
jobs.each do |j|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
56
|
+
if !CacheSet.include? j
|
57
|
+
CacheSet.add j
|
58
|
+
begin
|
59
|
+
CacheQue[j] = true
|
60
|
+
URI.open(j, :read_timeout => options['timeout'])
|
61
|
+
rescue StandardError => e
|
62
|
+
Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
|
63
|
+
CacheQue[j] = false
|
64
|
+
end
|
65
|
+
results << j
|
66
|
+
else
|
67
|
+
if !CacheQue[j]
|
68
|
+
Logger.found "[404 Not Found] #{j}"
|
69
|
+
end
|
70
|
+
results << j
|
48
71
|
end
|
49
|
-
results << j
|
50
72
|
end
|
51
73
|
end
|
52
74
|
end
|
@@ -82,6 +104,7 @@ end
|
|
82
104
|
|
83
105
|
class DeadFinder < Thor
|
84
106
|
class_option :concurrency, aliases: :c, default: 20, type: :numeric
|
107
|
+
class_option :timeout, aliases: :t, default: 10, type: :numeric
|
85
108
|
|
86
109
|
desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
|
87
110
|
def pipe
|