deadfinder 1.1.0 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/deadfinder/logger.rb +4 -0
- data/lib/deadfinder/version.rb +3 -1
- data/lib/deadfinder.rb +39 -16
- metadata +90 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e08daa4368e4c3e041e642f1708619637ea618c2937c6e36f2aba841f152ec4
|
4
|
+
data.tar.gz: 5162fb9eb973b7638ece6bcac4e6b2c731f3a6bd654f842f1aeeeed286cee4ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa82c3b87b81f8f5293819dbd0e242e57e7fda14fe8820197aa4e32d6c774ea0c379adda7ef49e88865fc00f32cb5cce36c6fae711ac019e2d5be921e1747d65
|
7
|
+
data.tar.gz: 952de90ed7b150e106fe33ec203140c162f2f74c941400328fc3da9c41866111f940043b53d9560903fdb822337ff8973bfc70740ad937cdc04428e5598e37fb
|
data/lib/deadfinder/logger.rb
CHANGED
data/lib/deadfinder/version.rb
CHANGED
data/lib/deadfinder.rb
CHANGED
@@ -9,22 +9,34 @@ require 'deadfinder/logger'
|
|
9
9
|
require 'deadfinder/version'
|
10
10
|
require 'concurrent-edge'
|
11
11
|
require 'sitemap-parser'
|
12
|
+
require 'set'
|
12
13
|
|
13
14
|
Channel = Concurrent::Channel
|
15
|
+
CacheSet = Set.new
|
16
|
+
CacheQue = {}
|
14
17
|
|
15
18
|
class DeadFinderRunner
|
16
19
|
def run(target, options)
|
17
20
|
page = Nokogiri::HTML(URI.open(target))
|
18
|
-
|
19
|
-
|
21
|
+
|
22
|
+
nodeset_a = page.css('a')
|
23
|
+
link_a = nodeset_a.map { |element| element['href'] }.compact
|
24
|
+
nodeset_script = page.css('script')
|
25
|
+
link_script = nodeset_script.map { |element| element['src'] }.compact
|
26
|
+
nodeset_link = page.css('link')
|
27
|
+
link_link = nodeset_link.map { |element| element['href'] }.compact
|
28
|
+
|
29
|
+
link_merged = []
|
30
|
+
link_merged = link_merged.concat link_a, link_script, link_link
|
31
|
+
|
20
32
|
Logger.target target
|
21
|
-
Logger.sub_info "Found #{
|
33
|
+
Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
|
22
34
|
Logger.sub_info 'Checking'
|
23
|
-
jobs = Channel.new(buffer: :buffered, capacity:
|
24
|
-
results = Channel.new(buffer: :buffered, capacity:
|
35
|
+
jobs = Channel.new(buffer: :buffered, capacity: 1000)
|
36
|
+
results = Channel.new(buffer: :buffered, capacity: 1000)
|
25
37
|
|
26
38
|
(1..options['concurrency']).each do |w|
|
27
|
-
Channel.go { worker(w, jobs, results) }
|
39
|
+
Channel.go { worker(w, jobs, results, options) }
|
28
40
|
end
|
29
41
|
|
30
42
|
link_a.uniq.each do |node|
|
@@ -36,17 +48,27 @@ class DeadFinderRunner
|
|
36
48
|
(1..link_a.uniq.length).each do
|
37
49
|
~results
|
38
50
|
end
|
39
|
-
Logger.
|
51
|
+
Logger.sub_done 'Done'
|
40
52
|
end
|
41
53
|
|
42
|
-
def worker(_id, jobs, results)
|
54
|
+
def worker(_id, jobs, results, options)
|
43
55
|
jobs.each do |j|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
56
|
+
if !CacheSet.include? j
|
57
|
+
CacheSet.add j
|
58
|
+
begin
|
59
|
+
CacheQue[j] = true
|
60
|
+
URI.open(j, :read_timeout => options['timeout'])
|
61
|
+
rescue StandardError => e
|
62
|
+
Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
|
63
|
+
CacheQue[j] = false
|
64
|
+
end
|
65
|
+
results << j
|
66
|
+
else
|
67
|
+
if !CacheQue[j]
|
68
|
+
Logger.found "[404 Not Found] #{j}"
|
69
|
+
end
|
70
|
+
results << j
|
48
71
|
end
|
49
|
-
results << j
|
50
72
|
end
|
51
73
|
end
|
52
74
|
end
|
@@ -59,7 +81,7 @@ def run_pipe(options)
|
|
59
81
|
end
|
60
82
|
end
|
61
83
|
|
62
|
-
def run_file(filename,options)
|
84
|
+
def run_file(filename, options)
|
63
85
|
app = DeadFinderRunner.new
|
64
86
|
File.open(filename).each do |line|
|
65
87
|
target = line.gsub("\n", '')
|
@@ -67,12 +89,12 @@ def run_file(filename,options)
|
|
67
89
|
end
|
68
90
|
end
|
69
91
|
|
70
|
-
def run_url(url,options)
|
92
|
+
def run_url(url, options)
|
71
93
|
app = DeadFinderRunner.new
|
72
94
|
app.run url, options
|
73
95
|
end
|
74
96
|
|
75
|
-
def run_sitemap(sitemap_url,options)
|
97
|
+
def run_sitemap(sitemap_url, options)
|
76
98
|
app = DeadFinderRunner.new
|
77
99
|
sitemap = SitemapParser.new sitemap_url, { recurse: true }
|
78
100
|
sitemap.to_a.each do |url|
|
@@ -82,6 +104,7 @@ end
|
|
82
104
|
|
83
105
|
class DeadFinder < Thor
|
84
106
|
class_option :concurrency, aliases: :c, default: 20, type: :numeric
|
107
|
+
class_option :timeout, aliases: :t, default: 10, type: :numeric
|
85
108
|
|
86
109
|
desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
|
87
110
|
def pipe
|
metadata
CHANGED
@@ -1,16 +1,102 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadfinder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hahwul
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-09-
|
12
|
-
dependencies:
|
13
|
-
|
11
|
+
date: 2022-09-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: colorize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.8.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.8.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: concurrent-ruby-edge
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.6.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.6.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.13.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.13.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: open-uri
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.2.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.2.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sitemap-parser
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.5.0
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.5.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: thor
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.2.0
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.2.0
|
97
|
+
description: Dead link (broken link) means a link within a web page that cannot be
|
98
|
+
connected. These links can have a security negative impact with SEO. This tool makes
|
99
|
+
it easy to identify and modify.
|
14
100
|
email: hahwul@gmail.com
|
15
101
|
executables:
|
16
102
|
- deadfinder
|