deadfinder 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/deadfinder +4 -0
- data/lib/deadfinder/logger.rb +21 -0
- data/lib/deadfinder/utils.rb +22 -0
- data/lib/deadfinder.rb +109 -0
- metadata +48 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3a2e86be2607ea1af0d52ca1ed57055869cb457d58aa0b3773154d8de15f7a08
|
4
|
+
data.tar.gz: ffb4fa247f7f388c9e5f71144f8b9728118627feead5159380bac65d2af4420f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 237f99c06741f5b509af6ede98be469e20d9a60f7be2a5d615568eb7707686a40998ee49ce41143cb6af72beb3fe181ad9cebaed114d11f7a8e9408b3fd54fee
|
7
|
+
data.tar.gz: 8cfd465a45ae8b4646e82810e4aa595c809a44a11107c0845f06e11a2eddeee394bb8e01b53a412f33b13bc05aaca49bd0a890ed6a702138d3b7b327634712f1
|
data/bin/deadfinder
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'colorize'
|
4
|
+
|
5
|
+
# Colourised console reporter for the scanner.
#
# Each public method prints exactly one line to standard output via +puts+.
# Colouring relies on String#colorize, which this file obtains from the
# 'colorize' require at the top.
class Logger
  class << self
    # Prints a top-level informational message.
    #
    # @param text [#to_s] message to display
    # @return [nil]
    def info(text)
      emit("ℹ ", :blue, text, :light_blue)
    end

    # Prints the target that is about to be processed.
    #
    # @param text [#to_s] target to display
    # @return [nil]
    def target(text)
      emit("► ", :green, text, :light_green)
    end

    # Prints an indented progress/detail message.
    #
    # @param text [#to_s] message to display
    # @return [nil]
    def sub_info(text)
      emit(" ● ", :blue, text, :light_blue)
    end

    # Prints a finding; marker and text are coloured red as one string.
    #
    # @param text [#to_s] description of the finding
    # @return [nil]
    def found(text)
      puts " ✘ #{text}".colorize(:red)
    end

    private

    # Prints a marker and a message, each coloured separately, on one line.
    def emit(marker, marker_color, text, text_color)
      puts marker.colorize(marker_color) + text.to_s.colorize(text_color)
    end
  end
end
|
data/lib/deadfinder/utils.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
# Turns an href taken from a page into an absolute URL string.
#
# Resolution follows RFC 3986 (URI#merge), so:
# * "next.html" on ".../blog/post.html" becomes ".../blog/next.html"
#   (it used to be appended to the full base, giving ".../post.htmlnext.html");
# * "/about" keeps the base's port and other authority parts;
# * "//host/path" inherits the base's scheme.
#
# @param text [#to_s] the raw href value
# @param base_url [String, URI] URL of the page the href was found on
# @return [String] the resolved URL, or one of the fallbacks noted inline
def generate_url(text, base_url)
  node = text.to_s
  # Absolute http(s) links are returned exactly as written.
  return node if node.start_with?('http://', 'https://')

  begin
    base = URI(base_url)
  rescue URI::Error, ArgumentError
    # Without a usable base there is nothing to resolve against; hand the
    # href back unchanged, as this function has always done.
    return node
  end

  begin
    base.merge(node).to_s
  rescue URI::Error
    # The href is not a valid URI reference (e.g. it contains raw spaces) or
    # the base is itself relative. Keep the legacy "append to base" result
    # rather than returning the bare href: the result is later opened by the
    # caller, and a bare, page-controlled string must not reach it unprefixed.
    "#{base}#{node}"
  end
end
|
data/lib/deadfinder.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'English'
|
4
|
+
require 'thor'
|
5
|
+
require 'open-uri'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'deadfinder/utils'
|
8
|
+
require 'deadfinder/logger'
|
9
|
+
require 'concurrent-edge'
|
10
|
+
require 'sitemap-parser'
|
11
|
+
|
12
|
+
# Shorthand for Concurrent::Channel; DeadFinderRunner uses it for its job and
# result channels and to start workers (Channel.go).
Channel = Concurrent::Channel
|
13
|
+
|
14
|
+
# Scans one page for dead links: collects every <a href>, resolves it against
# the page URL, fetches each distinct link concurrently and reports the ones
# that answer "404 Not Found".
class DeadFinderRunner
  # Number of link-checking workers started for every scanned page.
  WORKER_COUNT = 20

  # Scans +target+ and prints progress and findings through Logger.
  #
  # @param target [String] URL of the page to scan
  # @return [nil]
  # @raise whatever URI.open raises when +target+ itself cannot be fetched
  def run(target)
    # Block form so the fetched document is closed as soon as it is parsed.
    page = URI.open(target) { |io| Nokogiri::HTML(io) }
    hrefs = page.css('a').map { |element| element['href'] }.compact
    Logger.target target
    # The count is reported before de-duplication.
    Logger.sub_info "Found #{hrefs.length} point"
    Logger.sub_info 'Checking'

    links = hrefs.uniq
    # Results are only drained after every job has been queued, so both
    # channels must be able to hold all of them. With a fixed capacity the
    # workers block on a full results channel, the jobs channel fills up and
    # the producer below blocks forever on pages with many links.
    capacity = [links.length, 1].max
    jobs = Channel.new(buffer: :buffered, capacity: capacity)
    results = Channel.new(buffer: :buffered, capacity: capacity)

    (1..WORKER_COUNT).each do |w|
      Channel.go { worker(w, jobs, results) }
    end

    links.each { |node| jobs << generate_url(node, target) }
    jobs.close

    # Wait for one result per queued job.
    links.length.times { ~results }
    Logger.sub_info 'Done'
  end

  # Consumes URLs from +jobs+, requests each one and pushes it onto +results+
  # once handled, whether or not the request succeeded.
  #
  # @param id [Integer] worker number (currently unused)
  # @param jobs [#each] source of URL strings
  # @param results [#<<] sink receiving every processed URL
  def worker(id, jobs, results)
    jobs.each do |j|
      begin
        # Links come from the scanned page and are untrusted. Parsing first
        # and calling #open on the URI object never falls back to
        # Kernel#open, which would treat "|cmd" as a command and plain
        # strings as local paths. Anything that is not fetchable raises and
        # is ignored below. The block form closes the response immediately.
        URI.parse(j).open(&:close)
      rescue StandardError => e
        Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
      end
      results << j
    end
  end
end
|
54
|
+
|
55
|
+
# Reads target URLs from standard input, one per line, and scans each in
# turn with a single DeadFinderRunner.
#
# Surrounding whitespace (including the "\r" of CRLF input) is removed and
# blank lines are skipped, so they are never handed to the runner as targets.
#
# @return [nil]
def run_pipe
  app = DeadFinderRunner.new
  while (line = $stdin.gets)
    target = line.strip
    next if target.empty?

    app.run target
  end
end
|
62
|
+
|
63
|
+
# Reads target URLs from +filename+, one per line, and scans each in turn
# with a single DeadFinderRunner.
#
# File.foreach closes the file when iteration ends. Surrounding whitespace
# (including the "\r" of CRLF files) is removed and blank lines are skipped.
#
# @param filename [String] path of the file listing the targets
# @return [nil]
# @raise [SystemCallError] if the file cannot be opened
def run_file(filename)
  app = DeadFinderRunner.new
  File.foreach(filename) do |line|
    target = line.strip
    next if target.empty?

    app.run target
  end
end
|
70
|
+
|
71
|
+
# Scans a single URL with a fresh DeadFinderRunner.
#
# @param url [String] the page to scan
# @return whatever DeadFinderRunner#run returns
def run_url(url)
  DeadFinderRunner.new.run(url)
end
|
75
|
+
|
76
|
+
# Scans every URL listed by the sitemap at +sitemap_url+, one after another,
# with a single DeadFinderRunner. The parser is created with recurse: true.
#
# @param sitemap_url [String] location of the sitemap
# @return [Array] the list of URLs obtained from the sitemap
def run_sitemap(sitemap_url)
  runner = DeadFinderRunner.new
  pages = SitemapParser.new(sitemap_url, { recurse: true }).to_a
  pages.each { |page_url| runner.run(page_url) }
end
|
83
|
+
|
84
|
+
# Command-line interface. Each Thor command announces its mode through
# Logger.info and then delegates to the matching run_* helper.
class DeadFinder < Thor
  # Make usage errors (unknown command, missing argument, ...) exit with a
  # non-zero status. Without this definition Thor exits 0 on such errors and
  # Thor 1.x additionally prints a deprecation warning asking for it.
  def self.exit_on_failure?
    true
  end

  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
  # Scans every URL read from standard input.
  def pipe
    Logger.info 'Pipe mode'
    run_pipe
  end

  desc 'file', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
  # Scans every URL listed in +filename+.
  def file(filename)
    Logger.info 'File mode'
    run_file filename
  end

  desc 'url', 'Scan the Single URL.'
  # Scans the single page at +url+.
  def url(url)
    Logger.info 'Single URL mode'
    run_url url
  end

  desc 'sitemap', 'Scan the URLs from sitemap.'
  # Scans every URL found in the sitemap located at +sitemap+.
  def sitemap(sitemap)
    Logger.info 'Sitemap mode'
    run_sitemap sitemap
  end
end
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: deadfinder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- hahwul
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-09-24 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Find dead-links (broken links)
|
14
|
+
email: hahwul@gmail.com
|
15
|
+
executables:
|
16
|
+
- deadfinder
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/deadfinder
|
21
|
+
- lib/deadfinder.rb
|
22
|
+
- lib/deadfinder/logger.rb
|
23
|
+
- lib/deadfinder/utils.rb
|
24
|
+
homepage: https://www.hahwul.com
|
25
|
+
licenses:
|
26
|
+
- MIT
|
27
|
+
metadata:
|
28
|
+
rubygems_mfa_required: 'true'
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubygems_version: 3.3.3
|
45
|
+
signing_key:
|
46
|
+
specification_version: 4
|
47
|
+
summary: Find dead-links (broken links)
|
48
|
+
test_files: []
|