deadfinder 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3a2e86be2607ea1af0d52ca1ed57055869cb457d58aa0b3773154d8de15f7a08
4
+ data.tar.gz: ffb4fa247f7f388c9e5f71144f8b9728118627feead5159380bac65d2af4420f
5
+ SHA512:
6
+ metadata.gz: 237f99c06741f5b509af6ede98be469e20d9a60f7be2a5d615568eb7707686a40998ee49ce41143cb6af72beb3fe181ad9cebaed114d11f7a8e9408b3fd54fee
7
+ data.tar.gz: 8cfd465a45ae8b4646e82810e4aa595c809a44a11107c0845f06e11a2eddeee394bb8e01b53a412f33b13bc05aaca49bd0a890ed6a702138d3b7b327634712f1
data/bin/deadfinder ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'deadfinder'
4
+ DeadFinder.start(ARGV)
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'colorize'
4
+
5
# Console reporter used by the scanner: every method prints one colorized
# line to standard output via +puts+.
#
# NOTE(review): this top-level name is the same as Ruby's standard-library
# +Logger+; if 'logger' is ever loaded in the same process this definition
# reopens that class. Renaming would change the public interface, so it is
# only flagged here.
class Logger
  # Prints a top-level informational line ("ℹ " prefix, blue).
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.info(text)
    puts "ℹ ".colorize(:blue) + text.to_s.colorize(:light_blue)
  end

  # Prints the line announcing a scan target ("► " prefix, green).
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.target(text)
    puts "► ".colorize(:green) + text.to_s.colorize(:light_green)
  end

  # Prints an indented progress line (" ● " prefix, blue).
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.sub_info(text)
    puts " ● ".colorize(:blue) + text.to_s.colorize(:light_blue)
  end

  # Prints an indented finding line (" ✘ " prefix), entirely in red.
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.found(text)
    puts " ✘ #{text}".colorize(:red)
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+
5
# Turns an href taken from a page into a URL string, resolving it against the
# URL of the page it was found on.
#
# * hrefs that already start with http:// or https:// are returned unchanged;
# * every other href (protocol-relative, root-relative, path-relative, other
#   schemes) is resolved with URI#merge, so the base URL's port is kept and a
#   relative path replaces the last segment of the base path instead of being
#   glued onto it;
# * an href that URI cannot parse is appended to the base URL as plain text,
#   so with a usable base the result always begins with that base;
# * when +base_url+ itself cannot be parsed the href is returned as is.
#
# @param text [#to_s] the href value
# @param base_url [String, URI] URL of the page that contained the href
# @return [String]
def generate_url(text, base_url)
  node = text.to_s
  return node if node.start_with?('http://', 'https://')

  begin
    base = URI(base_url)
  rescue URI::Error, ArgumentError
    # Nothing to resolve against; hand the href back untouched.
    return node
  end

  begin
    base.merge(node).to_s
  rescue URI::Error, ArgumentError
    # Unparsable href (e.g. contains spaces): keep it anchored on the base
    # rather than returning the bare scraped text.
    "#{base}#{node}"
  end
end
data/lib/deadfinder.rb ADDED
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'English'
4
+ require 'thor'
5
+ require 'open-uri'
6
+ require 'nokogiri'
7
+ require 'deadfinder/utils'
8
+ require 'deadfinder/logger'
9
+ require 'concurrent-edge'
10
+ require 'sitemap-parser'
11
+
12
+ Channel = Concurrent::Channel
13
+
14
# Scans a single page: collects the href of every <a> element, requests each
# distinct link concurrently and reports the ones that answer HTTP 404.
class DeadFinderRunner
  # Number of checker routines started for each scanned page.
  WORKER_COUNT = 20

  # Fetches +target+, extracts its links and checks them.
  #
  # Progress and findings are printed through Logger. Errors raised while
  # fetching or parsing +target+ itself are not rescued here.
  #
  # @param target [String] URL of the page to scan
  # @return [nil]
  def run(target)
    # Block form so the fetched page body is closed once it has been parsed.
    page = URI.open(target) { |io| Nokogiri::HTML(io) }
    hrefs = page.css('a').map { |element| element['href'] }.compact
    Logger.target target
    Logger.sub_info "Found #{hrefs.length} point"
    Logger.sub_info 'Checking'

    urls = hrefs.uniq.map { |href| generate_url(href, target) }

    # Both buffers are sized to the number of jobs so that neither the
    # producer loop below nor the workers can block on a full channel before
    # the results start being drained.
    capacity = [urls.length, 1].max
    jobs = Channel.new(buffer: :buffered, capacity: capacity)
    results = Channel.new(buffer: :buffered, capacity: capacity)

    (1..WORKER_COUNT).each do |id|
      Channel.go { worker(id, jobs, results) }
    end

    urls.each { |url| jobs << url }
    jobs.close

    # Take one acknowledgement per queued URL before reporting completion.
    urls.length.times { ~results }
    Logger.sub_info 'Done'
  end

  # Consumes URLs from +jobs+, requests each one and pushes the URL onto
  # +results+ once it has been handled, whatever the outcome.
  #
  # Only a 404 response is reported; every other failure (other HTTP status,
  # DNS/connection error, unparsable URL) is deliberately ignored so that a
  # single bad link cannot stop the worker.
  #
  # @param id [Integer] worker number (currently unused)
  # @param jobs [#each] source of URL strings
  # @param results [#<<] sink receiving each processed URL
  def worker(id, jobs, results)
    jobs.each do |url|
      begin
        request(url)
      rescue OpenURI::HTTPError => e
        Logger.found "[#{e}] #{url}" if e.io.status.first == '404'
      rescue StandardError
        # Best effort: anything other than a 404 is not a finding.
        nil
      end
      results << url
    end
  end

  private

  # Requests +url+ and discards the response body.
  #
  # The string is parsed first and opened through the URI object, so a
  # scraped value can never reach Kernel#open (which would treat a leading
  # "|" as a command to run). URIs of schemes that cannot be opened are
  # skipped.
  def request(url)
    uri = URI.parse(url)
    return unless uri.respond_to?(:open)

    # Block form makes open-uri close the response body for us.
    uri.open { |_body| nil }
  end
end
54
+
55
# Scans every URL read from standard input, one per line.
#
# Surrounding whitespace (including a trailing "\r" from CRLF input) is
# removed and blank lines are skipped, so they never reach the runner as an
# empty or malformed target.
def run_pipe
  app = DeadFinderRunner.new
  $stdin.each_line do |line|
    target = line.strip
    app.run target unless target.empty?
  end
end
62
+
63
# Scans every URL listed in +filename+, one per line.
#
# The file is read with File.foreach, which closes it when iteration ends.
# Surrounding whitespace (including a trailing "\r" from CRLF files) is
# removed and blank lines are skipped.
#
# @param filename [String] path of the text file holding the URLs
def run_file(filename)
  app = DeadFinderRunner.new
  File.foreach(filename) do |line|
    target = line.strip
    app.run target unless target.empty?
  end
end
70
+
71
# Scans the single page at +url+ with a fresh DeadFinderRunner.
#
# @param url [String] URL of the page to scan
def run_url(url)
  DeadFinderRunner.new.run(url)
end
75
+
76
# Scans every URL listed by the sitemap at +sitemap_url+.
#
# The sitemap is loaded through SitemapParser with the +recurse+ option
# enabled, and each resulting entry is handed to one shared runner in order.
#
# @param sitemap_url [String] location of the sitemap
def run_sitemap(sitemap_url)
  runner = DeadFinderRunner.new
  pages = SitemapParser.new(sitemap_url, { recurse: true }).to_a
  pages.each { |page_url| runner.run(page_url) }
end
83
+
84
# Thor command-line interface: each command announces its mode through
# Logger and delegates to the matching top-level run_* helper.
class DeadFinder < Thor
  # Tells Thor to exit with a non-zero status when a command fails (unknown
  # command, wrong arguments). Defining it also silences the deprecation
  # warning Thor prints for classes that leave it undefined.
  def self.exit_on_failure?
    true
  end

  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
  # Scans the URLs read from standard input.
  def pipe
    Logger.info 'Pipe mode'
    run_pipe
  end

  desc 'file', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
  # Scans the URLs listed in +filename+.
  def file(filename)
    Logger.info 'File mode'
    run_file filename
  end

  desc 'url', 'Scan the Single URL.'
  # Scans the single page at +url+.
  def url(url)
    Logger.info 'Single URL mode'
    run_url url
  end

  desc 'sitemap', 'Scan the URLs from sitemap.'
  # Scans every URL listed by the sitemap at +sitemap+.
  def sitemap(sitemap)
    Logger.info 'Sitemap mode'
    run_sitemap sitemap
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: deadfinder
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - hahwul
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-09-24 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Find dead-links (broken links)
14
+ email: hahwul@gmail.com
15
+ executables:
16
+ - deadfinder
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/deadfinder
21
+ - lib/deadfinder.rb
22
+ - lib/deadfinder/logger.rb
23
+ - lib/deadfinder/utils.rb
24
+ homepage: https://www.hahwul.com
25
+ licenses:
26
+ - MIT
27
+ metadata:
28
+ rubygems_mfa_required: 'true'
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubygems_version: 3.3.3
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: Find dead-links (broken links)
48
+ test_files: []