deadfinder 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3a2e86be2607ea1af0d52ca1ed57055869cb457d58aa0b3773154d8de15f7a08
4
+ data.tar.gz: ffb4fa247f7f388c9e5f71144f8b9728118627feead5159380bac65d2af4420f
5
+ SHA512:
6
+ metadata.gz: 237f99c06741f5b509af6ede98be469e20d9a60f7be2a5d615568eb7707686a40998ee49ce41143cb6af72beb3fe181ad9cebaed114d11f7a8e9408b3fd54fee
7
+ data.tar.gz: 8cfd465a45ae8b4646e82810e4aa595c809a44a11107c0845f06e11a2eddeee394bb8e01b53a412f33b13bc05aaca49bd0a890ed6a702138d3b7b327634712f1
data/bin/deadfinder ADDED
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Command-line entry point: loads the library and hands the raw
# arguments to the Thor-based DeadFinder CLI.
require 'deadfinder'

DeadFinder.start(ARGV)
data/lib/deadfinder/logger.rb ADDED
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'colorize'
4
+
5
# Colourised console output helpers used by the scanner.
#
# Every method writes a single line to standard output through +puts+ and
# relies on the +colorize+ String extension being loaded.
#
# NOTE(review): this constant has the same name as Ruby's standard library
# ::Logger; if 'logger' is loaded in the same process this definition
# reopens that class rather than creating a new one -- confirm that is
# acceptable.
class Logger
  # Prints a top-level informational line (blue "ℹ" marker).
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.info(text)
    puts 'ℹ '.colorize(:blue) + text.to_s.colorize(:light_blue)
  end

  # Prints the target that is about to be scanned (green "►" marker).
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.target(text)
    puts '► '.colorize(:green) + text.to_s.colorize(:light_green)
  end

  # Prints a progress line nested under the current target (blue "●" marker).
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.sub_info(text)
    puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue)
  end

  # Prints a finding, entirely in red (red "✘" marker).
  #
  # @param text [#to_s] message to print
  # @return [nil]
  def self.found(text)
    puts " ✘ #{text}".colorize(:red)
  end
end
data/lib/deadfinder/utils.rb ADDED
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+
5
# Resolves a link found on a page into an absolute URL string.
#
# Links that already start with "http://" or "https://" are returned
# untouched. Everything else is resolved against +base_url+ with URI.join,
# so scheme-relative ("//host/x"), root-relative ("/x") and document-relative
# ("x.html", "../x") references all follow the standard resolution rules and
# keep the base URL's port. References carrying their own scheme (for example
# "mailto:") are returned as that absolute URI instead of being appended to
# the base.
#
# @param text [#to_s] raw href value (nil is treated as an empty string)
# @param base_url [String, URI] URL of the page the link was found on
# @return [String] the resolved URL; the raw link text when +base_url+
#   itself cannot be parsed
def generate_url(text, base_url)
  node = text.to_s
  return node if node.start_with?('http://', 'https://')

  base = URI(base_url)
  begin
    URI.join(base, node).to_s
  rescue URI::Error
    # The href is not a parsable URI reference (e.g. it contains a space).
    # Fall back to plain concatenation so the result stays anchored to the
    # base URL and an arbitrary href is never handed back as a bare string.
    "#{base}#{node}"
  end
rescue URI::Error, ArgumentError
  # Best effort: with an unusable base URL there is nothing to resolve against.
  node
end
data/lib/deadfinder.rb ADDED
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'English'
4
+ require 'thor'
5
+ require 'open-uri'
6
+ require 'nokogiri'
7
+ require 'deadfinder/utils'
8
+ require 'deadfinder/logger'
9
+ require 'concurrent-edge'
10
+ require 'sitemap-parser'
11
+
12
+ Channel = Concurrent::Channel
13
+
14
# Scans one page for anchor links and reports every link that answers with
# HTTP status 404.
class DeadFinderRunner
  # Number of concurrent link-checking workers started for each page.
  WORKER_COUNT = 20
  # Smallest buffer size used for the job and result channels.
  MIN_CAPACITY = 100
  # Only strings that look like HTTP(S) URLs are handed to URI.open.
  HTTP_URL = %r{\Ahttps?://}i.freeze

  # Downloads +target+, collects the href of every <a> element and checks
  # each unique link with a pool of workers.
  #
  # @param target [String] URL of the page to scan
  # @return the value of the final Logger.sub_info call
  def run(target)
    # The block form closes the downloaded document once it has been parsed.
    page = URI.open(target) { |io| Nokogiri::HTML(io) }
    hrefs = page.css('a').map { |element| element['href'] }.compact
    Logger.target target
    Logger.sub_info "Found #{hrefs.length} point"
    Logger.sub_info 'Checking'

    links = hrefs.uniq
    # Results are only drained after every job has been queued, so both
    # buffers must be able to hold all links; with fixed-size buffers a page
    # with enough links blocks the producer and the workers on each other.
    capacity = [links.length, MIN_CAPACITY].max
    jobs = Channel.new(buffer: :buffered, capacity: capacity)
    results = Channel.new(buffer: :buffered, capacity: capacity)

    WORKER_COUNT.times do |index|
      Channel.go { worker(index + 1, jobs, results) }
    end

    links.each { |href| jobs << generate_url(href, target) }
    jobs.close

    # Wait for one acknowledgement per queued link.
    links.length.times { ~results }
    Logger.sub_info 'Done'
  end

  # Consumes URLs from +jobs+ until it is closed, checks each one and pushes
  # the URL onto +results+ as an acknowledgement.
  #
  # @param id [Integer] worker number (currently unused)
  # @param jobs [#each] source of URL strings
  # @param results [#<<] sink that receives every processed URL
  def worker(id, jobs, results)
    jobs.each do |url|
      check(url)
      results << url
    end
  end

  private

  # Requests +url+ and logs it as dead when the server answers 404.
  # Every other failure (timeout, DNS error, other status codes, ...) is
  # deliberately ignored: only confirmed 404 responses are reported.
  def check(url)
    # URI.open passes anything that is not a URL on to Kernel#open, which
    # can open local files or, for a leading "|", spawn a process. Links come
    # from untrusted pages, so only HTTP(S) URLs are ever opened.
    return unless HTTP_URL.match?(url)

    # The block form closes the response body right away.
    URI.open(url) { |_io| nil }
  rescue OpenURI::HTTPError => e
    # Decide on the status code, not on the server's reason phrase.
    Logger.found "[#{e}] #{url}" if e.io&.status&.first == '404'
  rescue StandardError
    nil
  end
end
54
+
55
# Reads target URLs from standard input, one per line, and scans each of
# them with a single shared DeadFinderRunner.
#
# Surrounding whitespace (including "\r" from CRLF input) is removed and
# blank lines are skipped instead of being passed on as an empty target.
def run_pipe
  app = DeadFinderRunner.new
  $stdin.each_line do |line|
    target = line.strip
    next if target.empty?

    app.run target
  end
end
62
+
63
# Reads target URLs from a file, one per line, and scans each of them with a
# single shared DeadFinderRunner.
#
# File.foreach closes the file when iteration ends. Surrounding whitespace
# (including "\r" from CRLF files) is removed and blank lines are skipped.
#
# @param filename [String] path of the file that lists the target URLs
def run_file(filename)
  app = DeadFinderRunner.new
  File.foreach(filename) do |line|
    target = line.strip
    next if target.empty?

    app.run target
  end
end
70
+
71
# Scans a single URL with a freshly created DeadFinderRunner.
#
# @param url [String] URL of the page to scan
def run_url(url)
  DeadFinderRunner.new.run(url)
end
75
+
76
# Scans every URL returned by SitemapParser for the given sitemap (parsed
# with the +recurse+ option enabled), reusing one DeadFinderRunner.
#
# @param sitemap_url [String] location of the sitemap
def run_sitemap(sitemap_url)
  runner = DeadFinderRunner.new
  pages = SitemapParser.new(sitemap_url, { recurse: true }).to_a
  pages.each { |page_url| runner.run(page_url) }
end
83
+
84
# Thor command-line interface of the gem. Each command logs the selected
# mode and then delegates to the matching top-level run_* helper.
class DeadFinder < Thor
  # Tells Thor to exit with a non-zero status when a command fails (for
  # example on an unknown command or a missing argument). Without this
  # method recent Thor versions print a deprecation warning and exit with 0.
  def self.exit_on_failure?
    true
  end

  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
  # Scans every URL read from standard input.
  def pipe
    Logger.info 'Pipe mode'
    run_pipe
  end

  desc 'file', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
  # Scans every URL listed in +filename+.
  def file(filename)
    Logger.info 'File mode'
    run_file filename
  end

  desc 'url', 'Scan the Single URL.'
  # Scans the single page at +url+.
  def url(url)
    Logger.info 'Single URL mode'
    run_url url
  end

  desc 'sitemap', 'Scan the URLs from sitemap.'
  # Scans every URL found in the sitemap at +sitemap+.
  def sitemap(sitemap)
    Logger.info 'Sitemap mode'
    run_sitemap sitemap
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: deadfinder
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - hahwul
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-09-24 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Find dead-links (broken links)
14
+ email: hahwul@gmail.com
15
+ executables:
16
+ - deadfinder
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/deadfinder
21
+ - lib/deadfinder.rb
22
+ - lib/deadfinder/logger.rb
23
+ - lib/deadfinder/utils.rb
24
+ homepage: https://www.hahwul.com
25
+ licenses:
26
+ - MIT
27
+ metadata:
28
+ rubygems_mfa_required: 'true'
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubygems_version: 3.3.3
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: Find dead-links (broken links)
48
+ test_files: []