deadfinder 1.3.4 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c79dd5fe128b1037e3779ce179e83987677d2ec2b221a5192d62e97ed7d4c70f
4
- data.tar.gz: 3df717dbc6953e5d76fad1d5a96253c6f3ed0380ad4abd31966bf6776c5a2ee3
3
+ metadata.gz: 0c209ef1731010129da940409a782e283301d1b0c80c61b38367fec3cdc73365
4
+ data.tar.gz: 1d68364a7cba83bb2e651a91344ca2e3b1a75731c7acd5fae6ac0be51e35310d
5
5
  SHA512:
6
- metadata.gz: a9c3563018ebee9c061474bd8747c5edab14f1155081a46d4505a288cf46af3ac8747a480226585802a27bdee7d5677594caa8b5926fe50bb3051cd904b599b5
7
- data.tar.gz: a9224112450555ec2055b54761c7406bab11d7303c3df909e36131343a724f98bcaf3b8bd1fa080c96e36afc2ed1c580bf5030c3117c9fe046041c303a18bafd
6
+ metadata.gz: 87fbf3c5bd6a2c786211449b0dd51ca5c428ab791649440f3601afef3c632d969dc1495e6d7c5d2bc141cab4bb07d1fc1293a67cddc03e22d64b3a223ab05837
7
+ data.tar.gz: 10028b1b527f28359e4dc8288f01389218e73aade4855caa857716436b8f7dd33daa40b7708461fd82b2d17c3b9dff84dd12eea0cbbd3129bfabbb76f39224ef
@@ -3,27 +3,37 @@
3
3
  require 'colorize'
4
4
 
5
5
  class Logger
6
+ @silent = false
7
+
8
+ def self.set_silent
9
+ @silent = true
10
+ end
11
+
12
+ def self.silent?
13
+ @silent
14
+ end
15
+
6
16
  def self.info(text)
7
- puts 'ℹ '.colorize(:blue) + text.to_s.colorize(:light_blue)
17
+ puts 'ℹ '.colorize(:blue) + text.to_s.colorize(:light_blue) unless silent?
8
18
  end
9
19
 
10
20
  def self.error(text)
11
- puts '⚠︎ '.colorize(:red) + text.to_s
21
+ puts '⚠︎ '.colorize(:red) + text.to_s unless silent?
12
22
  end
13
23
 
14
24
  def self.target(text)
15
- puts '► '.colorize(:green) + text.to_s.colorize(:light_green)
25
+ puts '► '.colorize(:green) + text.to_s.colorize(:light_green) unless silent?
16
26
  end
17
27
 
18
28
  def self.sub_info(text)
19
- puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue)
29
+ puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue) unless silent?
20
30
  end
21
31
 
22
32
  def self.sub_done(text)
23
- puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue)
33
+ puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue) unless silent?
24
34
  end
25
35
 
26
36
  def self.found(text)
27
- puts " ✘ #{text}".colorize(:red)
37
+ puts " ✘ #{text}".colorize(:red) unless silent?
28
38
  end
29
39
  end
@@ -28,12 +28,10 @@ def ignore_scheme?(url)
28
28
  end
29
29
 
30
30
  def extract_directory(uri)
31
- if uri.path.end_with?('/')
32
- return "#{uri.scheme}://#{uri.host}#{uri.path}"
33
- end
31
+ return "#{uri.scheme}://#{uri.host}#{uri.path}" if uri.path.end_with?('/')
34
32
 
35
33
  path_components = uri.path.split('/')
36
- last_component = path_components.last
34
+ path_components.last
37
35
  path_components.pop
38
36
 
39
37
  directory_path = path_components.join('/')
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- VERSION = '1.3.4'
3
+ VERSION = '1.3.6'
data/lib/deadfinder.rb CHANGED
@@ -13,49 +13,48 @@ require 'set'
13
13
  require 'json'
14
14
 
15
15
  Channel = Concurrent::Channel
16
- CacheSet = Set.new
17
- CacheQue = {}
18
- Output = {}
16
+ CacheSet = Concurrent::Map.new
17
+ CacheQue = Concurrent::Map.new
18
+ Output = Concurrent::Map.new
19
19
 
20
20
  class DeadFinderRunner
21
- def run(target, options)
22
- page = nil
23
-
24
- if options['headers'].length.positive?
25
- headers = {}
26
- options['headers'].each do |header|
27
- kv = header.split ': '
28
- headers[kv[0]] = kv[1]
29
- rescue StandardError
30
- end
21
+ def default_options
22
+ {
23
+ 'concurrency' => 50,
24
+ 'timeout' => 10,
25
+ 'output' => '',
26
+ 'headers' => [],
27
+ 'silent' => true
28
+ }
29
+ end
31
30
 
32
- page = Nokogiri::HTML(URI.open(target, headers))
33
- else
34
- page = Nokogiri::HTML(URI.open(target))
31
+ def run(target, options)
32
+ Logger.set_silent if options['silent']
33
+ headers = options['headers'].each_with_object({}) do |header, hash|
34
+ kv = header.split(': ')
35
+ hash[kv[0]] = kv[1]
36
+ rescue StandardError
35
37
  end
38
+ page = Nokogiri::HTML(URI.open(target, headers))
39
+ links = extract_links(page)
36
40
 
37
- nodeset_a = page.css('a')
38
- link_a = nodeset_a.map { |element| element['href'] }.compact
39
- nodeset_script = page.css('script')
40
- link_script = nodeset_script.map { |element| element['src'] }.compact
41
- nodeset_link = page.css('link')
42
- link_link = nodeset_link.map { |element| element['href'] }.compact
43
-
44
- link_merged = []
45
- link_merged.concat link_a, link_script, link_link
41
+ total_links_count = links.values.flatten.length
42
+ # Generate link info string for non-empty link types
43
+ link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }.compact.join(' / ')
46
44
 
47
- Logger.target target
48
- Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
45
+ # Log the information if there are any links
46
+ Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
49
47
  Logger.sub_info 'Checking'
50
- jobs = Channel.new(buffer: :buffered, capacity: 1000)
48
+
49
+ jobs = Channel.new(buffer: :buffered, capacity: 1000)
51
50
  results = Channel.new(buffer: :buffered, capacity: 1000)
52
51
 
53
52
  (1..options['concurrency']).each do |w|
54
53
  Channel.go { worker(w, jobs, results, target, options) }
55
54
  end
56
55
 
57
- link_merged.uniq.each do |node|
58
- result = generate_url node, target
56
+ links.values.flatten.uniq.each do |node|
57
+ result = generate_url(node, target)
59
58
  jobs << result unless result.nil?
60
59
  end
61
60
 
@@ -72,8 +71,10 @@ class DeadFinderRunner
72
71
 
73
72
  def worker(_id, jobs, results, target, options)
74
73
  jobs.each do |j|
75
- if !CacheSet.include? j
76
- CacheSet.add j
74
+ if CacheSet[j]
75
+ Logger.found "[404 Not Found] #{j}" unless CacheQue[j]
76
+ else
77
+ CacheSet[j] = true
77
78
  begin
78
79
  CacheQue[j] = true
79
80
  URI.open(j, read_timeout: options['timeout'])
@@ -81,84 +82,107 @@ class DeadFinderRunner
81
82
  if e.to_s.include? '404 Not Found'
82
83
  Logger.found "[#{e}] #{j}"
83
84
  CacheQue[j] = false
84
- Output[target] = [] if Output[target].nil?
85
- Output[target].push j
85
+ Output[target] ||= []
86
+ Output[target] << j
86
87
  end
87
88
  end
88
- elsif !CacheQue[j]
89
- Logger.found "[404 Not Found] #{j}"
90
89
  end
91
90
  results << j
92
91
  end
93
92
  end
93
+
94
+ private
95
+
96
+ def extract_links(page)
97
+ {
98
+ anchor: page.css('a').map { |element| element['href'] }.compact,
99
+ script: page.css('script').map { |element| element['src'] }.compact,
100
+ link: page.css('link').map { |element| element['href'] }.compact,
101
+ iframe: page.css('iframe').map { |element| element['src'] }.compact,
102
+ form: page.css('form').map { |element| element['action'] }.compact,
103
+ object: page.css('object').map { |element| element['data'] }.compact,
104
+ embed: page.css('embed').map { |element| element['src'] }.compact
105
+ }
106
+ end
94
107
  end
95
108
 
96
109
  def run_pipe(options)
110
+ Logger.set_silent if options['silent']
111
+
112
+ Logger.info 'Reading from STDIN'
97
113
  app = DeadFinderRunner.new
98
114
  while $stdin.gets
99
- target = $LAST_READ_LINE.gsub("\n", '')
115
+ target = $LAST_READ_LINE.chomp
116
+ Logger.target "Checking: #{target}"
100
117
  app.run target, options
101
118
  end
102
- gen_output
119
+ gen_output(options)
103
120
  end
104
121
 
105
122
  def run_file(filename, options)
123
+ Logger.set_silent if options['silent']
124
+
125
+ Logger.info "Reading: #{filename}"
106
126
  app = DeadFinderRunner.new
107
- File.open(filename).each do |line|
108
- target = line.gsub("\n", '')
127
+ File.foreach(filename) do |line|
128
+ target = line.chomp
129
+ Logger.target "Checking: #{target}"
109
130
  app.run target, options
110
131
  end
111
- gen_output
132
+ gen_output(options)
112
133
  end
113
134
 
114
135
  def run_url(url, options)
136
+ Logger.set_silent if options['silent']
137
+
138
+ Logger.target "Checking: #{url}"
115
139
  app = DeadFinderRunner.new
116
140
  app.run url, options
117
- gen_output
141
+ gen_output(options)
118
142
  end
119
143
 
120
144
  def run_sitemap(sitemap_url, options)
145
+ Logger.set_silent if options['silent']
146
+ Logger.info "Parsing sitemap: #{sitemap_url}"
121
147
  app = DeadFinderRunner.new
122
148
  base_uri = URI(sitemap_url)
123
149
  sitemap = SitemapParser.new sitemap_url, { recurse: true }
124
150
  sitemap.to_a.each do |url|
125
151
  turl = generate_url url, base_uri
152
+ Logger.target "Checking: #{turl}"
126
153
  app.run turl, options
127
154
  end
128
- gen_output
155
+ gen_output(options)
129
156
  end
130
157
 
131
- def gen_output
132
- File.write options['output'], Output.to_json if options['output'] != ''
158
+ def gen_output(options)
159
+ File.write(options['output'], Output.to_json) unless options['output'].empty?
133
160
  end
134
161
 
135
162
  class DeadFinder < Thor
136
- class_option :concurrency, aliases: :c, default: 20, type: :numeric, desc: 'Number of concurrncy'
163
+ class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
137
164
  class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
138
165
  class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write JSON result'
139
166
  class_option :headers, aliases: :H, default: [], type: :array, desc: 'Custom HTTP headers to send with request'
167
+ class_option :silent, aliases: :s, default: false, type: :boolean, desc: 'Silent mode'
140
168
 
141
169
  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
142
170
  def pipe
143
- Logger.info 'Pipe mode'
144
171
  run_pipe options
145
172
  end
146
173
 
147
174
  desc 'file <FILE>', 'Scan the URLs from File. (e.g deadfinder file urls.txt)'
148
175
  def file(filename)
149
- Logger.info 'File mode'
150
176
  run_file filename, options
151
177
  end
152
178
 
153
179
  desc 'url <URL>', 'Scan the Single URL.'
154
180
  def url(url)
155
- Logger.info 'Single URL mode'
156
181
  run_url url, options
157
182
  end
158
183
 
159
184
  desc 'sitemap <SITEMAP-URL>', 'Scan the URLs from sitemap.'
160
185
  def sitemap(sitemap)
161
- Logger.info 'Sitemap mode'
162
186
  run_sitemap sitemap, options
163
187
  end
164
188
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deadfinder
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.4
4
+ version: 1.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahwul
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-29 00:00:00.000000000 Z
11
+ date: 2024-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colorize
@@ -17,6 +17,9 @@ dependencies:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 0.8.0
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.8.0
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -24,6 +27,9 @@ dependencies:
24
27
  - - "~>"
25
28
  - !ruby/object:Gem::Version
26
29
  version: 0.8.0
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.0
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: concurrent-ruby-edge
29
35
  requirement: !ruby/object:Gem::Requirement
@@ -31,6 +37,9 @@ dependencies:
31
37
  - - "~>"
32
38
  - !ruby/object:Gem::Version
33
39
  version: 0.6.0
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 0.6.0
34
43
  type: :runtime
35
44
  prerelease: false
36
45
  version_requirements: !ruby/object:Gem::Requirement
@@ -38,6 +47,9 @@ dependencies:
38
47
  - - "~>"
39
48
  - !ruby/object:Gem::Version
40
49
  version: 0.6.0
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 0.6.0
41
53
  - !ruby/object:Gem::Dependency
42
54
  name: json
43
55
  requirement: !ruby/object:Gem::Requirement
@@ -45,6 +57,9 @@ dependencies:
45
57
  - - "~>"
46
58
  - !ruby/object:Gem::Version
47
59
  version: 2.6.0
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 2.6.0
48
63
  type: :runtime
49
64
  prerelease: false
50
65
  version_requirements: !ruby/object:Gem::Requirement
@@ -52,6 +67,9 @@ dependencies:
52
67
  - - "~>"
53
68
  - !ruby/object:Gem::Version
54
69
  version: 2.6.0
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 2.6.0
55
73
  - !ruby/object:Gem::Dependency
56
74
  name: nokogiri
57
75
  requirement: !ruby/object:Gem::Requirement
@@ -59,6 +77,9 @@ dependencies:
59
77
  - - "~>"
60
78
  - !ruby/object:Gem::Version
61
79
  version: 1.13.0
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.13.0
62
83
  type: :runtime
63
84
  prerelease: false
64
85
  version_requirements: !ruby/object:Gem::Requirement
@@ -66,6 +87,9 @@ dependencies:
66
87
  - - "~>"
67
88
  - !ruby/object:Gem::Version
68
89
  version: 1.13.0
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.13.0
69
93
  - !ruby/object:Gem::Dependency
70
94
  name: open-uri
71
95
  requirement: !ruby/object:Gem::Requirement
@@ -73,6 +97,9 @@ dependencies:
73
97
  - - "~>"
74
98
  - !ruby/object:Gem::Version
75
99
  version: 0.2.0
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 0.2.0
76
103
  type: :runtime
77
104
  prerelease: false
78
105
  version_requirements: !ruby/object:Gem::Requirement
@@ -80,20 +107,29 @@ dependencies:
80
107
  - - "~>"
81
108
  - !ruby/object:Gem::Version
82
109
  version: 0.2.0
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 0.2.0
83
113
  - !ruby/object:Gem::Dependency
84
114
  name: set
85
115
  requirement: !ruby/object:Gem::Requirement
86
116
  requirements:
87
117
  - - "~>"
88
118
  - !ruby/object:Gem::Version
89
- version: 1.0.0
119
+ version: 1.1.0
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: 1.1.0
90
123
  type: :runtime
91
124
  prerelease: false
92
125
  version_requirements: !ruby/object:Gem::Requirement
93
126
  requirements:
94
127
  - - "~>"
95
128
  - !ruby/object:Gem::Version
96
- version: 1.0.0
129
+ version: 1.1.0
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: 1.1.0
97
133
  - !ruby/object:Gem::Dependency
98
134
  name: sitemap-parser
99
135
  requirement: !ruby/object:Gem::Requirement
@@ -101,6 +137,9 @@ dependencies:
101
137
  - - "~>"
102
138
  - !ruby/object:Gem::Version
103
139
  version: 0.5.0
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ version: 0.5.0
104
143
  type: :runtime
105
144
  prerelease: false
106
145
  version_requirements: !ruby/object:Gem::Requirement
@@ -108,6 +147,9 @@ dependencies:
108
147
  - - "~>"
109
148
  - !ruby/object:Gem::Version
110
149
  version: 0.5.0
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: 0.5.0
111
153
  - !ruby/object:Gem::Dependency
112
154
  name: thor
113
155
  requirement: !ruby/object:Gem::Requirement
@@ -115,6 +157,9 @@ dependencies:
115
157
  - - "~>"
116
158
  - !ruby/object:Gem::Version
117
159
  version: 1.2.0
160
+ - - ">="
161
+ - !ruby/object:Gem::Version
162
+ version: 1.2.0
118
163
  type: :runtime
119
164
  prerelease: false
120
165
  version_requirements: !ruby/object:Gem::Requirement
@@ -122,6 +167,9 @@ dependencies:
122
167
  - - "~>"
123
168
  - !ruby/object:Gem::Version
124
169
  version: 1.2.0
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ version: 1.2.0
125
173
  description: Find dead-links (broken links). Dead link (broken link) means a link
126
174
  within a web page that cannot be connected. These links can have a negative impact
127
175
  to SEO and Security. This tool makes it easy to identify and modify.