deadfinder 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c79dd5fe128b1037e3779ce179e83987677d2ec2b221a5192d62e97ed7d4c70f
4
- data.tar.gz: 3df717dbc6953e5d76fad1d5a96253c6f3ed0380ad4abd31966bf6776c5a2ee3
3
+ metadata.gz: c4cee202abc3ad85072d0b9cfd7d7b2029eb539236cffb1d3b2fd220136186c3
4
+ data.tar.gz: 3013f0ed97adfd00061325405141f6bba346957063d6ef7813408aa331cf3232
5
5
  SHA512:
6
- metadata.gz: a9c3563018ebee9c061474bd8747c5edab14f1155081a46d4505a288cf46af3ac8747a480226585802a27bdee7d5677594caa8b5926fe50bb3051cd904b599b5
7
- data.tar.gz: a9224112450555ec2055b54761c7406bab11d7303c3df909e36131343a724f98bcaf3b8bd1fa080c96e36afc2ed1c580bf5030c3117c9fe046041c303a18bafd
6
+ metadata.gz: 69bf37ab49d7464b5345fd8833fca11a3efd003958faa2f5406fc46b00ce17557b19b3c497086304070beb13cf22eeaf18974651cb88c77329ac056a7969bf6a
7
+ data.tar.gz: 51435a7e833e6ee91d2f986265c4f885419ce59864f0f994f9deb28d384ce95595e97060837232115f1a5bdc9cb00961f9e93d792fafd11213d03050b29819f1
@@ -28,12 +28,10 @@ def ignore_scheme?(url)
28
28
  end
29
29
 
30
30
  def extract_directory(uri)
31
- if uri.path.end_with?('/')
32
- return "#{uri.scheme}://#{uri.host}#{uri.path}"
33
- end
31
+ return "#{uri.scheme}://#{uri.host}#{uri.path}" if uri.path.end_with?('/')
34
32
 
35
33
  path_components = uri.path.split('/')
36
- last_component = path_components.last
34
+ path_components.last
37
35
  path_components.pop
38
36
 
39
37
  directory_path = path_components.join('/')
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- VERSION = '1.3.4'
3
+ VERSION = '1.3.5'
data/lib/deadfinder.rb CHANGED
@@ -13,49 +13,37 @@ require 'set'
13
13
  require 'json'
14
14
 
15
15
  Channel = Concurrent::Channel
16
- CacheSet = Set.new
17
- CacheQue = {}
18
- Output = {}
16
+ CacheSet = Concurrent::Map.new
17
+ CacheQue = Concurrent::Map.new
18
+ Output = Concurrent::Map.new
19
19
 
20
20
  class DeadFinderRunner
21
21
  def run(target, options)
22
- page = nil
23
-
24
- if options['headers'].length.positive?
25
- headers = {}
26
- options['headers'].each do |header|
27
- kv = header.split ': '
28
- headers[kv[0]] = kv[1]
29
- rescue StandardError
30
- end
31
-
32
- page = Nokogiri::HTML(URI.open(target, headers))
33
- else
34
- page = Nokogiri::HTML(URI.open(target))
22
+ headers = options['headers'].each_with_object({}) do |header, hash|
23
+ kv = header.split(': ')
24
+ hash[kv[0]] = kv[1]
25
+ rescue StandardError
35
26
  end
27
+ page = Nokogiri::HTML(URI.open(target, headers))
28
+ links = extract_links(page)
36
29
 
37
- nodeset_a = page.css('a')
38
- link_a = nodeset_a.map { |element| element['href'] }.compact
39
- nodeset_script = page.css('script')
40
- link_script = nodeset_script.map { |element| element['src'] }.compact
41
- nodeset_link = page.css('link')
42
- link_link = nodeset_link.map { |element| element['href'] }.compact
43
-
44
- link_merged = []
45
- link_merged.concat link_a, link_script, link_link
30
+ total_links_count = links.values.flatten.length
31
+ # Generate link info string for non-empty link types
32
+ link_info = links.map { |type, urls| "#{type}:#{urls.length}" if urls.length.positive? }.compact.join(' / ')
46
33
 
47
- Logger.target target
48
- Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
34
+ # Log the information if there are any links
35
+ Logger.sub_info "Found #{total_links_count} URLs. [#{link_info}]" unless link_info.empty?
49
36
  Logger.sub_info 'Checking'
50
- jobs = Channel.new(buffer: :buffered, capacity: 1000)
37
+
38
+ jobs = Channel.new(buffer: :buffered, capacity: 1000)
51
39
  results = Channel.new(buffer: :buffered, capacity: 1000)
52
40
 
53
41
  (1..options['concurrency']).each do |w|
54
42
  Channel.go { worker(w, jobs, results, target, options) }
55
43
  end
56
44
 
57
- link_merged.uniq.each do |node|
58
- result = generate_url node, target
45
+ links.values.flatten.uniq.each do |node|
46
+ result = generate_url(node, target)
59
47
  jobs << result unless result.nil?
60
48
  end
61
49
 
@@ -72,8 +60,10 @@ class DeadFinderRunner
72
60
 
73
61
  def worker(_id, jobs, results, target, options)
74
62
  jobs.each do |j|
75
- if !CacheSet.include? j
76
- CacheSet.add j
63
+ if CacheSet[j]
64
+ Logger.found "[404 Not Found] #{j}" unless CacheQue[j]
65
+ else
66
+ CacheSet[j] = true
77
67
  begin
78
68
  CacheQue[j] = true
79
69
  URI.open(j, read_timeout: options['timeout'])
@@ -81,40 +71,52 @@ class DeadFinderRunner
81
71
  if e.to_s.include? '404 Not Found'
82
72
  Logger.found "[#{e}] #{j}"
83
73
  CacheQue[j] = false
84
- Output[target] = [] if Output[target].nil?
85
- Output[target].push j
74
+ Output[target] ||= []
75
+ Output[target] << j
86
76
  end
87
77
  end
88
- elsif !CacheQue[j]
89
- Logger.found "[404 Not Found] #{j}"
90
78
  end
91
79
  results << j
92
80
  end
93
81
  end
82
+
83
+ private
84
+
85
+ def extract_links(page)
86
+ {
87
+ anchor: page.css('a').map { |element| element['href'] }.compact,
88
+ script: page.css('script').map { |element| element['src'] }.compact,
89
+ link: page.css('link').map { |element| element['href'] }.compact,
90
+ iframe: page.css('iframe').map { |element| element['src'] }.compact,
91
+ form: page.css('form').map { |element| element['action'] }.compact,
92
+ object: page.css('object').map { |element| element['data'] }.compact,
93
+ embed: page.css('embed').map { |element| element['src'] }.compact
94
+ }
95
+ end
94
96
  end
95
97
 
96
98
  def run_pipe(options)
97
99
  app = DeadFinderRunner.new
98
100
  while $stdin.gets
99
- target = $LAST_READ_LINE.gsub("\n", '')
101
+ target = $LAST_READ_LINE.chomp
100
102
  app.run target, options
101
103
  end
102
- gen_output
104
+ gen_output(options)
103
105
  end
104
106
 
105
107
  def run_file(filename, options)
106
108
  app = DeadFinderRunner.new
107
- File.open(filename).each do |line|
108
- target = line.gsub("\n", '')
109
+ File.foreach(filename) do |line|
110
+ target = line.chomp
109
111
  app.run target, options
110
112
  end
111
- gen_output
113
+ gen_output(options)
112
114
  end
113
115
 
114
116
  def run_url(url, options)
115
117
  app = DeadFinderRunner.new
116
118
  app.run url, options
117
- gen_output
119
+ gen_output(options)
118
120
  end
119
121
 
120
122
  def run_sitemap(sitemap_url, options)
@@ -125,15 +127,15 @@ def run_sitemap(sitemap_url, options)
125
127
  turl = generate_url url, base_uri
126
128
  app.run turl, options
127
129
  end
128
- gen_output
130
+ gen_output(options)
129
131
  end
130
132
 
131
- def gen_output
132
- File.write options['output'], Output.to_json if options['output'] != ''
133
+ def gen_output(options)
134
+ File.write(options['output'], Output.to_json) unless options['output'].empty?
133
135
  end
134
136
 
135
137
  class DeadFinder < Thor
136
- class_option :concurrency, aliases: :c, default: 20, type: :numeric, desc: 'Number of concurrncy'
138
+ class_option :concurrency, aliases: :c, default: 50, type: :numeric, desc: 'Number of concurrency'
137
139
  class_option :timeout, aliases: :t, default: 10, type: :numeric, desc: 'Timeout in seconds'
138
140
  class_option :output, aliases: :o, default: '', type: :string, desc: 'File to write JSON result'
139
141
  class_option :headers, aliases: :H, default: [], type: :array, desc: 'Custom HTTP headers to send with request'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deadfinder
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.4
4
+ version: 1.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahwul
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-29 00:00:00.000000000 Z
11
+ date: 2024-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colorize