deadfinder 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 248abe3bb3c321a29cb164930132c0a61ad9e3f7987a8cfb863eebe614d0350f
4
- data.tar.gz: bbd932e255500d98dbd2171d5fe3fb66a820846bb5801f4b6b1b1b74905675e7
3
+ metadata.gz: 694fc6108a1fde665a3e8b265071409ac483434b093c94e3eebb25324e4bb300
4
+ data.tar.gz: 1bd04447be853ff381263915fe2ddcaca3418699828d5e97feb674623c41c8ba
5
5
  SHA512:
6
- metadata.gz: 4b722245eafc6373f2776db13377cbb391ab9a4c4cffb7b226e206fb8c8f7a58d1474345a7fef41b7cc14cc350ef3549db95f3359b7df7b3bcd43d0d3d580dac
7
- data.tar.gz: 5b9706ea35279e5f7c881d1839bb030ab73fb412a474a3cc9ac01837a256ff121b17154246a9ef6173a7410f29e611f52cf66dff9eff7a14c7176b27a9ab00c1
6
+ metadata.gz: 42eb575191bc6fa72cca03b0a94a8f20db82242c19a93bc0fcc9dfe8872c03c2cfe1accf68c480b0432f7c73df4ee0b1782ad08906d1e4bdea35dfc268d837b3
7
+ data.tar.gz: 39d5838080f9afd84cf3272f85a4cc6e26cf663da10b76a7e9fcfe1f63ac1b41ca973a21e454743fbc1011f9955b3c9ec488e0a586df94bdaca5e01f2ea5bec2
@@ -15,6 +15,10 @@ class Logger
15
15
  puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue)
16
16
  end
17
17
 
18
+ def self.sub_done(text)
19
+ puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue)
20
+ end
21
+
18
22
  def self.found(text)
19
23
  puts " ✘ #{text}".colorize(:red)
20
24
  end
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- VERSION = '1.1.1'
3
+ VERSION = '1.2.0'
data/lib/deadfinder.rb CHANGED
@@ -9,22 +9,36 @@ require 'deadfinder/logger'
9
9
  require 'deadfinder/version'
10
10
  require 'concurrent-edge'
11
11
  require 'sitemap-parser'
12
+ require 'set'
13
+ require 'json'
12
14
 
13
15
  Channel = Concurrent::Channel
16
+ CacheSet = Set.new
17
+ CacheQue = {}
18
+ Output = {}
14
19
 
15
20
  class DeadFinderRunner
16
21
  def run(target, options)
17
22
  page = Nokogiri::HTML(URI.open(target))
18
- nodeset = page.css('a')
19
- link_a = nodeset.map { |element| element['href'] }.compact
23
+
24
+ nodeset_a = page.css('a')
25
+ link_a = nodeset_a.map { |element| element['href'] }.compact
26
+ nodeset_script = page.css('script')
27
+ link_script = nodeset_script.map { |element| element['src'] }.compact
28
+ nodeset_link = page.css('link')
29
+ link_link = nodeset_link.map { |element| element['href'] }.compact
30
+
31
+ link_merged = []
32
+ link_merged.concat link_a, link_script, link_link
33
+
20
34
  Logger.target target
21
- Logger.sub_info "Found #{link_a.length} point"
35
+ Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
22
36
  Logger.sub_info 'Checking'
23
- jobs = Channel.new(buffer: :buffered, capacity: 100)
24
- results = Channel.new(buffer: :buffered, capacity: 100)
37
+ jobs = Channel.new(buffer: :buffered, capacity: 1000)
38
+ results = Channel.new(buffer: :buffered, capacity: 1000)
25
39
 
26
40
  (1..options['concurrency']).each do |w|
27
- Channel.go { worker(w, jobs, results) }
41
+ Channel.go { worker(w, jobs, results, target, options) }
28
42
  end
29
43
 
30
44
  link_a.uniq.each do |node|
@@ -36,15 +50,26 @@ class DeadFinderRunner
36
50
  (1..link_a.uniq.length).each do
37
51
  ~results
38
52
  end
39
- Logger.sub_info 'Done'
53
+ Logger.sub_done 'Done'
40
54
  end
41
55
 
42
- def worker(_id, jobs, results)
56
+ def worker(_id, jobs, results, target, options)
43
57
  jobs.each do |j|
44
- begin
45
- URI.open(j)
46
- rescue StandardError => e
47
- Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
58
+ if !CacheSet.include? j
59
+ CacheSet.add j
60
+ begin
61
+ CacheQue[j] = true
62
+ URI.open(j, read_timeout: options['timeout'])
63
+ rescue StandardError => e
64
+ if e.to_s.include? '404 Not Found'
65
+ Logger.found "[#{e}] #{j}"
66
+ CacheQue[j] = false
67
+ Output[target] = [] if Output[target].nil?
68
+ Output[target].push j
69
+ end
70
+ end
71
+ elsif !CacheQue[j]
72
+ Logger.found "[404 Not Found] #{j}"
48
73
  end
49
74
  results << j
50
75
  end
@@ -57,6 +82,7 @@ def run_pipe(options)
57
82
  target = $LAST_READ_LINE.gsub("\n", '')
58
83
  app.run target, options
59
84
  end
85
+ gen_output
60
86
  end
61
87
 
62
88
  def run_file(filename, options)
@@ -65,11 +91,13 @@ def run_file(filename, options)
65
91
  target = line.gsub("\n", '')
66
92
  app.run target, options
67
93
  end
94
+ gen_output
68
95
  end
69
96
 
70
97
  def run_url(url, options)
71
98
  app = DeadFinderRunner.new
72
99
  app.run url, options
100
+ gen_output
73
101
  end
74
102
 
75
103
  def run_sitemap(sitemap_url, options)
@@ -78,10 +106,17 @@ def run_sitemap(sitemap_url, options)
78
106
  sitemap.to_a.each do |url|
79
107
  app.run url, options
80
108
  end
109
+ gen_output
110
+ end
111
+
112
+ def gen_output
113
+ File.write options['output'], Output.to_json if options['output'] != ''
81
114
  end
82
115
 
83
116
  class DeadFinder < Thor
84
117
  class_option :concurrency, aliases: :c, default: 20, type: :numeric
118
+ class_option :timeout, aliases: :t, default: 10, type: :numeric
119
+ class_option :output, aliases: :o, default: '', type: :string, desc: 'Save JSON Result'
85
120
 
86
121
  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
87
122
  def pipe
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deadfinder
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahwul
@@ -94,6 +94,34 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.2.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: set
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.0.0
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.0.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: json
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 2.6.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 2.6.0
97
125
  description: Dead link (broken link) means a link within a web page that cannot be
98
126
  connected. These links can have a security negative impact with SEO. This tool makes
99
127
  it easy to identify and modify.