deadfinder 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 248abe3bb3c321a29cb164930132c0a61ad9e3f7987a8cfb863eebe614d0350f
4
- data.tar.gz: bbd932e255500d98dbd2171d5fe3fb66a820846bb5801f4b6b1b1b74905675e7
3
+ metadata.gz: 694fc6108a1fde665a3e8b265071409ac483434b093c94e3eebb25324e4bb300
4
+ data.tar.gz: 1bd04447be853ff381263915fe2ddcaca3418699828d5e97feb674623c41c8ba
5
5
  SHA512:
6
- metadata.gz: 4b722245eafc6373f2776db13377cbb391ab9a4c4cffb7b226e206fb8c8f7a58d1474345a7fef41b7cc14cc350ef3549db95f3359b7df7b3bcd43d0d3d580dac
7
- data.tar.gz: 5b9706ea35279e5f7c881d1839bb030ab73fb412a474a3cc9ac01837a256ff121b17154246a9ef6173a7410f29e611f52cf66dff9eff7a14c7176b27a9ab00c1
6
+ metadata.gz: 42eb575191bc6fa72cca03b0a94a8f20db82242c19a93bc0fcc9dfe8872c03c2cfe1accf68c480b0432f7c73df4ee0b1782ad08906d1e4bdea35dfc268d837b3
7
+ data.tar.gz: 39d5838080f9afd84cf3272f85a4cc6e26cf663da10b76a7e9fcfe1f63ac1b41ca973a21e454743fbc1011f9955b3c9ec488e0a586df94bdaca5e01f2ea5bec2
@@ -15,6 +15,10 @@ class Logger
15
15
  puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue)
16
16
  end
17
17
 
18
+ def self.sub_done(text)
19
+ puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue)
20
+ end
21
+
18
22
  def self.found(text)
19
23
  puts " ✘ #{text}".colorize(:red)
20
24
  end
@@ -1,3 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- VERSION = '1.1.1'
3
+ VERSION = '1.2.0'
data/lib/deadfinder.rb CHANGED
@@ -9,22 +9,36 @@ require 'deadfinder/logger'
9
9
  require 'deadfinder/version'
10
10
  require 'concurrent-edge'
11
11
  require 'sitemap-parser'
12
+ require 'set'
13
+ require 'json'
12
14
 
13
15
  Channel = Concurrent::Channel
16
+ CacheSet = Set.new
17
+ CacheQue = {}
18
+ Output = {}
14
19
 
15
20
  class DeadFinderRunner
16
21
  def run(target, options)
17
22
  page = Nokogiri::HTML(URI.open(target))
18
- nodeset = page.css('a')
19
- link_a = nodeset.map { |element| element['href'] }.compact
23
+
24
+ nodeset_a = page.css('a')
25
+ link_a = nodeset_a.map { |element| element['href'] }.compact
26
+ nodeset_script = page.css('script')
27
+ link_script = nodeset_script.map { |element| element['src'] }.compact
28
+ nodeset_link = page.css('link')
29
+ link_link = nodeset_link.map { |element| element['href'] }.compact
30
+
31
+ link_merged = []
32
+ link_merged.concat link_a, link_script, link_link
33
+
20
34
  Logger.target target
21
- Logger.sub_info "Found #{link_a.length} point"
35
+ Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
22
36
  Logger.sub_info 'Checking'
23
- jobs = Channel.new(buffer: :buffered, capacity: 100)
24
- results = Channel.new(buffer: :buffered, capacity: 100)
37
+ jobs = Channel.new(buffer: :buffered, capacity: 1000)
38
+ results = Channel.new(buffer: :buffered, capacity: 1000)
25
39
 
26
40
  (1..options['concurrency']).each do |w|
27
- Channel.go { worker(w, jobs, results) }
41
+ Channel.go { worker(w, jobs, results, target, options) }
28
42
  end
29
43
 
30
44
  link_a.uniq.each do |node|
@@ -36,15 +50,26 @@ class DeadFinderRunner
36
50
  (1..link_a.uniq.length).each do
37
51
  ~results
38
52
  end
39
- Logger.sub_info 'Done'
53
+ Logger.sub_done 'Done'
40
54
  end
41
55
 
42
- def worker(_id, jobs, results)
56
+ def worker(_id, jobs, results, target, options)
43
57
  jobs.each do |j|
44
- begin
45
- URI.open(j)
46
- rescue StandardError => e
47
- Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
58
+ if !CacheSet.include? j
59
+ CacheSet.add j
60
+ begin
61
+ CacheQue[j] = true
62
+ URI.open(j, read_timeout: options['timeout'])
63
+ rescue StandardError => e
64
+ if e.to_s.include? '404 Not Found'
65
+ Logger.found "[#{e}] #{j}"
66
+ CacheQue[j] = false
67
+ Output[target] = [] if Output[target].nil?
68
+ Output[target].push j
69
+ end
70
+ end
71
+ elsif !CacheQue[j]
72
+ Logger.found "[404 Not Found] #{j}"
48
73
  end
49
74
  results << j
50
75
  end
@@ -57,6 +82,7 @@ def run_pipe(options)
57
82
  target = $LAST_READ_LINE.gsub("\n", '')
58
83
  app.run target, options
59
84
  end
85
+ gen_output
60
86
  end
61
87
 
62
88
  def run_file(filename, options)
@@ -65,11 +91,13 @@ def run_file(filename, options)
65
91
  target = line.gsub("\n", '')
66
92
  app.run target, options
67
93
  end
94
+ gen_output
68
95
  end
69
96
 
70
97
  def run_url(url, options)
71
98
  app = DeadFinderRunner.new
72
99
  app.run url, options
100
+ gen_output
73
101
  end
74
102
 
75
103
  def run_sitemap(sitemap_url, options)
@@ -78,10 +106,17 @@ def run_sitemap(sitemap_url, options)
78
106
  sitemap.to_a.each do |url|
79
107
  app.run url, options
80
108
  end
109
+ gen_output
110
+ end
111
+
112
+ def gen_output
113
+ File.write options['output'], Output.to_json if options['output'] != ''
81
114
  end
82
115
 
83
116
  class DeadFinder < Thor
84
117
  class_option :concurrency, aliases: :c, default: 20, type: :numeric
118
+ class_option :timeout, aliases: :t, default: 10, type: :numeric
119
+ class_option :output, aliases: :o, default: '', type: :string, desc: 'Save JSON Result'
85
120
 
86
121
  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
87
122
  def pipe
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deadfinder
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahwul
@@ -94,6 +94,34 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.2.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: set
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.0.0
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.0.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: json
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 2.6.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 2.6.0
97
125
  description: Dead link (broken link) means a link within a web page that cannot be
98
126
  connected. These links can have a security negative impact with SEO. This tool makes
99
127
  it easy to identify and modify.