deadfinder 1.1.0 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3dd7587737b26129d64fcd4887eb39041b8abb27c4b8e1a6c125c7c128b77fb3
4
- data.tar.gz: cf6b6dd2b663df8ffe3aaae9810deb39be18a15957f0b37cb3f70f7584bfb39d
3
+ metadata.gz: 1e08daa4368e4c3e041e642f1708619637ea618c2937c6e36f2aba841f152ec4
4
+ data.tar.gz: 5162fb9eb973b7638ece6bcac4e6b2c731f3a6bd654f842f1aeeeed286cee4ed
5
5
  SHA512:
6
- metadata.gz: 79df6bc3e4f38da2ceda5f4704dbb69f5bb11af7b0e1637096e930c929ed065f2811ca879956ef9819f8a4afde342eb49757d5fb7bd05124d6fffd5aae0cd64e
7
- data.tar.gz: 263eeae2ebaa5e4eceee0182ca1721f8d51936b3921060180cfb7efa2422e5f8f3b78f6eebc83d01214ffec83b1f6bd14ae7f79ad3e9b34306b19bef3f6f0826
6
+ metadata.gz: aa82c3b87b81f8f5293819dbd0e242e57e7fda14fe8820197aa4e32d6c774ea0c379adda7ef49e88865fc00f32cb5cce36c6fae711ac019e2d5be921e1747d65
7
+ data.tar.gz: 952de90ed7b150e106fe33ec203140c162f2f74c941400328fc3da9c41866111f940043b53d9560903fdb822337ff8973bfc70740ad937cdc04428e5598e37fb
@@ -15,6 +15,10 @@ class Logger
15
15
  puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue)
16
16
  end
17
17
 
18
+ def self.sub_done(text)
19
+ puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue)
20
+ end
21
+
18
22
  def self.found(text)
19
23
  puts " ✘ #{text}".colorize(:red)
20
24
  end
@@ -1 +1,3 @@
1
- VERSION = '1.1.0'
1
+ # frozen_string_literal: true
2
+
3
+ VERSION = '1.1.2'
data/lib/deadfinder.rb CHANGED
@@ -9,22 +9,34 @@ require 'deadfinder/logger'
9
9
  require 'deadfinder/version'
10
10
  require 'concurrent-edge'
11
11
  require 'sitemap-parser'
12
+ require 'set'
12
13
 
13
14
  Channel = Concurrent::Channel
15
+ CacheSet = Set.new
16
+ CacheQue = {}
14
17
 
15
18
  class DeadFinderRunner
16
19
  def run(target, options)
17
20
  page = Nokogiri::HTML(URI.open(target))
18
- nodeset = page.css('a')
19
- link_a = nodeset.map { |element| element['href'] }.compact
21
+
22
+ nodeset_a = page.css('a')
23
+ link_a = nodeset_a.map { |element| element['href'] }.compact
24
+ nodeset_script = page.css('script')
25
+ link_script = nodeset_script.map { |element| element['src'] }.compact
26
+ nodeset_link = page.css('link')
27
+ link_link = nodeset_link.map { |element| element['href'] }.compact
28
+
29
+ link_merged = []
30
+ link_merged = link_merged.concat link_a, link_script, link_link
31
+
20
32
  Logger.target target
21
- Logger.sub_info "Found #{link_a.length} point"
33
+ Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
22
34
  Logger.sub_info 'Checking'
23
- jobs = Channel.new(buffer: :buffered, capacity: 100)
24
- results = Channel.new(buffer: :buffered, capacity: 100)
35
+ jobs = Channel.new(buffer: :buffered, capacity: 1000)
36
+ results = Channel.new(buffer: :buffered, capacity: 1000)
25
37
 
26
38
  (1..options['concurrency']).each do |w|
27
- Channel.go { worker(w, jobs, results) }
39
+ Channel.go { worker(w, jobs, results, options) }
28
40
  end
29
41
 
30
42
  link_a.uniq.each do |node|
@@ -36,17 +48,27 @@ class DeadFinderRunner
36
48
  (1..link_a.uniq.length).each do
37
49
  ~results
38
50
  end
39
- Logger.sub_info 'Done'
51
+ Logger.sub_done 'Done'
40
52
  end
41
53
 
42
- def worker(_id, jobs, results)
54
+ def worker(_id, jobs, results, options)
43
55
  jobs.each do |j|
44
- begin
45
- URI.open(j)
46
- rescue StandardError => e
47
- Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
56
+ if !CacheSet.include? j
57
+ CacheSet.add j
58
+ begin
59
+ CacheQue[j] = true
60
+ URI.open(j, :read_timeout => options['timeout'])
61
+ rescue StandardError => e
62
+ Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
63
+ CacheQue[j] = false
64
+ end
65
+ results << j
66
+ else
67
+ if !CacheQue[j]
68
+ Logger.found "[404 Not Found] #{j}"
69
+ end
70
+ results << j
48
71
  end
49
- results << j
50
72
  end
51
73
  end
52
74
  end
@@ -59,7 +81,7 @@ def run_pipe(options)
59
81
  end
60
82
  end
61
83
 
62
- def run_file(filename,options)
84
+ def run_file(filename, options)
63
85
  app = DeadFinderRunner.new
64
86
  File.open(filename).each do |line|
65
87
  target = line.gsub("\n", '')
@@ -67,12 +89,12 @@ def run_file(filename,options)
67
89
  end
68
90
  end
69
91
 
70
- def run_url(url,options)
92
+ def run_url(url, options)
71
93
  app = DeadFinderRunner.new
72
94
  app.run url, options
73
95
  end
74
96
 
75
- def run_sitemap(sitemap_url,options)
97
+ def run_sitemap(sitemap_url, options)
76
98
  app = DeadFinderRunner.new
77
99
  sitemap = SitemapParser.new sitemap_url, { recurse: true }
78
100
  sitemap.to_a.each do |url|
@@ -82,6 +104,7 @@ end
82
104
 
83
105
  class DeadFinder < Thor
84
106
  class_option :concurrency, aliases: :c, default: 20, type: :numeric
107
+ class_option :timeout, aliases: :t, default: 10, type: :numeric
85
108
 
86
109
  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
87
110
  def pipe
metadata CHANGED
@@ -1,16 +1,102 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deadfinder
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahwul
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-25 00:00:00.000000000 Z
12
- dependencies: []
13
- description: Find dead-links (broken links)
11
+ date: 2022-09-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: colorize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: concurrent-ruby-edge
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.6.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.6.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.13.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.13.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: open-uri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.2.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: sitemap-parser
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.5.0
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.5.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: thor
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.2.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 1.2.0
97
+ description: Dead link (broken link) means a link within a web page that cannot be
98
+ connected. These links can have a security negative impact with SEO. This tool makes
99
+ it easy to identify and modify.
14
100
  email: hahwul@gmail.com
15
101
  executables:
16
102
  - deadfinder