deadfinder 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3dd7587737b26129d64fcd4887eb39041b8abb27c4b8e1a6c125c7c128b77fb3
4
- data.tar.gz: cf6b6dd2b663df8ffe3aaae9810deb39be18a15957f0b37cb3f70f7584bfb39d
3
+ metadata.gz: 1e08daa4368e4c3e041e642f1708619637ea618c2937c6e36f2aba841f152ec4
4
+ data.tar.gz: 5162fb9eb973b7638ece6bcac4e6b2c731f3a6bd654f842f1aeeeed286cee4ed
5
5
  SHA512:
6
- metadata.gz: 79df6bc3e4f38da2ceda5f4704dbb69f5bb11af7b0e1637096e930c929ed065f2811ca879956ef9819f8a4afde342eb49757d5fb7bd05124d6fffd5aae0cd64e
7
- data.tar.gz: 263eeae2ebaa5e4eceee0182ca1721f8d51936b3921060180cfb7efa2422e5f8f3b78f6eebc83d01214ffec83b1f6bd14ae7f79ad3e9b34306b19bef3f6f0826
6
+ metadata.gz: aa82c3b87b81f8f5293819dbd0e242e57e7fda14fe8820197aa4e32d6c774ea0c379adda7ef49e88865fc00f32cb5cce36c6fae711ac019e2d5be921e1747d65
7
+ data.tar.gz: 952de90ed7b150e106fe33ec203140c162f2f74c941400328fc3da9c41866111f940043b53d9560903fdb822337ff8973bfc70740ad937cdc04428e5598e37fb
@@ -15,6 +15,10 @@ class Logger
15
15
  puts ' ● '.colorize(:blue) + text.to_s.colorize(:light_blue)
16
16
  end
17
17
 
18
+ def self.sub_done(text)
19
+ puts ' ✓ '.colorize(:blue) + text.to_s.colorize(:light_blue)
20
+ end
21
+
18
22
  def self.found(text)
19
23
  puts " ✘ #{text}".colorize(:red)
20
24
  end
@@ -1 +1,3 @@
1
- VERSION = '1.1.0'
1
+ # frozen_string_literal: true
2
+
3
+ VERSION = '1.1.2'
data/lib/deadfinder.rb CHANGED
@@ -9,22 +9,34 @@ require 'deadfinder/logger'
9
9
  require 'deadfinder/version'
10
10
  require 'concurrent-edge'
11
11
  require 'sitemap-parser'
12
+ require 'set'
12
13
 
13
14
  Channel = Concurrent::Channel
15
+ CacheSet = Set.new
16
+ CacheQue = {}
14
17
 
15
18
  class DeadFinderRunner
16
19
  def run(target, options)
17
20
  page = Nokogiri::HTML(URI.open(target))
18
- nodeset = page.css('a')
19
- link_a = nodeset.map { |element| element['href'] }.compact
21
+
22
+ nodeset_a = page.css('a')
23
+ link_a = nodeset_a.map { |element| element['href'] }.compact
24
+ nodeset_script = page.css('script')
25
+ link_script = nodeset_script.map { |element| element['src'] }.compact
26
+ nodeset_link = page.css('link')
27
+ link_link = nodeset_link.map { |element| element['href'] }.compact
28
+
29
+ link_merged = []
30
+ link_merged = link_merged.concat link_a, link_script, link_link
31
+
20
32
  Logger.target target
21
- Logger.sub_info "Found #{link_a.length} point"
33
+ Logger.sub_info "Found #{link_merged.length} point. [a:#{link_a.length}/s:#{link_script.length}/l:#{link_link.length}]"
22
34
  Logger.sub_info 'Checking'
23
- jobs = Channel.new(buffer: :buffered, capacity: 100)
24
- results = Channel.new(buffer: :buffered, capacity: 100)
35
+ jobs = Channel.new(buffer: :buffered, capacity: 1000)
36
+ results = Channel.new(buffer: :buffered, capacity: 1000)
25
37
 
26
38
  (1..options['concurrency']).each do |w|
27
- Channel.go { worker(w, jobs, results) }
39
+ Channel.go { worker(w, jobs, results, options) }
28
40
  end
29
41
 
30
42
  link_a.uniq.each do |node|
@@ -36,17 +48,27 @@ class DeadFinderRunner
36
48
  (1..link_a.uniq.length).each do
37
49
  ~results
38
50
  end
39
- Logger.sub_info 'Done'
51
+ Logger.sub_done 'Done'
40
52
  end
41
53
 
42
- def worker(_id, jobs, results)
54
+ def worker(_id, jobs, results, options)
43
55
  jobs.each do |j|
44
- begin
45
- URI.open(j)
46
- rescue StandardError => e
47
- Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
56
+ if !CacheSet.include? j
57
+ CacheSet.add j
58
+ begin
59
+ CacheQue[j] = true
60
+ URI.open(j, :read_timeout => options['timeout'])
61
+ rescue StandardError => e
62
+ Logger.found "[#{e}] #{j}" if e.to_s.include? '404 Not Found'
63
+ CacheQue[j] = false
64
+ end
65
+ results << j
66
+ else
67
+ if !CacheQue[j]
68
+ Logger.found "[404 Not Found] #{j}"
69
+ end
70
+ results << j
48
71
  end
49
- results << j
50
72
  end
51
73
  end
52
74
  end
@@ -59,7 +81,7 @@ def run_pipe(options)
59
81
  end
60
82
  end
61
83
 
62
- def run_file(filename,options)
84
+ def run_file(filename, options)
63
85
  app = DeadFinderRunner.new
64
86
  File.open(filename).each do |line|
65
87
  target = line.gsub("\n", '')
@@ -67,12 +89,12 @@ def run_file(filename,options)
67
89
  end
68
90
  end
69
91
 
70
- def run_url(url,options)
92
+ def run_url(url, options)
71
93
  app = DeadFinderRunner.new
72
94
  app.run url, options
73
95
  end
74
96
 
75
- def run_sitemap(sitemap_url,options)
97
+ def run_sitemap(sitemap_url, options)
76
98
  app = DeadFinderRunner.new
77
99
  sitemap = SitemapParser.new sitemap_url, { recurse: true }
78
100
  sitemap.to_a.each do |url|
@@ -82,6 +104,7 @@ end
82
104
 
83
105
  class DeadFinder < Thor
84
106
  class_option :concurrency, aliases: :c, default: 20, type: :numeric
107
+ class_option :timeout, aliases: :t, default: 10, type: :numeric
85
108
 
86
109
  desc 'pipe', 'Scan the URLs from STDIN. (e.g cat urls.txt | deadfinder pipe)'
87
110
  def pipe
metadata CHANGED
@@ -1,16 +1,102 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: deadfinder
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - hahwul
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-25 00:00:00.000000000 Z
12
- dependencies: []
13
- description: Find dead-links (broken links)
11
+ date: 2022-09-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: colorize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: concurrent-ruby-edge
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.6.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.6.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.13.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.13.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: open-uri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.2.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: sitemap-parser
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.5.0
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.5.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: thor
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.2.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 1.2.0
97
+ description: Dead link (broken link) means a link within a web page that cannot be
98
+ connected. These links can have a security negative impact with SEO. This tool makes
99
+ it easy to identify and modify.
14
100
  email: hahwul@gmail.com
15
101
  executables:
16
102
  - deadfinder