jekyll-link-checker 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/exe/jekyll-link-checker +63 -0
- data/lib/jekyll-hook.rb +8 -0
- data/lib/jekyll-link-checker.rb +5 -0
- data/lib/link-checker.rb +249 -0
- data/lib/version.rb +5 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ab9d005b30ac7520a5f7912083966a50948c0d21d4e713bbb9639358dd033f77
|
4
|
+
data.tar.gz: ed02ed3ec9066ce54f4f74730c1de5d50206e83e39737eb8dead37ce4ae1f7ac
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 35940136ad2ddb9ccac822abf3f1fe3999c52b92052e732c9eb846be0eadc21f9b9b3f6de2fe61b4395c7ded8706c9e57cd22ad86bf1fb6c6bcaa4446785d44a
|
7
|
+
data.tar.gz: ef06e898f334b7d6a49f10af2ab9a084e77bf890fca8cd37903083483363c8e81a4b41c8af7da96fb7318eda1701747cfd951295018cb427944c4bee8f5cb977
|
data/Gemfile
ADDED
data/exe/jekyll-link-checker
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'optionparser'
|
2
|
+
require 'yaml'
|
3
|
+
require 'jekyll-link-checker'
|
4
|
+
|
5
|
+
# Find the config file: `_config.yml` is tried first, then `_config.yaml`.
config = nil
['.yml', '.yaml'].each do |ext|
  file = "_config#{ext}"
  begin
    config = YAML.load_file(file)
    break
  rescue StandardError
    # Best effort: a missing or unreadable file only means the next extension
    # is tried. The fallback below handles the case where none could be loaded.
    next
  end
end

# Build the checker from the site configuration when one was found,
# otherwise fall back to the library defaults.
link_checker = if config
                 LinkChecker.from_config(config)
               else
                 puts "`_config.yml` or `_config.yaml` couldn't be read"
                 puts "The default configuration will be used"
                 LinkChecker.new
               end

# Command line options override whatever came from the configuration file.
parser = OptionParser.new do |opts|
  opts.banner = "Usage: jekyll-link-checker [options]"

  opts.on("-n HOSTNAME", "--hostname HOSTNAME", "Hostname of the site. Ex.: example.com") do |hostname|
    link_checker.hostname = hostname
  end
  opts.on("-b BASEURL", "--baseurl BASEURL", "(this option is currently ignored)") do |baseurl|
    link_checker.baseurl = baseurl
  end
  opts.on("-d SITE_FOLDER", "--site-folder SITE_FOLDER", "Path to the site's folder") do |site_folder|
    link_checker.site_folder = site_folder
  end
  opts.on("-s [ARRAY]", "--skip-list [ARRAY]", Array, "Comma separated list of links not to check") do |skip_list|
    # The argument is optional, so the block receives nil when it is omitted;
    # treat that as an empty list instead of letting update_skip_list raise.
    link_checker.update_skip_list(skip_list || [])
  end
  opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
    link_checker.update_skip_list(skip_list)
  end
  opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status\n" \
                                    "head-only: Only tries to do a HEAD request\n" \
                                    "get-only: Only tries to do a GET request") do |mode|
    # The help text documents hyphenated names while the library compares
    # against underscored ones ("get_only", "head_only"); accept both forms.
    link_checker.mode = mode.tr("-", "_")
  end
  opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do |_|
    link_checker.fail_fast = true
  end
  # "--[no-]abort" is OptionParser's syntax for a negatable switch: the block
  # receives true for -a/--abort and false for --no-abort.
  opts.on("-a", "--[no-]abort", "Abort the program on the first invalid link") do |abort_on_failure|
    link_checker.abort_on_failure = abort_on_failure
  end
  opts.on("-V", "--verbose", "Run with verbose output") do |_|
    link_checker.verbose = true
  end
  opts.on_tail("-v", "--version", "Outputs the version and exit") do |_|
    puts JekyllLinkChecker::VERSION
    exit
  end
end

parser.parse!
link_checker.check_links
|
data/lib/jekyll-hook.rb
ADDED
data/lib/link-checker.rb
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "faraday"
|
4
|
+
require "faraday_middleware"
|
5
|
+
require "faraday-cookie_jar"
|
6
|
+
require "addressable"
|
7
|
+
require "pathname"
|
8
|
+
|
9
|
+
# Checks all the links of a static website (such as one generated by Jekyll).
#
# Links are collected from the `href="..."` attributes of the HTML files in
# the site folder. Internal links are compared against the files present in
# that folder; external http(s) links are requested over the network.
class LinkChecker
  # Headers sent with every HEAD/GET request.
  HEADERS = {
    "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/41.0.2228.0 Safari/537.36",
    "Accept" => "text/html," \
                "application/xhtml+xml," \
                "application/xml;" \
                "q=0.9,*/*;q=0.8",
    "Accept-Language" => "en-US,en;q=0.5",
    "DNT" => "1",
    "Upgrade-Insecure-Requests" => "1",
    "Pragma" => "no-cache",
    "Cache-Control" => "no-cache"
  }.freeze

  # Matches double-quoted href attributes whose value does not start with "#".
  HREF = /href="([^#"\n][^"\n]*)"/.freeze
  HTML = %w[.html .htm].freeze
  SCHEMES = %w[https http].freeze

  DEFAULT_BASE_URL = "/"
  DEFAULT_SITE_FOLDER = "_site"
  DEFAULT_MODE = "try_head"

  attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
                :fail_fast, :abort_on_failure
  attr_writer :files, :html_files, :links

  # Set default values for all the properties.
  #
  # @param opts [Hash] optional settings: :hostname, :baseurl, :site_folder,
  #   :skip_list (Array of links or String filename), :mode, :verbose,
  #   :fail_fast and :abort_on_failure (defaults to true when nil).
  def initialize(opts = {})
    @hostname = opts[:hostname]
    @baseurl = opts[:baseurl] || DEFAULT_BASE_URL
    @site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER

    update_skip_list(opts[:skip_list] || [])

    @mode = opts[:mode] || DEFAULT_MODE
    @verbose = opts[:verbose]
    @fail_fast = opts[:fail_fast]

    abort_on_failure = opts[:abort_on_failure]
    @abort_on_failure = abort_on_failure.nil? ? true : abort_on_failure
  end

  # Initialize the link checker from a Jekyll configuration.
  #
  # @param config [Hash] parsed configuration; the keys read are "url",
  #   "baseurl", "destination" and the nested "link-checker" section
  #   ("skip-list", "mode", "verbose", "fail-fast", "abort").
  # @return [LinkChecker]
  def self.from_config(config)
    opts = {}

    url = config["url"]
    opts[:hostname] = Addressable::URI.parse(url).hostname if url

    opts[:baseurl] = config["baseurl"]
    opts[:site_folder] = config["destination"]

    link_checker_config = config["link-checker"]
    if link_checker_config
      opts[:skip_list] = link_checker_config["skip-list"]
      opts[:mode] = link_checker_config["mode"]
      opts[:verbose] = link_checker_config["verbose"]
      opts[:fail_fast] = link_checker_config["fail-fast"]
      opts[:abort_on_failure] = link_checker_config["abort"]
    end

    LinkChecker.new(opts)
  end

  # Whether the options are valid. Currently only requires a hostname.
  def valid?
    return false unless @hostname

    true
  end

  # Updates skip_list with the given argument.
  # If the argument is an array, skip_list is set to the array.
  # If the argument is a string, it is interpreted as the name of a file
  # containing one link per line; lines are stripped and blank ones dropped.
  #
  # @raise [ArgumentError] if the argument is neither an Array nor a String
  # @raise [StandardError] re-raised as-is when the file can't be read
  def update_skip_list(skip_list)
    case skip_list
    when Array
      @skip_list = skip_list
    when String
      begin
        lines = File.readlines(File.expand_path(skip_list))
      rescue StandardError
        warn "Couldn't read the skip list"
        raise
      end
      @skip_list = lines.map(&:strip).reject(&:empty?)
    else
      raise ArgumentError, "skip_list must be a String or an array of String"
    end
  end

  # Checks all the links.
  #
  # Aborts the process when the configuration is invalid, and, if
  # abort_on_failure is set, when invalid links were found.
  #
  # @return [Integer, nil] the number of invalid links, or nil when fail_fast
  #   stopped the run at the first invalid link without aborting
  def check_links
    # Make sure the configuration is valid
    abort "Invalid configuration" unless valid?

    # basepath = @baseurl ? Addressable::URI.parse(@baseurl).path : "/"

    conn = create_connection

    # Test each link
    error_count = 0
    prev_msg_size = 0
    links.each_with_index do |(link, files), index|
      prev_msg_size = print_progress(link, index + 1, prev_msg_size) if verbose

      # Skip the link if it's in the skip list
      next if @skip_list.include?(link)

      # Parse the uri
      uri = Addressable::URI.parse(link)
      # NOTE(review): presumably skips URIs that only have a scheme and no
      # authority (e.g. "mailto:...") -- confirm against Addressable's #site.
      next if uri.site&.end_with?(":")

      valid =
        if uri.hostname.nil? || uri.hostname == hostname
          internal_link_valid?(link, uri.path, files)
        elsif uri.scheme.nil? || SCHEMES.include?(uri.scheme)
          external_link_valid?(conn, link, files)
        else
          # Links with any other scheme are not checked
          true
        end
      next if valid

      error_count += 1
      next unless fail_fast

      abort if abort_on_failure
      return nil
    end

    puts if verbose

    unless error_count.zero?
      msg = "There were #{error_count} invalid links"
      abort msg if @abort_on_failure
      puts msg
    end

    error_count
  end

  # Find all files in the site folder (memoized).
  def files
    @files ||= Dir[File.join(@site_folder, "**/*")].select { |f| File.file?(f) }
  end

  # Find all the valid internal link paths for the site (memoized).
  # Each file is mapped to its path relative to the site folder, prefixed
  # with "/"; directory index files are represented by their directory.
  def valid_links
    return @valid_links if @valid_links

    # Wrapped in a Pathname so relative_path_from also works on Rubies that
    # do not accept a String argument.
    root = Pathname.new(@site_folder)
    @valid_links = files.map do |file|
      normalize_path("/#{Pathname.new(file).relative_path_from(root)}")
    end
  end

  # Find all HTML files (memoized).
  def html_files
    @html_files ||= files.select { |file| HTML.include?(File.extname(file)) }
  end

  # Find all links in html_files (memoized).
  #
  # @return [Hash{String => Array<String>}] each link mapped to the files
  #   that contain it
  def links
    return @links if @links

    found = {}
    html_files.each do |file|
      # File.read closes the file once its content has been read
      File.read(file).scan(HREF).map { |match| match[0].strip }.uniq.each do |link|
        (found[link] ||= []) << file
      end
    end
    @links = found
  end

  private

  # Create a connection to make requests
  def create_connection
    Faraday.new do |faraday|
      faraday.use FaradayMiddleware::FollowRedirects
      faraday.use :cookie_jar
      faraday.adapter Faraday.default_adapter
    end
  end

  # Overwrites the previous progress message on the current terminal line.
  # Returns the size of the message that was just printed.
  def print_progress(link, position, prev_msg_size)
    print " " * prev_msg_size
    msg = "#{link} #{position}/#{links.size}"
    print "\r#{msg}\r"
    msg.size
  end

  # Canonical form shared by site files and internal links: no trailing "/"
  # and no trailing "/index.html". Only a whole "index.html" path component
  # is removed, so "/myindex.html" is left untouched.
  def normalize_path(path)
    path.chomp("/").chomp("/index.html")
  end

  # Returns whether the path of an internal link matches a file of the site,
  # reporting the link and the files that contain it otherwise.
  def internal_link_valid?(link, path, files)
    return true if valid_links.include?(normalize_path(path))

    puts "Invalid internal link '#{link}' is present in:"
    files.each { |file| puts "\t#{file}" }
    false
  end

  # Returns whether requesting an external link gives a successful status,
  # reporting the failure otherwise. Network errors raised by Faraday count
  # as an invalid link instead of crashing the whole run.
  def external_link_valid?(conn, link, files)
    status = make_request(conn, link)
    return true if status_allowed?(status)

    puts "Request to #{link} in #{files} returned #{status}"
    false
  rescue Faraday::Error => e
    puts "Request to #{link} in #{files} failed: #{e.message}"
    false
  end

  # Make a request on the connection for the URL and return its status.
  # The mode is accepted with hyphens or underscores ("get-only"/"get_only"),
  # since the command line help documents the hyphenated form.
  def make_request(conn, url)
    request_mode = @mode.to_s.tr("-", "_")

    unless request_mode == "get_only"
      response = conn.head(url, {}, HEADERS)
      return response.status if request_mode == "head_only" ||
                                status_allowed?(response.status)
    end

    conn.get(url, {}, HEADERS).status
  end

  # Returns whether the status is successful (2xx)
  def status_allowed?(status)
    status >= 200 && status < 300
  end
end
|
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jekyll-link-checker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Zakary Kamal Ismail
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-09-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.15'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.15'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faraday-cookie_jar
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.0.6
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.0.6
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faraday_middleware
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.13'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.13'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: jekyll
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '5.0'
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '3.0'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '5.0'
|
75
|
+
description: Verifies that all the links in a Jekyll website are valid. It can also
|
76
|
+
work with any static site generator.
|
77
|
+
email: zakary.kamal.fs@outlook.com
|
78
|
+
executables:
|
79
|
+
- jekyll-link-checker
|
80
|
+
extensions: []
|
81
|
+
extra_rdoc_files: []
|
82
|
+
files:
|
83
|
+
- Gemfile
|
84
|
+
- exe/jekyll-link-checker
|
85
|
+
- lib/jekyll-hook.rb
|
86
|
+
- lib/jekyll-link-checker.rb
|
87
|
+
- lib/link-checker.rb
|
88
|
+
- lib/version.rb
|
89
|
+
homepage: https://gitlab.com/ZakCodes/jekyll-link-checker
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata:
|
93
|
+
bug_tracker_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/issues
|
94
|
+
changelog_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/-/releases
|
95
|
+
homepage_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
|
96
|
+
source_code_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options: []
|
99
|
+
require_paths:
|
100
|
+
- lib
|
101
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '0'
|
106
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
requirements: []
|
112
|
+
rubygems_version: 3.0.6
|
113
|
+
signing_key:
|
114
|
+
specification_version: 4
|
115
|
+
summary: Checks all the links of a Jekyll website.
|
116
|
+
test_files: []
|