jekyll-link-checker 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/exe/jekyll-link-checker +63 -0
- data/lib/jekyll-hook.rb +8 -0
- data/lib/jekyll-link-checker.rb +5 -0
- data/lib/link-checker.rb +249 -0
- data/lib/version.rb +5 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ab9d005b30ac7520a5f7912083966a50948c0d21d4e713bbb9639358dd033f77
|
4
|
+
data.tar.gz: ed02ed3ec9066ce54f4f74730c1de5d50206e83e39737eb8dead37ce4ae1f7ac
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 35940136ad2ddb9ccac822abf3f1fe3999c52b92052e732c9eb846be0eadc21f9b9b3f6de2fe61b4395c7ded8706c9e57cd22ad86bf1fb6c6bcaa4446785d44a
|
7
|
+
data.tar.gz: ef06e898f334b7d6a49f10af2ab9a084e77bf890fca8cd37903083483363c8e81a4b41c8af7da96fb7318eda1701747cfd951295018cb427944c4bee8f5cb977
|
data/Gemfile
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'optionparser'
|
2
|
+
require 'yaml'
|
3
|
+
require 'jekyll-link-checker'
|
4
|
+
|
5
|
+
# Find the config file: the first candidate that exists and can be parsed wins.
config = nil
%w[.yml .yaml].each do |ext|
  file = "_config#{ext}"
  next unless File.file?(file)

  begin
    config = YAML.load_file(file)
    break
  rescue StandardError => e
    # Stay best-effort (fall through to the next candidate / the defaults),
    # but tell the user why an existing config file was not used.
    warn "Couldn't load `#{file}`: #{e.message}"
  end
end

# Build the checker from the site's configuration when one was loaded,
# otherwise fall back to the defaults of LinkChecker.
link_checker = if config
  LinkChecker.from_config(config)
else
  puts "`_config.yml` or `_config.yaml` couldn't be read"
  puts "The default configuration will be used"
  LinkChecker.new
end

# Command line options override whatever came from the configuration file.
parser = OptionParser.new do |opts|
  opts.banner = "Usage: jekyll-link-checker [options]"

  opts.on("-n HOSTNAME", "--hostname HOSTNAME", "Hostname of the site. Ex.: example.com") do |hostname|
    link_checker.hostname = hostname
  end
  opts.on("-b BASEURL", "--baseurl BASEURL", "(this option is currently ignored)") do |baseurl|
    link_checker.baseurl = baseurl
  end
  opts.on("-d SITE_FOLDER", "--site-folder SITE_FOLDER", "Path to the site's folder") do |site_folder|
    link_checker.site_folder = site_folder
  end
  opts.on("-s [ARRAY]", "--skip-list [ARRAY]", Array, "Comma separated list of links not to check") do |skip_list|
    # The argument is optional, so OptionParser yields nil when it is omitted;
    # treat that as an empty list instead of letting update_skip_list raise.
    link_checker.update_skip_list(skip_list || [])
  end
  opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
    link_checker.update_skip_list(skip_list)
  end
  opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status\n" \
                                    "head-only: Only tries to do a HEAD request\n" \
                                    "get-only: Only tries to do a GET request") do |mode|
    # The help text documents hyphenated names while LinkChecker compares
    # against the underscored ones ("try_head", "head_only", "get_only").
    link_checker.mode = mode.tr("-", "_")
  end
  opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do
    link_checker.fail_fast = true
  end
  # `--[no-]abort` is OptionParser's syntax for a negatable switch; the block
  # receives true for `-a`/`--abort` and false for `--no-abort`.
  opts.on("-a", "--[no-]abort", "Abort the program on the first invalid link") do |abort_on_failure|
    link_checker.abort_on_failure = abort_on_failure
  end
  opts.on("-V", "--verbose", "Run with verbose output") do
    link_checker.verbose = true
  end
  opts.on_tail("-v", "--version", "Outputs the version and exit") do
    puts JekyllLinkChecker::VERSION
    exit
  end
end

parser.parse!
link_checker.check_links
|
data/lib/jekyll-hook.rb
ADDED
data/lib/link-checker.rb
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "faraday"
|
4
|
+
require "faraday_middleware"
|
5
|
+
require "faraday-cookie_jar"
|
6
|
+
require "addressable"
|
7
|
+
require "pathname"
|
8
|
+
|
9
|
+
# Checks all the links of a Jekyll website.
#
# Links are collected from the `href="..."` attributes of the HTML files found
# in the site folder. Links without a hostname, or whose hostname is the
# configured one, are looked up among the site's files; other http(s) links
# are checked by sending HEAD and/or GET requests.
class LinkChecker
  # Headers sent with every outgoing request.
  HEADERS = {
    "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/41.0.2228.0 Safari/537.36",
    "Accept" => "text/html," \
                "application/xhtml+xml," \
                "application/xml;" \
                "q=0.9,*/*;q=0.8",
    "Accept-Language" => "en-US,en;q=0.5",
    "DNT" => "1",
    "Upgrade-Insecure-Requests" => "1",
    "Pragma" => "no-cache",
    "Cache-Control" => "no-cache"
  }.freeze

  # Captures the value of double-quoted href attributes, ignoring values that
  # start with `#` (same-page anchors) and empty values.
  HREF = /href="([^#"\n][^"\n]*)"/.freeze
  # File extensions that are scanned for links.
  HTML = %w[.html .htm].freeze
  # URI schemes for which a request is made.
  SCHEMES = %w[https http].freeze

  DEFAULT_BASE_URL = "/"
  DEFAULT_SITE_FOLDER = "_site"
  DEFAULT_MODE = "try_head"

  attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
                :fail_fast, :abort_on_failure
  attr_writer :files, :html_files, :links

  # Set default values for all the properties.
  #
  # @param opts [Hash] optional settings, keyed by symbol: :hostname, :baseurl,
  #   :site_folder, :skip_list (Array of links or path to a file), :mode,
  #   :verbose, :fail_fast and :abort_on_failure (defaults to true when nil)
  def initialize(opts = {})
    @hostname = opts[:hostname]
    @baseurl = opts[:baseurl] || DEFAULT_BASE_URL
    @site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER

    update_skip_list(opts[:skip_list] || [])

    @mode = opts[:mode] || DEFAULT_MODE
    @verbose = opts[:verbose]
    @fail_fast = opts[:fail_fast]

    # Only an explicit `false` disables aborting; a missing value means true
    abort_on_failure = opts[:abort_on_failure]
    @abort_on_failure = abort_on_failure.nil? ? true : abort_on_failure
  end

  # Initialize the link checker from a Jekyll configuration file.
  #
  # Reads the "url", "baseurl" and "destination" keys, plus the "skip-list",
  # "mode", "verbose", "fail-fast" and "abort" keys of the "link-checker"
  # section when that section is present.
  #
  # @param config [Hash] the parsed configuration
  # @return [LinkChecker]
  def self.from_config(config)
    opts = {}

    url = config["url"]
    opts[:hostname] = Addressable::URI.parse(url).hostname if url

    opts[:baseurl] = config["baseurl"]
    opts[:site_folder] = config["destination"]

    link_checker_config = config["link-checker"]
    if link_checker_config
      opts[:skip_list] = link_checker_config["skip-list"]
      opts[:mode] = link_checker_config["mode"]
      opts[:verbose] = link_checker_config["verbose"]
      opts[:fail_fast] = link_checker_config["fail-fast"]
      opts[:abort_on_failure] = link_checker_config["abort"]
    end

    LinkChecker.new(opts)
  end

  # Whether the options are valid. A hostname is the only requirement.
  #
  # @return [Boolean]
  def valid?
    @hostname ? true : false
  end

  # Replaces skip_list with the given argument.
  # If the argument is an array, skip_list is set to the array.
  # If the argument is a string, the argument will be interpreted as the name
  # of a file where each non-empty line is a link to skip.
  #
  # @param skip_list [Array<String>, String]
  # @raise [ArgumentError] if the argument is neither an Array nor a String
  # @raise [StandardError] whatever reading the file raised, after a warning
  def update_skip_list(skip_list)
    case skip_list
    when Array
      @skip_list = skip_list
    when String
      begin
        lines = File.readlines(File.expand_path(skip_list))
      rescue StandardError
        warn "Couldn't read the skip list"
        raise
      end
      @skip_list = lines.map(&:strip).reject(&:empty?)
    else
      raise ArgumentError, "skip_list must be a String or an array of String"
    end
  end

  # Checks all the links.
  #
  # Aborts the program when the configuration is invalid, and also when
  # invalid links were found and abort_on_failure is set. With fail_fast the
  # check stops at the first invalid link.
  #
  # @return [Integer, nil] the number of invalid links, or nil when fail_fast
  #   stopped the check without aborting
  def check_links
    # Make sure the configuration is valid
    abort "Invalid configuration" unless valid?

    conn = create_connection

    # Test each link
    error_count = 0
    prev_msg_size = 0
    links.each.with_index(1) do |(link, files), position|
      if verbose
        # Overwrite the previous progress message on the same terminal line
        print " " * prev_msg_size
        msg = "#{link} #{position}/#{links.size}"
        print "\r#{msg}\r"
        prev_msg_size = msg.size
      end

      # Skip the link if it's in the skip list
      next if @skip_list.include?(link)

      # Parse the uri; skip links made of a scheme only, such as `mailto:`
      uri = Addressable::URI.parse(link)
      next if uri.site&.end_with?(":")

      next unless link_broken?(conn, link, uri, files)

      error_count += 1
      next unless fail_fast

      abort if abort_on_failure
      return nil
    end

    puts if verbose

    unless error_count.zero?
      msg = "There were #{error_count} invalid links"
      abort msg if @abort_on_failure

      puts msg
    end

    error_count
  end

  # Find all files in the site folder (memoized).
  #
  # @return [Array<String>] paths of the regular files, prefixed by site_folder
  def files
    @files ||= Dir[File.join(@site_folder, "**/*")].select { |f| File.file?(f) }
  end

  # Find all the valid links for the site (memoized).
  #
  # Each file becomes its path relative to the site folder with a leading "/".
  # A trailing "/index.html" is dropped, so `blog/index.html` is reachable as
  # "/blog" and the root `index.html` as "".
  #
  # @return [Array<String>]
  def valid_links
    return @valid_links if @valid_links

    # Wrapped in a Pathname because older Rubies reject a String here
    root = Pathname.new(@site_folder)
    @valid_links = files.map do |file|
      relative = Pathname.new(file).relative_path_from(root)
      normalize_path("/#{relative}")
    end
  end

  # Find all HTML files (memoized).
  #
  # @return [Array<String>] the files whose extension is listed in HTML
  def html_files
    @html_files ||= files.select { |file| HTML.include?(File.extname(file)) }
  end

  # Find all links in html_files (memoized).
  #
  # @return [Hash{String => Array<String>}] each link mapped to the files
  #   that contain it
  def links
    return @links if @links

    @links = html_files.each_with_object({}) do |file, found|
      # File.read closes the file, unlike File.open(file).read
      File.read(file).scan(HREF).map { |match| match.first.strip }.uniq.each do |link|
        (found[link] ||= []) << file
      end
    end
  end

  private

  # Reduces a path to the form stored in valid_links: without a trailing
  # "/index.html" and without a trailing "/". Never mutates its argument.
  def normalize_path(path)
    path.delete_suffix("/index.html").chomp("/")
  end

  # Whether the link is broken, reporting it on stdout if it is. Links that
  # are neither internal nor http(s) are never considered broken.
  def link_broken?(conn, link, uri, files)
    if uri.hostname.nil? || uri.hostname == hostname
      internal_link_broken?(link, uri, files)
    elsif uri.scheme.nil? || SCHEMES.include?(uri.scheme)
      external_link_broken?(conn, link, files)
    else
      false
    end
  end

  # Whether the path of an internal link is missing from the site's files.
  def internal_link_broken?(link, uri, files)
    return false if valid_links.include?(normalize_path(uri.path))

    puts "Invalid internal link '#{link}' is present in:"
    files.each { |file| puts "\t#{file}" }
    true
  end

  # Whether a request to an external link failed or returned a status outside
  # of the allowed range.
  def external_link_broken?(conn, link, files)
    status = make_request(conn, link)
    return false if status_allowed?(status)

    puts "Request to #{link} in #{files} returned #{status}"
    true
  rescue Faraday::Error => e
    # An unreachable host is a broken link, not a reason to crash the run
    puts "Request to #{link} in #{files} failed: #{e.message}"
    true
  end

  # Create a connection to make requests. It follows redirects and keeps
  # cookies between requests.
  def create_connection
    Faraday.new do |faraday|
      faraday.use FaradayMiddleware::FollowRedirects
      faraday.use :cookie_jar
      faraday.adapter Faraday.default_adapter
    end
  end

  # Make a request on the connection for the URL.
  #
  # Depending on the mode, sends a HEAD request, a GET request, or a HEAD
  # request followed by a GET request when HEAD was not successful. The mode
  # is accepted with either underscores or hyphens ("get_only"/"get-only").
  #
  # @return [Integer] the status of the last response
  def make_request(conn, url)
    current_mode = @mode.to_s.tr("-", "_")

    if current_mode != "get_only"
      response = conn.head(url, {}, HEADERS)
      return response.status if current_mode == "head_only" ||
                                status_allowed?(response.status)
    end

    conn.get(url, {}, HEADERS).status
  end

  # Returns whether the status is successful (2xx).
  def status_allowed?(status)
    status >= 200 && status < 300
  end
end
|
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jekyll-link-checker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Zakary Kamal Ismail
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-09-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: faraday
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.15'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.15'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faraday-cookie_jar
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.0.6
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.0.6
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faraday_middleware
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.13'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.13'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: jekyll
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '5.0'
|
65
|
+
type: :runtime
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '3.0'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '5.0'
|
75
|
+
description: Verifies that all the links in a Jekyll website are valid. It can also
|
76
|
+
work with any static site generator.
|
77
|
+
email: zakary.kamal.fs@outlook.com
|
78
|
+
executables:
|
79
|
+
- jekyll-link-checker
|
80
|
+
extensions: []
|
81
|
+
extra_rdoc_files: []
|
82
|
+
files:
|
83
|
+
- Gemfile
|
84
|
+
- exe/jekyll-link-checker
|
85
|
+
- lib/jekyll-hook.rb
|
86
|
+
- lib/jekyll-link-checker.rb
|
87
|
+
- lib/link-checker.rb
|
88
|
+
- lib/version.rb
|
89
|
+
homepage: https://gitlab.com/ZakCodes/jekyll-link-checker
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata:
|
93
|
+
bug_tracker_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/issues
|
94
|
+
changelog_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/-/releases
|
95
|
+
homepage_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
|
96
|
+
source_code_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options: []
|
99
|
+
require_paths:
|
100
|
+
- lib
|
101
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '0'
|
106
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
requirements: []
|
112
|
+
rubygems_version: 3.0.6
|
113
|
+
signing_key:
|
114
|
+
specification_version: 4
|
115
|
+
summary: Checks all the links of a Jekyll website.
|
116
|
+
test_files: []
|