jekyll-link-checker 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/jekyll-link-checker +7 -3
- data/lib/link-checker.rb +143 -36
- data/lib/version.rb +1 -1
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95ce2357aa0eaea04dbc5695f935918cfb98b8a76786dfb2c9317579e8801a4f
|
4
|
+
data.tar.gz: 2e1cbde04eaa0f36daf75027f14a45364a57b203409d30abfd353d82a1242235
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d77c8b80e72b2ff133b2188eed42c60fb07e174997097d9b7f4c558833bbe3db75b2c91c308a566f3f6fe757056c0a6c1913942062d886180ea552d15ea48348
|
7
|
+
data.tar.gz: 7b2714ae7ebe65845a225d882091d3c34a0aa62a4e5104cc577296a759d37a0fde82369e85c0d420d6383e7690be774f3185490a4d835ad44b06a99dce03fbc0
|
data/exe/jekyll-link-checker
CHANGED
@@ -39,15 +39,19 @@ parser = OptionParser.new do |opts|
|
|
39
39
|
opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
|
40
40
|
link_checker.update_skip_list(skip_list)
|
41
41
|
end
|
42
|
-
opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status
|
43
|
-
"head-only: Only tries to do a HEAD request
|
42
|
+
opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status",
|
43
|
+
"head-only: Only tries to do a HEAD request",
|
44
44
|
"get-only: Only tries to do a GET request") do |mode|
|
45
45
|
link_checker.mode = mode
|
46
46
|
end
|
47
|
+
opts.on("-i", "--[no-]ignore-fragments", "Whether to ignore the fragments " \
|
48
|
+
"in links. Defaults to false.") do |ignore_fragments|
|
49
|
+
link_checker.ignore_fragments = ignore_fragments
|
50
|
+
end
|
47
51
|
opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do |_|
|
48
52
|
link_checker.fail_fast = true
|
49
53
|
end
|
50
|
-
opts.on("-a", "--[no]
|
54
|
+
opts.on("-a", "--[no-]abort", "Abort the program on the first invalid link") do |abort_on_failure|
|
51
55
|
link_checker.abort_on_failure = abort_on_failure
|
52
56
|
end
|
53
57
|
opts.on("-V", "--verbose", "Run with verbose output") do |_|
|
data/lib/link-checker.rb
CHANGED
@@ -6,7 +6,7 @@ require "faraday-cookie_jar"
|
|
6
6
|
require "addressable"
|
7
7
|
require "pathname"
|
8
8
|
|
9
|
-
# Checks all the links of a
|
9
|
+
# Checks all the links of a static website to make sure they're all valid
|
10
10
|
class LinkChecker
|
11
11
|
HEADERS = {
|
12
12
|
"User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
|
@@ -23,7 +23,8 @@ class LinkChecker
|
|
23
23
|
"Cache-Control" => "no-cache"
|
24
24
|
}.freeze
|
25
25
|
|
26
|
-
HREF = /href="([
|
26
|
+
HREF = /href="([^"\n]+)"/.freeze
|
27
|
+
ID = /id="([^"\n]+)"/.freeze
|
27
28
|
HTML = %w[.html .htm].freeze
|
28
29
|
SCHEMES = %w[https http].freeze
|
29
30
|
|
@@ -32,7 +33,7 @@ class LinkChecker
|
|
32
33
|
DEFAULT_MODE = "try_head"
|
33
34
|
|
34
35
|
attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
|
35
|
-
:fail_fast, :abort_on_failure
|
36
|
+
:ignore_fragments, :fail_fast, :abort_on_failure
|
36
37
|
attr_writer :files, :html_files, :links
|
37
38
|
|
38
39
|
# Set default values for all the properties
|
@@ -42,6 +43,7 @@ class LinkChecker
|
|
42
43
|
@site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER
|
43
44
|
|
44
45
|
update_skip_list(opts[:skip_list] || [])
|
46
|
+
@ignore_fragments = opts[:ignore_fragments]
|
45
47
|
|
46
48
|
@mode = opts[:mode] || DEFAULT_MODE
|
47
49
|
@verbose = opts[:verbose]
|
@@ -67,6 +69,7 @@ class LinkChecker
|
|
67
69
|
link_checker_config = config["link-checker"]
|
68
70
|
if link_checker_config
|
69
71
|
opts[:skip_list] = link_checker_config["skip-list"]
|
72
|
+
opts[:ignore_fragments] = link_checker_config["ignore-fragments"]
|
70
73
|
opts[:mode] = link_checker_config["mode"]
|
71
74
|
opts[:verbose] = link_checker_config["verbose"]
|
72
75
|
opts[:fail_fast] = link_checker_config["fail-fast"]
|
@@ -116,21 +119,17 @@ class LinkChecker
|
|
116
119
|
error_count = 0
|
117
120
|
i = 0
|
118
121
|
prev_msg_size = 0
|
119
|
-
links.each do |
|
122
|
+
links.each do |uri, fragments|
|
120
123
|
i += 1
|
121
124
|
if verbose
|
122
125
|
prev_msg_size.times { print " " }
|
123
|
-
msg = "#{
|
126
|
+
msg = "#{uri} #{i}/#{links.size}"
|
124
127
|
print "\r#{msg}\r"
|
125
128
|
prev_msg_size = msg.size
|
126
129
|
end
|
127
130
|
|
128
131
|
# Skip the link if it's in the skip list
|
129
|
-
next if @skip_list.include?(
|
130
|
-
|
131
|
-
# Parse the uri
|
132
|
-
uri = Addressable::URI.parse(link)
|
133
|
-
next if uri.site&.end_with?(":")
|
132
|
+
next if @skip_list.include?(uri.to_s)
|
134
133
|
|
135
134
|
error = false
|
136
135
|
|
@@ -138,16 +137,52 @@ class LinkChecker
|
|
138
137
|
if uri.hostname.nil? || uri.hostname == hostname
|
139
138
|
uri.path.chomp!("/")
|
140
139
|
|
141
|
-
# If the uri
|
142
|
-
|
143
|
-
|
144
|
-
|
140
|
+
# If the uri's path is valid
|
141
|
+
valid_fragments = valid_links[uri.path]
|
142
|
+
if valid_fragments
|
143
|
+
fragments.each do |fragment, files|
|
144
|
+
# Skip the base fragment
|
145
|
+
next unless fragment
|
146
|
+
|
147
|
+
next if valid_fragments.include?(fragment)
|
148
|
+
|
149
|
+
error = true
|
150
|
+
puts "Invalid fragment '#{fragment}' in link '#{uri}' " \
|
151
|
+
"is present in:"
|
152
|
+
files.each { |file| puts "\t#{file}" }
|
153
|
+
end
|
154
|
+
else
|
145
155
|
error = true
|
156
|
+
puts "Invalid internal link '#{link}' is present in:"
|
157
|
+
fragments.flat_map { |_, files| files }.uniq
|
158
|
+
.each { |file| puts "\t#{file}" }
|
146
159
|
end
|
147
|
-
elsif
|
148
|
-
status = make_request(conn,
|
160
|
+
elsif fragments.keys == [nil]
|
161
|
+
status = make_request(conn, uri)
|
149
162
|
error = !status_allowed?(status)
|
150
|
-
|
163
|
+
if error
|
164
|
+
puts "Request to #{link} returned #{status} present in"
|
165
|
+
fragments[nil].each { |file| puts "\t#{file}" }
|
166
|
+
end
|
167
|
+
else
|
168
|
+
response = get_request(conn, uri)
|
169
|
+
status = response.status
|
170
|
+
if status == 200
|
171
|
+
valid_fragments = uniq_string_matches(response.body, ID)
|
172
|
+
fragments.each do |fragment, files|
|
173
|
+
unless valid_fragments.include?(fragment)
|
174
|
+
puts "Invalid link to fragment '#{fragment}' present in: "
|
175
|
+
files.each { |file| puts "\t#{file}" }
|
176
|
+
end
|
177
|
+
end
|
178
|
+
else
|
179
|
+
error = true
|
180
|
+
puts "Request to #{link} in #{files} returned #{status}"
|
181
|
+
error = true
|
182
|
+
puts "Invalid internal link '#{link}' is present in:"
|
183
|
+
fragments.flat_map { |_, files| files }.uniq
|
184
|
+
.each { |file| puts "\t#{file}" }
|
185
|
+
end
|
151
186
|
end
|
152
187
|
|
153
188
|
next unless error
|
@@ -181,40 +216,75 @@ class LinkChecker
|
|
181
216
|
end
|
182
217
|
|
183
218
|
# Find all the valid links for the site
|
219
|
+
# The value returned by this method is formatted like so:
|
220
|
+
# {
|
221
|
+
# "path": [
|
222
|
+
# fragment
|
223
|
+
# ]
|
224
|
+
# }
|
184
225
|
def valid_links
|
185
226
|
return @valid_links if @valid_links
|
186
227
|
|
187
228
|
@valid_links = files.map do |file|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
path
|
229
|
+
fragments = []
|
230
|
+
fragments = uniq_file_matches(file, ID) if html?(file) &&
|
231
|
+
!@ignore_fragments
|
232
|
+
|
233
|
+
[file_url(file), fragments]
|
194
234
|
end
|
235
|
+
@valid_links = @valid_links.to_h
|
195
236
|
end
|
196
237
|
|
197
238
|
# Find all HTML files
|
198
239
|
def html_files
|
199
240
|
return @html_files if @html_files
|
200
241
|
|
201
|
-
@html_files = files.filter { |file|
|
242
|
+
@html_files = files.filter { |file| html?(file) }
|
202
243
|
end
|
203
244
|
|
204
245
|
# Find all links in html_files
|
246
|
+
# The value returned by this method is formatted like so:
|
247
|
+
# {
|
248
|
+
# uri without fragment: {
|
249
|
+
# uri's fragment: Set [
|
250
|
+
# "file containing this link"
|
251
|
+
# ]
|
252
|
+
# }
|
253
|
+
# }
|
205
254
|
def links
|
206
255
|
return @links if @links
|
207
256
|
|
208
257
|
@links = {}
|
209
258
|
html_files.each do |file|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
259
|
+
file_path = file_url(file)
|
260
|
+
|
261
|
+
# For each link in the file
|
262
|
+
uniq_file_matches(file, HREF).each do |link|
|
263
|
+
uri = Addressable::URI.parse(link)
|
264
|
+
|
265
|
+
# Skip email and phone number URIs
|
266
|
+
next if uri.site&.end_with?(":")
|
267
|
+
# Skip the URIs with unknown schemes
|
268
|
+
next unless uri.scheme.nil? || SCHEMES.include?(uri.scheme)
|
269
|
+
|
270
|
+
# Set the URI's path to the file's valid link if the link is a
|
271
|
+
# fragment of the current file
|
272
|
+
uri.path = file_path if link.start_with?("#")
|
273
|
+
uri.path = uri.path.dup
|
274
|
+
|
275
|
+
# Remove the fragment from the URI and put it in a local variable
|
276
|
+
fragment = uri.fragment.nil? || uri.fragment.empty? ? nil : uri.fragment
|
277
|
+
uri.fragment = nil
|
278
|
+
|
279
|
+
fragment = nil if @ignore_fragments
|
280
|
+
|
281
|
+
# Get the link for the URI
|
282
|
+
uri_fragments = @links[uri] ||= {}
|
283
|
+
|
284
|
+
# Get the files for the fragment
|
285
|
+
fragment_files = uri_fragments[fragment] ||= Set.new
|
286
|
+
|
287
|
+
fragment_files << file
|
218
288
|
end
|
219
289
|
end
|
220
290
|
@links
|
@@ -234,16 +304,53 @@ class LinkChecker
|
|
234
304
|
# Make a request on the connection for the URL
|
235
305
|
def make_request(conn, url)
|
236
306
|
if @mode != "get_only"
|
237
|
-
|
238
|
-
return
|
239
|
-
status_allowed?(
|
307
|
+
response_status = head_request(conn, url).status
|
308
|
+
return response_status if mode == "head_only" ||
|
309
|
+
status_allowed?(response_status)
|
240
310
|
end
|
241
311
|
|
242
|
-
conn
|
312
|
+
get_request(conn, url).status
|
313
|
+
end
|
314
|
+
|
315
|
+
# Make a get request on the connection for the URL
|
316
|
+
def get_request(conn, url)
|
317
|
+
conn.get(url, {}, HEADERS)
|
318
|
+
end
|
319
|
+
|
320
|
+
# Make a head request on the connection for the URL
|
321
|
+
def head_request(conn, url)
|
322
|
+
conn.head(url, {}, HEADERS)
|
243
323
|
end
|
244
324
|
|
245
325
|
# Returns whether the status is successful
|
246
326
|
def status_allowed?(status)
|
247
327
|
status >= 200 && status < 300
|
248
328
|
end
|
329
|
+
|
330
|
+
# Finds all the matches in a file for a given regex
|
331
|
+
def uniq_file_matches(path, regex)
|
332
|
+
uniq_string_matches(File.open(path).read, regex)
|
333
|
+
end
|
334
|
+
|
335
|
+
# Finds all the matches in a String for a given regex
|
336
|
+
def uniq_string_matches(str, regex)
|
337
|
+
str.scan(regex)
|
338
|
+
.map { |matches| matches[0].strip }
|
339
|
+
.uniq
|
340
|
+
end
|
341
|
+
|
342
|
+
# Determines whether the file is an HTML file based on its extension
|
343
|
+
def html?(path)
|
344
|
+
HTML.include?(File.extname(path))
|
345
|
+
end
|
346
|
+
|
347
|
+
# Gets the url of a file in the static site based on its path
|
348
|
+
def file_url(path)
|
349
|
+
path = Pathname.new(path)
|
350
|
+
path = path.relative_path_from(@site_folder)
|
351
|
+
path = "/" + path.to_s
|
352
|
+
path.chomp!("index.html")
|
353
|
+
path.chomp!("/")
|
354
|
+
path
|
355
|
+
end
|
249
356
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-link-checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zakary Kamal Ismail
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -72,6 +72,34 @@ dependencies:
|
|
72
72
|
- - "<"
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '5.0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: rspec
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: rubocop
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
type: :development
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
75
103
|
description: Verifies that all the links in a Jekyll website are valid. It can also
|
76
104
|
work with any static site generator.
|
77
105
|
email: zakary.kamal.fs@outlook.com
|