jekyll-link-checker 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/jekyll-link-checker +7 -3
- data/lib/link-checker.rb +143 -36
- data/lib/version.rb +1 -1
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95ce2357aa0eaea04dbc5695f935918cfb98b8a76786dfb2c9317579e8801a4f
|
4
|
+
data.tar.gz: 2e1cbde04eaa0f36daf75027f14a45364a57b203409d30abfd353d82a1242235
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d77c8b80e72b2ff133b2188eed42c60fb07e174997097d9b7f4c558833bbe3db75b2c91c308a566f3f6fe757056c0a6c1913942062d886180ea552d15ea48348
|
7
|
+
data.tar.gz: 7b2714ae7ebe65845a225d882091d3c34a0aa62a4e5104cc577296a759d37a0fde82369e85c0d420d6383e7690be774f3185490a4d835ad44b06a99dce03fbc0
|
data/exe/jekyll-link-checker
CHANGED
@@ -39,15 +39,19 @@ parser = OptionParser.new do |opts|
|
|
39
39
|
opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
|
40
40
|
link_checker.update_skip_list(skip_list)
|
41
41
|
end
|
42
|
-
opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status
|
43
|
-
"head-only: Only tries to do a HEAD request
|
42
|
+
opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status",
|
43
|
+
"head-only: Only tries to do a HEAD request",
|
44
44
|
"get-only: Only tries to do a GET request") do |mode|
|
45
45
|
link_checker.mode = mode
|
46
46
|
end
|
47
|
+
opts.on("-i", "--[no-]ignore-fragments", "Whether to ignore the fragments " \
|
48
|
+
"in links. Defaults to false.") do |ignore_fragments|
|
49
|
+
link_checker.ignore_fragments = ignore_fragments
|
50
|
+
end
|
47
51
|
opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do |_|
|
48
52
|
link_checker.fail_fast = true
|
49
53
|
end
|
50
|
-
opts.on("-a", "--[no]
|
54
|
+
opts.on("-a", "--[no-]abort", "Abort the program on the first invalid link") do |abort_on_failure|
|
51
55
|
link_checker.abort_on_failure = abort_on_failure
|
52
56
|
end
|
53
57
|
opts.on("-V", "--verbose", "Run with verbose output") do |_|
|
data/lib/link-checker.rb
CHANGED
@@ -6,7 +6,7 @@ require "faraday-cookie_jar"
|
|
6
6
|
require "addressable"
|
7
7
|
require "pathname"
|
8
8
|
|
9
|
-
# Checks all the links of a
|
9
|
+
# Checks all the links of a static website to make sure they're all valid
|
10
10
|
class LinkChecker
|
11
11
|
HEADERS = {
|
12
12
|
"User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
|
@@ -23,7 +23,8 @@ class LinkChecker
|
|
23
23
|
"Cache-Control" => "no-cache"
|
24
24
|
}.freeze
|
25
25
|
|
26
|
-
HREF = /href="([
|
26
|
+
HREF = /href="([^"\n]+)"/.freeze
|
27
|
+
ID = /id="([^"\n]+)"/.freeze
|
27
28
|
HTML = %w[.html .htm].freeze
|
28
29
|
SCHEMES = %w[https http].freeze
|
29
30
|
|
@@ -32,7 +33,7 @@ class LinkChecker
|
|
32
33
|
DEFAULT_MODE = "try_head"
|
33
34
|
|
34
35
|
attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
|
35
|
-
:fail_fast, :abort_on_failure
|
36
|
+
:ignore_fragments, :fail_fast, :abort_on_failure
|
36
37
|
attr_writer :files, :html_files, :links
|
37
38
|
|
38
39
|
# Set default values for all the properties
|
@@ -42,6 +43,7 @@ class LinkChecker
|
|
42
43
|
@site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER
|
43
44
|
|
44
45
|
update_skip_list(opts[:skip_list] || [])
|
46
|
+
@ignore_fragments = opts[:ignore_fragments]
|
45
47
|
|
46
48
|
@mode = opts[:mode] || DEFAULT_MODE
|
47
49
|
@verbose = opts[:verbose]
|
@@ -67,6 +69,7 @@ class LinkChecker
|
|
67
69
|
link_checker_config = config["link-checker"]
|
68
70
|
if link_checker_config
|
69
71
|
opts[:skip_list] = link_checker_config["skip-list"]
|
72
|
+
opts[:ignore_fragments] = link_checker_config["ignore-fragments"]
|
70
73
|
opts[:mode] = link_checker_config["mode"]
|
71
74
|
opts[:verbose] = link_checker_config["verbose"]
|
72
75
|
opts[:fail_fast] = link_checker_config["fail-fast"]
|
@@ -116,21 +119,17 @@ class LinkChecker
|
|
116
119
|
error_count = 0
|
117
120
|
i = 0
|
118
121
|
prev_msg_size = 0
|
119
|
-
links.each do |
|
122
|
+
links.each do |uri, fragments|
|
120
123
|
i += 1
|
121
124
|
if verbose
|
122
125
|
prev_msg_size.times { print " " }
|
123
|
-
msg = "#{
|
126
|
+
msg = "#{uri} #{i}/#{links.size}"
|
124
127
|
print "\r#{msg}\r"
|
125
128
|
prev_msg_size = msg.size
|
126
129
|
end
|
127
130
|
|
128
131
|
# Skip the link if it's in the skip list
|
129
|
-
next if @skip_list.include?(
|
130
|
-
|
131
|
-
# Parse the uri
|
132
|
-
uri = Addressable::URI.parse(link)
|
133
|
-
next if uri.site&.end_with?(":")
|
132
|
+
next if @skip_list.include?(uri.to_s)
|
134
133
|
|
135
134
|
error = false
|
136
135
|
|
@@ -138,16 +137,52 @@ class LinkChecker
|
|
138
137
|
if uri.hostname.nil? || uri.hostname == hostname
|
139
138
|
uri.path.chomp!("/")
|
140
139
|
|
141
|
-
# If the uri
|
142
|
-
|
143
|
-
|
144
|
-
|
140
|
+
# If the uri's path is valid
|
141
|
+
valid_fragments = valid_links[uri.path]
|
142
|
+
if valid_fragments
|
143
|
+
fragments.each do |fragment, files|
|
144
|
+
# Skip the base fragment
|
145
|
+
next unless fragment
|
146
|
+
|
147
|
+
next if valid_fragments.include?(fragment)
|
148
|
+
|
149
|
+
error = true
|
150
|
+
puts "Invalid fragment '#{fragment}' in link '#{uri}' " \
|
151
|
+
"is present in:"
|
152
|
+
files.each { |file| puts "\t#{file}" }
|
153
|
+
end
|
154
|
+
else
|
145
155
|
error = true
|
156
|
+
puts "Invalid internal link '#{link}' is present in:"
|
157
|
+
fragments.flat_map { |_, files| files }.uniq
|
158
|
+
.each { |file| puts "\t#{file}" }
|
146
159
|
end
|
147
|
-
elsif
|
148
|
-
status = make_request(conn,
|
160
|
+
elsif fragments.keys == [nil]
|
161
|
+
status = make_request(conn, uri)
|
149
162
|
error = !status_allowed?(status)
|
150
|
-
|
163
|
+
if error
|
164
|
+
puts "Request to #{link} returned #{status} present in"
|
165
|
+
fragments[nil].each { |file| puts "\t#{file}" }
|
166
|
+
end
|
167
|
+
else
|
168
|
+
response = get_request(conn, uri)
|
169
|
+
status = response.status
|
170
|
+
if status == 200
|
171
|
+
valid_fragments = uniq_string_matches(response.body, ID)
|
172
|
+
fragments.each do |fragment, files|
|
173
|
+
unless valid_fragments.include?(fragment)
|
174
|
+
puts "Invalid link to fragment '#{fragment}' present in: "
|
175
|
+
files.each { |file| puts "\t#{file}" }
|
176
|
+
end
|
177
|
+
end
|
178
|
+
else
|
179
|
+
error = true
|
180
|
+
puts "Request to #{link} in #{files} returned #{status}"
|
181
|
+
error = true
|
182
|
+
puts "Invalid internal link '#{link}' is present in:"
|
183
|
+
fragments.flat_map { |_, files| files }.uniq
|
184
|
+
.each { |file| puts "\t#{file}" }
|
185
|
+
end
|
151
186
|
end
|
152
187
|
|
153
188
|
next unless error
|
@@ -181,40 +216,75 @@ class LinkChecker
|
|
181
216
|
end
|
182
217
|
|
183
218
|
# Find all the valid links for the site
# The value returned by this method is formatted like so:
# {
#   "path": [
#     fragment
#   ]
# }
# The result is computed once and memoized in @valid_links.
def valid_links
  @valid_links ||= files.each_with_object({}) do |file, by_path|
    # Fragments (element ids) are only collected for HTML files, and only
    # when fragment checking has not been disabled.
    collect_ids = html?(file) && !@ignore_fragments
    ids = collect_ids ? uniq_file_matches(file, ID) : []

    by_path[file_url(file)] = ids
  end
end
|
196
237
|
|
197
238
|
# Find all HTML files
# The filtered list is memoized in @html_files.
def html_files
  @html_files ||= files.select { |candidate| html?(candidate) }
end
|
203
244
|
|
204
245
|
# Find all links in html_files
|
246
|
+
# The value returned by this method is formatted like so:
|
247
|
+
# {
|
248
|
+
# uri without fragment: {
|
249
|
+
# uri's fragment: Set [
|
250
|
+
# "file containing this link"
|
251
|
+
# ]
|
252
|
+
# }
|
253
|
+
# }
|
205
254
|
def links
|
206
255
|
return @links if @links
|
207
256
|
|
208
257
|
@links = {}
|
209
258
|
html_files.each do |file|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
259
|
+
file_path = file_url(file)
|
260
|
+
|
261
|
+
# For each link in the file
|
262
|
+
uniq_file_matches(file, HREF).each do |link|
|
263
|
+
uri = Addressable::URI.parse(link)
|
264
|
+
|
265
|
+
# Skip the emails and phone numbers URIs
|
266
|
+
next if uri.site&.end_with?(":")
|
267
|
+
# Skip the URIs with unknown schemes
|
268
|
+
next unless uri.scheme.nil? || SCHEMES.include?(uri.scheme)
|
269
|
+
|
270
|
+
# Set the URI's path to the file's valid link if the link is a
|
271
|
+
# fragment of the current file
|
272
|
+
uri.path = file_path if link.start_with?("#")
|
273
|
+
uri.path = uri.path.dup
|
274
|
+
|
275
|
+
# Remove the fragment from the URI and put it in a local variable
|
276
|
+
fragment = uri.fragment.nil? || uri.fragment.empty? ? nil : uri.fragment
|
277
|
+
uri.fragment = nil
|
278
|
+
|
279
|
+
fragment = nil if @ignore_fragments
|
280
|
+
|
281
|
+
# Get the link for the URI
|
282
|
+
uri_fragments = @links[uri] ||= {}
|
283
|
+
|
284
|
+
# Get the files for the fragment
|
285
|
+
fragment_files = uri_fragments[fragment] ||= Set.new
|
286
|
+
|
287
|
+
fragment_files << file
|
218
288
|
end
|
219
289
|
end
|
220
290
|
@links
|
@@ -234,16 +304,53 @@ class LinkChecker
|
|
234
304
|
# Make a request on the connection for the URL and return the response status.
#
# The mode may be spelled with hyphens (as the command line help documents it:
# "try-head", "head-only", "get-only") or with underscores (as DEFAULT_MODE
# does); both spellings are treated the same:
# - get_only:      only a GET request is made
# - head_only:     only a HEAD request is made
# - anything else: a HEAD request is made first, followed by a GET request if
#                  HEAD did not return a success status
def make_request(conn, url)
  # Normalize so that "--mode get-only" is not silently handled as try_head.
  normalized_mode = @mode.to_s.tr("-", "_")

  if normalized_mode != "get_only"
    response_status = head_request(conn, url).status
    return response_status if normalized_mode == "head_only" ||
                              status_allowed?(response_status)
  end

  get_request(conn, url).status
end

# Make a get request on the connection for the URL
def get_request(conn, url)
  conn.get(url, {}, HEADERS)
end

# Make a head request on the connection for the URL
def head_request(conn, url)
  conn.head(url, {}, HEADERS)
end

# Returns whether the status is successful (2xx)
def status_allowed?(status)
  status >= 200 && status < 300
end
|
329
|
+
|
330
|
+
# Finds all the unique matches in a file for a given regex
# The file at +path+ is read in full and handed to uniq_string_matches.
def uniq_file_matches(path, regex)
  # File.read closes the file as soon as it has been read, whereas
  # File.open(path).read leaves the handle open until it is garbage collected.
  uniq_string_matches(File.read(path), regex)
end

# Finds all the unique matches in a String for a given regex
# Returns the first capture group of every match (or the whole match when the
# regex has no capture group), stripped of surrounding whitespace and without
# duplicates, in order of first appearance.
def uniq_string_matches(str, regex)
  str.scan(regex)
     # scan yields Arrays of captures when the regex has groups and plain
     # Strings otherwise; Array() handles both shapes.
     .map { |matches| Array(matches).first.strip }
     .uniq
end
|
341
|
+
|
342
|
+
# Determines whether the file is an HTML file based on its extension
# The extension (including the leading dot) is looked up in the HTML list.
def html?(path)
  extension = File.extname(path)
  HTML.include?(extension)
end
|
346
|
+
|
347
|
+
# Gets the url of a file in the static site based on its path
# The path is made relative to @site_folder and prefixed with "/". A file
# named exactly "index.html" is mapped to the url of its directory, and any
# trailing "/" is removed (so the site's root index.html maps to "").
def file_url(path)
  # Wrap the site folder as well: older Rubies only accept a Pathname here.
  relative = Pathname.new(path).relative_path_from(Pathname.new(@site_folder))
  url = "/#{relative}"

  # Only strip the file name when it is exactly index.html; a plain suffix
  # chomp would also mangle names such as "reindex.html".
  url = url.chomp("index.html") if relative.basename.to_s == "index.html"

  url.chomp("/")
end
|
249
356
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-link-checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Zakary Kamal Ismail
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -72,6 +72,34 @@ dependencies:
|
|
72
72
|
- - "<"
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '5.0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: rspec
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
- !ruby/object:Gem::Dependency
|
90
|
+
name: rubocop
|
91
|
+
requirement: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
type: :development
|
97
|
+
prerelease: false
|
98
|
+
version_requirements: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
75
103
|
description: Verifies that all the links in a Jekyll website are valid.It can also
|
76
104
|
work with any static site generator.
|
77
105
|
email: zakary.kamal.fs@outlook.com
|