jekyll-link-checker 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 651ec0822b7f2a5acf4000eb0a6d61a41ee52d02ef9f761387fccebe010756fc
4
- data.tar.gz: 1f2ce68855ff4c8806c55fbc8bb18d8152adffca6e19d7c698a7998d258169f9
3
+ metadata.gz: 95ce2357aa0eaea04dbc5695f935918cfb98b8a76786dfb2c9317579e8801a4f
4
+ data.tar.gz: 2e1cbde04eaa0f36daf75027f14a45364a57b203409d30abfd353d82a1242235
5
5
  SHA512:
6
- metadata.gz: dbf951f15490ccc110cbd58fc277e6375521d42d30c5f9b202624c39ce4ba6d6a3042282a51e9d9f8fc7b706cca9c91fcfa2f421032c2a36b6cf10894a7e7429
7
- data.tar.gz: 6001df2b2bee99cb2ac04a8b1481de2b7553c11e3e48868b44c0328e17faf88efee79aaeebe54bd8a626febae969cc344eef04f44f0e92ea83c4dd25902b977d
6
+ metadata.gz: d77c8b80e72b2ff133b2188eed42c60fb07e174997097d9b7f4c558833bbe3db75b2c91c308a566f3f6fe757056c0a6c1913942062d886180ea552d15ea48348
7
+ data.tar.gz: 7b2714ae7ebe65845a225d882091d3c34a0aa62a4e5104cc577296a759d37a0fde82369e85c0d420d6383e7690be774f3185490a4d835ad44b06a99dce03fbc0
@@ -39,15 +39,19 @@ parser = OptionParser.new do |opts|
39
39
  opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
40
40
  link_checker.update_skip_list(skip_list)
41
41
  end
42
- opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status\n" \
43
- "head-only: Only tries to do a HEAD request\n" \
42
+ opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status",
43
+ "head-only: Only tries to do a HEAD request",
44
44
  "get-only: Only tries to do a GET request") do |mode|
45
45
  link_checker.mode = mode
46
46
  end
47
+ opts.on("-i", "--[no-]ignore-fragments", "Whether to ignore the fragments " \
48
+ "in links. Defaults to false.") do |ignore_fragments|
49
+ link_checker.ignore_fragments = ignore_fragments
50
+ end
47
51
  opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do |_|
48
52
  link_checker.fail_fast = true
49
53
  end
50
- opts.on("-a", "--[no]-abort", "Abort the program on the first invalid link") do |abort_on_failure|
54
+ opts.on("-a", "--[no-]abort", "Abort the program on the first invalid link") do |abort_on_failure|
51
55
  link_checker.abort_on_failure = abort_on_failure
52
56
  end
53
57
  opts.on("-V", "--verbose", "Run with verbose output") do |_|
data/lib/link-checker.rb CHANGED
@@ -6,7 +6,7 @@ require "faraday-cookie_jar"
6
6
  require "addressable"
7
7
  require "pathname"
8
8
 
9
- # Checks all the links of a
9
+ # Checks all the links of a static website to make sure they're all valid
10
10
  class LinkChecker
11
11
  HEADERS = {
12
12
  "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
@@ -23,7 +23,8 @@ class LinkChecker
23
23
  "Cache-Control" => "no-cache"
24
24
  }.freeze
25
25
 
26
- HREF = /href="([^#"\n][^"\n]*)"/.freeze
26
+ HREF = /href="([^"\n]+)"/.freeze
27
+ ID = /id="([^"\n]+)"/.freeze
27
28
  HTML = %w[.html .htm].freeze
28
29
  SCHEMES = %w[https http].freeze
29
30
 
@@ -32,7 +33,7 @@ class LinkChecker
32
33
  DEFAULT_MODE = "try_head"
33
34
 
34
35
  attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
35
- :fail_fast, :abort_on_failure
36
+ :ignore_fragments, :fail_fast, :abort_on_failure
36
37
  attr_writer :files, :html_files, :links
37
38
 
38
39
  # Set default values for all the properties
@@ -42,6 +43,7 @@ class LinkChecker
42
43
  @site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER
43
44
 
44
45
  update_skip_list(opts[:skip_list] || [])
46
+ @ignore_fragments = opts[:ignore_fragments]
45
47
 
46
48
  @mode = opts[:mode] || DEFAULT_MODE
47
49
  @verbose = opts[:verbose]
@@ -67,6 +69,7 @@ class LinkChecker
67
69
  link_checker_config = config["link-checker"]
68
70
  if link_checker_config
69
71
  opts[:skip_list] = link_checker_config["skip-list"]
72
+ opts[:ignore_fragments] = link_checker_config["ignore-fragments"]
70
73
  opts[:mode] = link_checker_config["mode"]
71
74
  opts[:verbose] = link_checker_config["verbose"]
72
75
  opts[:fail_fast] = link_checker_config["fail-fast"]
@@ -116,21 +119,17 @@ class LinkChecker
116
119
  error_count = 0
117
120
  i = 0
118
121
  prev_msg_size = 0
119
- links.each do |link, files|
122
+ links.each do |uri, fragments|
120
123
  i += 1
121
124
  if verbose
122
125
  prev_msg_size.times { print " " }
123
- msg = "#{link} #{i}/#{links.size}"
126
+ msg = "#{uri} #{i}/#{links.size}"
124
127
  print "\r#{msg}\r"
125
128
  prev_msg_size = msg.size
126
129
  end
127
130
 
128
131
  # Skip the link if it's in the skip list
129
- next if @skip_list.include?(link)
130
-
131
- # Parse the uri
132
- uri = Addressable::URI.parse(link)
133
- next if uri.site&.end_with?(":")
132
+ next if @skip_list.include?(uri.to_s)
134
133
 
135
134
  error = false
136
135
 
@@ -138,16 +137,52 @@ class LinkChecker
138
137
  if uri.hostname.nil? || uri.hostname == hostname
139
138
  uri.path.chomp!("/")
140
139
 
141
- # If the uri can't be found in the site's file
142
- unless valid_links.include?(uri.path)
143
- puts "Invalid internal link '#{link}' is present in:"
144
- files.each { |file| puts "\t#{file}" }
140
+ # If the uri's path is valid
141
+ valid_fragments = valid_links[uri.path]
142
+ if valid_fragments
143
+ fragments.each do |fragment, files|
144
+ # Skip the base fragment
145
+ next unless fragment
146
+
147
+ next if valid_fragments.include?(fragment)
148
+
149
+ error = true
150
+ puts "Invalid fragment '#{fragment}' in link '#{uri}' " \
151
+ "is present in:"
152
+ files.each { |file| puts "\t#{file}" }
153
+ end
154
+ else
145
155
  error = true
156
+ puts "Invalid internal link '#{link}' is present in:"
157
+ fragments.flat_map { |_, files| files }.uniq
158
+ .each { |file| puts "\t#{file}" }
146
159
  end
147
- elsif uri.scheme.nil? || SCHEMES.include?(uri.scheme)
148
- status = make_request(conn, link)
160
+ elsif fragments.keys == [nil]
161
+ status = make_request(conn, uri)
149
162
  error = !status_allowed?(status)
150
- puts "Request to #{link} in #{files} returned #{status}" if error
163
+ if error
164
+ puts "Request to #{link} returned #{status} present in"
165
+ fragments[nil].each { |file| puts "\t#{file}" }
166
+ end
167
+ else
168
+ response = get_request(conn, uri)
169
+ status = response.status
170
+ if status == 200
171
+ valid_fragments = uniq_string_matches(response.body, ID)
172
+ fragments.each do |fragment, files|
173
+ unless valid_fragments.include?(fragment)
174
+ puts "Invalid link to fragment '#{fragment}' present in: "
175
+ files.each { |file| puts "\t#{file}" }
176
+ end
177
+ end
178
+ else
179
+ error = true
180
+ puts "Request to #{link} in #{files} returned #{status}"
181
+ error = true
182
+ puts "Invalid internal link '#{link}' is present in:"
183
+ fragments.flat_map { |_, files| files }.uniq
184
+ .each { |file| puts "\t#{file}" }
185
+ end
151
186
  end
152
187
 
153
188
  next unless error
@@ -181,40 +216,75 @@ class LinkChecker
181
216
  end
182
217
 
183
218
  # Find all the valid links for the site
219
+ # The value returned by this method is formatted like so:
220
+ # {
221
+ # "path": [
222
+ # fragment
223
+ # ]
224
+ # }
184
225
  def valid_links
185
226
  return @valid_links if @valid_links
186
227
 
187
228
  @valid_links = files.map do |file|
188
- path = Pathname.new(file)
189
- path = path.relative_path_from(@site_folder)
190
- path = "/" + path.to_s
191
- path.chomp!("index.html")
192
- path.chomp!("/")
193
- path
229
+ fragments = []
230
+ fragments = uniq_file_matches(file, ID) if html?(file) &&
231
+ !@ignore_fragments
232
+
233
+ [file_url(file), fragments]
194
234
  end
235
+ @valid_links = @valid_links.to_h
195
236
  end
196
237
 
197
238
  # Find all HTML files
198
239
  def html_files
199
240
  return @html_files if @html_files
200
241
 
201
- @html_files = files.filter { |file| HTML.include?(File.extname(file)) }
242
+ @html_files = files.filter { |file| html?(file) }
202
243
  end
203
244
 
204
245
  # Find all links in html_files
246
+ # The value returned by this method is formatted like so:
247
+ # {
248
+ # uri without fragment: {
249
+ # uri's fragment: Set [
250
+ # "file containing this link"
251
+ # ]
252
+ # }
253
+ # }
205
254
  def links
206
255
  return @links if @links
207
256
 
208
257
  @links = {}
209
258
  html_files.each do |file|
210
- File.open(file).read.scan(HREF)
211
- .map { |match| match[0].strip }
212
- .uniq.each do |link|
213
- link_files = @links[link]
214
- if link_files
215
- then link_files.push(file)
216
- else @links[link] = [file]
217
- end
259
+ file_path = file_url(file)
260
+
261
+ # For each link in the file
262
+ uniq_file_matches(file, HREF).each do |link|
263
+ uri = Addressable::URI.parse(link)
264
+
265
+ # Skip email and phone number URIs
266
+ next if uri.site&.end_with?(":")
267
+ # Skip the URIs with unknown schemes
268
+ next unless uri.scheme.nil? || SCHEMES.include?(uri.scheme)
269
+
270
+ # Set the URI's path to the file's valid link if the link is a
271
+ # fragment of the current file
272
+ uri.path = file_path if link.start_with?("#")
273
+ uri.path = uri.path.dup
274
+
275
+ # Remove the fragment from the URI and put it in a local variable
276
+ fragment = uri.fragment.nil? || uri.fragment.empty? ? nil : uri.fragment
277
+ uri.fragment = nil
278
+
279
+ fragment = nil if @ignore_fragments
280
+
281
+ # Get the link for the URI
282
+ uri_fragments = @links[uri] ||= {}
283
+
284
+ # Get the files for the fragment
285
+ fragment_files = uri_fragments[fragment] ||= Set.new
286
+
287
+ fragment_files << file
218
288
  end
219
289
  end
220
290
  @links
@@ -234,16 +304,53 @@ class LinkChecker
234
304
  # Make a request on the connection for the URL
235
305
  def make_request(conn, url)
236
306
  if @mode != "get_only"
237
- response = conn.head(url, {}, HEADERS)
238
- return response.status if mode == "head_only" ||
239
- status_allowed?(response.status)
307
+ response_status = head_request(conn, url).status
308
+ return response_status if mode == "head_only" ||
309
+ status_allowed?(response_status)
240
310
  end
241
311
 
242
- conn.get(url, {}, HEADERS).status
312
+ get_request(conn, url).status
313
+ end
314
+
315
+ # Make a get request on the connection for the URL
316
+ def get_request(conn, url)
317
+ conn.get(url, {}, HEADERS)
318
+ end
319
+
320
+ # Make a head request on the connection for the URL
321
+ def head_request(conn, url)
322
+ conn.head(url, {}, HEADERS)
243
323
  end
244
324
 
245
325
  # Returns whether the status is successful
246
326
  def status_allowed?(status)
247
327
  status >= 200 && status < 300
248
328
  end
329
+
330
+ # Finds all the matches in a file for a given regex
331
+ def uniq_file_matches(path, regex)
332
+ uniq_string_matches(File.open(path).read, regex)
333
+ end
334
+
335
+ # Finds all the matches in a String for a given regex
336
+ def uniq_string_matches(str, regex)
337
+ str.scan(regex)
338
+ .map { |matches| matches[0].strip }
339
+ .uniq
340
+ end
341
+
342
+ # Determines whether the file is an HTML file based on its extension
343
+ def html?(path)
344
+ HTML.include?(File.extname(path))
345
+ end
346
+
347
+ # Gets the url of a file in the static site based on its path
348
+ def file_url(path)
349
+ path = Pathname.new(path)
350
+ path = path.relative_path_from(@site_folder)
351
+ path = "/" + path.to_s
352
+ path.chomp!("index.html")
353
+ path.chomp!("/")
354
+ path
355
+ end
249
356
  end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JekyllLinkChecker
4
- VERSION = "0.1.1"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-link-checker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zakary Kamal Ismail
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-15 00:00:00.000000000 Z
11
+ date: 2019-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -72,6 +72,34 @@ dependencies:
72
72
  - - "<"
73
73
  - !ruby/object:Gem::Version
74
74
  version: '5.0'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rspec
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ - !ruby/object:Gem::Dependency
90
+ name: rubocop
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
75
103
  description: Verifies that all the links in a Jekyll website are valid. It can also
76
104
  work with any static site generator.
77
105
  email: zakary.kamal.fs@outlook.com