jekyll-link-checker 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 651ec0822b7f2a5acf4000eb0a6d61a41ee52d02ef9f761387fccebe010756fc
4
- data.tar.gz: 1f2ce68855ff4c8806c55fbc8bb18d8152adffca6e19d7c698a7998d258169f9
3
+ metadata.gz: 95ce2357aa0eaea04dbc5695f935918cfb98b8a76786dfb2c9317579e8801a4f
4
+ data.tar.gz: 2e1cbde04eaa0f36daf75027f14a45364a57b203409d30abfd353d82a1242235
5
5
  SHA512:
6
- metadata.gz: dbf951f15490ccc110cbd58fc277e6375521d42d30c5f9b202624c39ce4ba6d6a3042282a51e9d9f8fc7b706cca9c91fcfa2f421032c2a36b6cf10894a7e7429
7
- data.tar.gz: 6001df2b2bee99cb2ac04a8b1481de2b7553c11e3e48868b44c0328e17faf88efee79aaeebe54bd8a626febae969cc344eef04f44f0e92ea83c4dd25902b977d
6
+ metadata.gz: d77c8b80e72b2ff133b2188eed42c60fb07e174997097d9b7f4c558833bbe3db75b2c91c308a566f3f6fe757056c0a6c1913942062d886180ea552d15ea48348
7
+ data.tar.gz: 7b2714ae7ebe65845a225d882091d3c34a0aa62a4e5104cc577296a759d37a0fde82369e85c0d420d6383e7690be774f3185490a4d835ad44b06a99dce03fbc0
@@ -39,15 +39,19 @@ parser = OptionParser.new do |opts|
39
39
  opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
40
40
  link_checker.update_skip_list(skip_list)
41
41
  end
42
- opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status\n" \
43
- "head-only: Only tries to do a HEAD request\n" \
42
+ opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status",
43
+ "head-only: Only tries to do a HEAD request",
44
44
  "get-only: Only tries to do a GET request") do |mode|
45
45
  link_checker.mode = mode
46
46
  end
47
+ opts.on("-i", "--[no-]ignore-fragments", "Whether to ignore the fragments " \
48
+ "in links. Defaults to false.") do |ignore_fragments|
49
+ link_checker.ignore_fragments = ignore_fragments
50
+ end
47
51
  opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do |_|
48
52
  link_checker.fail_fast = true
49
53
  end
50
- opts.on("-a", "--[no]-abort", "Abort the program on the first invalid link") do |abort_on_failure|
54
+ opts.on("-a", "--[no-]abort", "Abort the program on the first invalid link") do |abort_on_failure|
51
55
  link_checker.abort_on_failure = abort_on_failure
52
56
  end
53
57
  opts.on("-V", "--verbose", "Run with verbose output") do |_|
data/lib/link-checker.rb CHANGED
@@ -6,7 +6,7 @@ require "faraday-cookie_jar"
6
6
  require "addressable"
7
7
  require "pathname"
8
8
 
9
- # Checks all the links of a
9
+ # Checks all the links of a static website to make sure they're all valid
10
10
  class LinkChecker
11
11
  HEADERS = {
12
12
  "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
@@ -23,7 +23,8 @@ class LinkChecker
23
23
  "Cache-Control" => "no-cache"
24
24
  }.freeze
25
25
 
26
- HREF = /href="([^#"\n][^"\n]*)"/.freeze
26
+ HREF = /href="([^"\n]+)"/.freeze
27
+ ID = /id="([^"\n]+)"/.freeze
27
28
  HTML = %w[.html .htm].freeze
28
29
  SCHEMES = %w[https http].freeze
29
30
 
@@ -32,7 +33,7 @@ class LinkChecker
32
33
  DEFAULT_MODE = "try_head"
33
34
 
34
35
  attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
35
- :fail_fast, :abort_on_failure
36
+ :ignore_fragments, :fail_fast, :abort_on_failure
36
37
  attr_writer :files, :html_files, :links
37
38
 
38
39
  # Set default values for all the properties
@@ -42,6 +43,7 @@ class LinkChecker
42
43
  @site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER
43
44
 
44
45
  update_skip_list(opts[:skip_list] || [])
46
+ @ignore_fragments = opts[:ignore_fragments]
45
47
 
46
48
  @mode = opts[:mode] || DEFAULT_MODE
47
49
  @verbose = opts[:verbose]
@@ -67,6 +69,7 @@ class LinkChecker
67
69
  link_checker_config = config["link-checker"]
68
70
  if link_checker_config
69
71
  opts[:skip_list] = link_checker_config["skip-list"]
72
+ opts[:ignore_fragments] = link_checker_config["ignore-fragments"]
70
73
  opts[:mode] = link_checker_config["mode"]
71
74
  opts[:verbose] = link_checker_config["verbose"]
72
75
  opts[:fail_fast] = link_checker_config["fail-fast"]
@@ -116,21 +119,17 @@ class LinkChecker
116
119
  error_count = 0
117
120
  i = 0
118
121
  prev_msg_size = 0
119
- links.each do |link, files|
122
+ links.each do |uri, fragments|
120
123
  i += 1
121
124
  if verbose
122
125
  prev_msg_size.times { print " " }
123
- msg = "#{link} #{i}/#{links.size}"
126
+ msg = "#{uri} #{i}/#{links.size}"
124
127
  print "\r#{msg}\r"
125
128
  prev_msg_size = msg.size
126
129
  end
127
130
 
128
131
  # Skip the link if it's in the skip list
129
- next if @skip_list.include?(link)
130
-
131
- # Parse the uri
132
- uri = Addressable::URI.parse(link)
133
- next if uri.site&.end_with?(":")
132
+ next if @skip_list.include?(uri.to_s)
134
133
 
135
134
  error = false
136
135
 
@@ -138,16 +137,52 @@ class LinkChecker
138
137
  if uri.hostname.nil? || uri.hostname == hostname
139
138
  uri.path.chomp!("/")
140
139
 
141
- # If the uri can't be found in the site's file
142
- unless valid_links.include?(uri.path)
143
- puts "Invalid internal link '#{link}' is present in:"
144
- files.each { |file| puts "\t#{file}" }
140
+ # If the uri's path is valid
141
+ valid_fragments = valid_links[uri.path]
142
+ if valid_fragments
143
+ fragments.each do |fragment, files|
144
+ # Skip the base fragment
145
+ next unless fragment
146
+
147
+ next if valid_fragments.include?(fragment)
148
+
149
+ error = true
150
+ puts "Invalid fragment '#{fragment}' in link '#{uri}' " \
151
+ "is present in:"
152
+ files.each { |file| puts "\t#{file}" }
153
+ end
154
+ else
145
155
  error = true
156
+ puts "Invalid internal link '#{link}' is present in:"
157
+ fragments.flat_map { |_, files| files }.uniq
158
+ .each { |file| puts "\t#{file}" }
146
159
  end
147
- elsif uri.scheme.nil? || SCHEMES.include?(uri.scheme)
148
- status = make_request(conn, link)
160
+ elsif fragments.keys == [nil]
161
+ status = make_request(conn, uri)
149
162
  error = !status_allowed?(status)
150
- puts "Request to #{link} in #{files} returned #{status}" if error
163
+ if error
164
+ puts "Request to #{link} returned #{status} present in"
165
+ fragments[nil].each { |file| puts "\t#{file}" }
166
+ end
167
+ else
168
+ response = get_request(conn, uri)
169
+ status = response.status
170
+ if status == 200
171
+ valid_fragments = uniq_string_matches(response.body, ID)
172
+ fragments.each do |fragment, files|
173
+ unless valid_fragments.include?(fragment)
174
+ puts "Invalid link to fragment '#{fragment}' present in: "
175
+ files.each { |file| puts "\t#{file}" }
176
+ end
177
+ end
178
+ else
179
+ error = true
180
+ puts "Request to #{link} in #{files} returned #{status}"
181
+ error = true
182
+ puts "Invalid internal link '#{link}' is present in:"
183
+ fragments.flat_map { |_, files| files }.uniq
184
+ .each { |file| puts "\t#{file}" }
185
+ end
151
186
  end
152
187
 
153
188
  next unless error
@@ -181,40 +216,75 @@ class LinkChecker
181
216
  end
182
217
 
183
218
  # Find all the valid links for the site
219
+ # The value returned by this method is formatted like so:
220
+ # {
221
+ # "path": [
222
+ # fragment
223
+ # ]
224
+ # }
184
225
  def valid_links
185
226
  return @valid_links if @valid_links
186
227
 
187
228
  @valid_links = files.map do |file|
188
- path = Pathname.new(file)
189
- path = path.relative_path_from(@site_folder)
190
- path = "/" + path.to_s
191
- path.chomp!("index.html")
192
- path.chomp!("/")
193
- path
229
+ fragments = []
230
+ fragments = uniq_file_matches(file, ID) if html?(file) &&
231
+ !@ignore_fragments
232
+
233
+ [file_url(file), fragments]
194
234
  end
235
+ @valid_links = @valid_links.to_h
195
236
  end
196
237
 
197
238
  # Find all HTML files
198
239
  def html_files
199
240
  return @html_files if @html_files
200
241
 
201
- @html_files = files.filter { |file| HTML.include?(File.extname(file)) }
242
+ @html_files = files.filter { |file| html?(file) }
202
243
  end
203
244
 
204
245
  # Find all links in html_files
246
+ # The value returned by this method is formatted like so:
247
+ # {
248
+ # uri without fragment: {
249
+ # uri's fragment: Set [
250
+ # "file containing this link"
251
+ # ]
252
+ # }
253
+ # }
205
254
  def links
206
255
  return @links if @links
207
256
 
208
257
  @links = {}
209
258
  html_files.each do |file|
210
- File.open(file).read.scan(HREF)
211
- .map { |match| match[0].strip }
212
- .uniq.each do |link|
213
- link_files = @links[link]
214
- if link_files
215
- then link_files.push(file)
216
- else @links[link] = [file]
217
- end
259
+ file_path = file_url(file)
260
+
261
+ # For each link in the file
262
+ uniq_file_matches(file, HREF).each do |link|
263
+ uri = Addressable::URI.parse(link)
264
+
265
+ # Skip email and phone number URIs
266
+ next if uri.site&.end_with?(":")
267
+ # Skip the URIs with unknown schemes
268
+ next unless uri.scheme.nil? || SCHEMES.include?(uri.scheme)
269
+
270
+ # Set the URI's path to the file's valid link if the link is a
271
+ # fragment of the current file
272
+ uri.path = file_path if link.start_with?("#")
273
+ uri.path = uri.path.dup
274
+
275
+ # Remove the fragment from the URI and put it in a local variable
276
+ fragment = uri.fragment.nil? || uri.fragment.empty? ? nil : uri.fragment
277
+ uri.fragment = nil
278
+
279
+ fragment = nil if @ignore_fragments
280
+
281
+ # Get the link for the URI
282
+ uri_fragments = @links[uri] ||= {}
283
+
284
+ # Get the files for the fragment
285
+ fragment_files = uri_fragments[fragment] ||= Set.new
286
+
287
+ fragment_files << file
218
288
  end
219
289
  end
220
290
  @links
@@ -234,16 +304,53 @@ class LinkChecker
234
304
  # Make a request on the connection for the URL
235
305
  def make_request(conn, url)
236
306
  if @mode != "get_only"
237
- response = conn.head(url, {}, HEADERS)
238
- return response.status if mode == "head_only" ||
239
- status_allowed?(response.status)
307
+ response_status = head_request(conn, url).status
308
+ return response_status if mode == "head_only" ||
309
+ status_allowed?(response_status)
240
310
  end
241
311
 
242
- conn.get(url, {}, HEADERS).status
312
+ get_request(conn, url).status
313
+ end
314
+
315
+ # Make a get request on the connection for the URL
316
+ def get_request(conn, url)
317
+ conn.get(url, {}, HEADERS)
318
+ end
319
+
320
+ # Make a head request on the connection for the URL
321
+ def head_request(conn, url)
322
+ conn.head(url, {}, HEADERS)
243
323
  end
244
324
 
245
325
  # Returns whether the status is successful
246
326
  def status_allowed?(status)
247
327
  status >= 200 && status < 300
248
328
  end
329
+
330
+ # Finds all the matches in a file for a given regex
331
+ def uniq_file_matches(path, regex)
332
+ uniq_string_matches(File.open(path).read, regex)
333
+ end
334
+
335
+ # Finds all the matches in a String for a given regex
336
+ def uniq_string_matches(str, regex)
337
+ str.scan(regex)
338
+ .map { |matches| matches[0].strip }
339
+ .uniq
340
+ end
341
+
342
+ # Determines whether the file is an HTML file based on its extension
343
+ def html?(path)
344
+ HTML.include?(File.extname(path))
345
+ end
346
+
347
+ # Gets the url of a file in the static site based on its path
348
+ def file_url(path)
349
+ path = Pathname.new(path)
350
+ path = path.relative_path_from(@site_folder)
351
+ path = "/" + path.to_s
352
+ path.chomp!("index.html")
353
+ path.chomp!("/")
354
+ path
355
+ end
249
356
  end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JekyllLinkChecker
4
- VERSION = "0.1.1"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-link-checker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Zakary Kamal Ismail
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-15 00:00:00.000000000 Z
11
+ date: 2019-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -72,6 +72,34 @@ dependencies:
72
72
  - - "<"
73
73
  - !ruby/object:Gem::Version
74
74
  version: '5.0'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rspec
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ - !ruby/object:Gem::Dependency
90
+ name: rubocop
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
75
103
  description: Verifies that all the links in a Jekyll website are valid.It can also
76
104
  work with any static site generator.
77
105
  email: zakary.kamal.fs@outlook.com