jekyll-link-checker 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ab9d005b30ac7520a5f7912083966a50948c0d21d4e713bbb9639358dd033f77
4
+ data.tar.gz: ed02ed3ec9066ce54f4f74730c1de5d50206e83e39737eb8dead37ce4ae1f7ac
5
+ SHA512:
6
+ metadata.gz: 35940136ad2ddb9ccac822abf3f1fe3999c52b92052e732c9eb846be0eadc21f9b9b3f6de2fe61b4395c7ded8706c9e57cd22ad86bf1fb6c6bcaa4446785d44a
7
+ data.tar.gz: ef06e898f334b7d6a49f10af2ab9a084e77bf890fca8cd37903083483363c8e81a4b41c8af7da96fb7318eda1701747cfd951295018cb427944c4bee8f5cb977
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# frozen_string_literal: true

# Resolve gems from the public RubyGems registry.
source "https://rubygems.org"

# Take the dependency list from this gem's own gemspec.
gemspec
@@ -0,0 +1,63 @@
1
require 'optparse'
require 'yaml'
require 'jekyll-link-checker'

# Find the config file: the first of `_config.yml` / `_config.yaml` that
# exists and can be loaded is used.
config = nil
%w[.yml .yaml].each do |ext|
  file = "_config#{ext}"
  next unless File.file?(file)

  begin
    config = YAML.load_file(file)
    break
  rescue StandardError => e
    # Best effort: fall through to the next candidate (or to the defaults),
    # but tell the user why this file was rejected instead of hiding it.
    warn "Couldn't load `#{file}`: #{e.message}"
  end
end

# Build the checker from the site configuration when there is one, otherwise
# fall back to LinkChecker's defaults.
link_checker = if config
                 LinkChecker.from_config(config)
               else
                 puts "`_config.yml` or `_config.yaml` couldn't be read"
                 puts "The default configuration will be used"
                 LinkChecker.new
               end

# Command line options override whatever came from the configuration file.
parser = OptionParser.new do |opts|
  opts.banner = "Usage: jekyll-link-checker [options]"

  opts.on("-n HOSTNAME", "--hostname HOSTNAME", "Hostname of the site. Ex.: example.com") do |hostname|
    link_checker.hostname = hostname
  end
  opts.on("-b BASEURL", "--baseurl BASEURL", "(this option is currently ignored)") do |baseurl|
    link_checker.baseurl = baseurl
  end
  opts.on("-d SITE_FOLDER", "--site-folder SITE_FOLDER", "Path to the site's folder") do |site_folder|
    link_checker.site_folder = site_folder
  end
  opts.on("-s [ARRAY]", "--skip-list [ARRAY]", Array, "Comma separated list of links not to check") do |skip_list|
    link_checker.update_skip_list(skip_list)
  end
  opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
    link_checker.update_skip_list(skip_list)
  end
  opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status\n" \
                                    "head-only: Only tries to do a HEAD request\n" \
                                    "get-only: Only tries to do a GET request") do |mode|
    # The help text advertises hyphenated names while the library compares
    # against underscored ones ("try_head", "head_only", "get_only").
    link_checker.mode = mode.tr("-", "_")
  end
  opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do
    link_checker.fail_fast = true
  end
  # `--[no-]abort` is OptionParser's syntax for a negatable boolean switch;
  # the block receives true for --abort / -a and false for --no-abort.
  opts.on("-a", "--[no-]abort", "Exit with a failure status when invalid links are found (default: true)") do |abort_on_failure|
    link_checker.abort_on_failure = abort_on_failure
  end
  opts.on("-V", "--verbose", "Run with verbose output") do
    link_checker.verbose = true
  end
  opts.on_tail("-v", "--version", "Outputs the version and exit") do
    puts JekyllLinkChecker::VERSION
    exit
  end
end

parser.parse!
link_checker.check_links
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "jekyll"
4
+
5
# Once Jekyll has written the site to disk, build a LinkChecker from the
# site's configuration and check every link of the generated output.
Jekyll::Hooks.register(:site, :post_write, priority: 0) do |site|
  LinkChecker.from_config(site.config).check_links
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "link-checker"
4
+ require_relative "jekyll-hook"
5
+ require_relative "version"
@@ -0,0 +1,249 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "faraday_middleware"
5
+ require "faraday-cookie_jar"
6
+ require "addressable"
7
+ require "pathname"
8
+
9
# Checks all the links found in the HTML files of a generated static site.
#
# Links without a hostname, or whose hostname equals +hostname+, are treated
# as internal and looked up among the files of +site_folder+. Other links
# with no scheme or an http(s) scheme are checked with a HEAD and/or GET
# request, depending on +mode+.
#
# NOTE: internal links are matched by absolute path only. Every entry of
# #valid_links is rooted at "/", so a relative link such as "about.html" is
# reported as invalid. +baseurl+ is stored but not used yet.
class LinkChecker
  # Browser-like headers sent with every request.
  HEADERS = {
    "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/41.0.2228.0 Safari/537.36",
    "Accept" => "text/html," \
                "application/xhtml+xml," \
                "application/xml;" \
                "q=0.9,*/*;q=0.8",
    "Accept-Language" => "en-US,en;q=0.5",
    "DNT" => "1",
    "Upgrade-Insecure-Requests" => "1",
    "Pragma" => "no-cache",
    "Cache-Control" => "no-cache"
  }.freeze

  # Captures the value of a double-quoted href attribute. Empty values and
  # values starting with "#" (fragment-only links) are not matched.
  HREF = /href="([^#"\n][^"\n]*)"/.freeze
  HTML = %w[.html .htm].freeze
  SCHEMES = %w[https http].freeze
  # File name that may be left out of a link to a directory.
  INDEX_FILE = "index.html"

  DEFAULT_BASE_URL = "/"
  DEFAULT_SITE_FOLDER = "_site"
  DEFAULT_MODE = "try_head"

  attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
                :fail_fast, :abort_on_failure
  attr_writer :files, :html_files, :links

  # Set default values for all the properties.
  #
  # opts - Hash with the optional keys :hostname, :baseurl, :site_folder,
  #        :skip_list (Array of links or path of a file), :mode, :verbose,
  #        :fail_fast and :abort_on_failure (true unless explicitly set).
  def initialize(opts = {})
    @hostname = opts[:hostname]
    @baseurl = opts[:baseurl] || DEFAULT_BASE_URL
    @site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER

    update_skip_list(opts[:skip_list] || [])

    @mode = opts[:mode] || DEFAULT_MODE
    @verbose = opts[:verbose]
    @fail_fast = opts[:fail_fast]

    # Only nil means "not configured"; an explicit false must be kept.
    abort_on_failure = opts[:abort_on_failure]
    @abort_on_failure = abort_on_failure.nil? ? true : abort_on_failure
  end

  # Initialize the link checker from a Jekyll configuration.
  #
  # config - Hash-like object. The keys read are "url", "baseurl",
  #          "destination" and, under "link-checker", "skip-list", "mode",
  #          "verbose", "fail-fast" and "abort".
  #
  # Returns a new LinkChecker.
  def self.from_config(config)
    opts = {}

    url = config["url"]
    opts[:hostname] = Addressable::URI.parse(url).hostname if url

    opts[:baseurl] = config["baseurl"]
    opts[:site_folder] = config["destination"]

    link_checker_config = config["link-checker"]
    if link_checker_config
      opts[:skip_list] = link_checker_config["skip-list"]
      opts[:mode] = link_checker_config["mode"]
      opts[:verbose] = link_checker_config["verbose"]
      opts[:fail_fast] = link_checker_config["fail-fast"]
      opts[:abort_on_failure] = link_checker_config["abort"]
    end

    new(opts)
  end

  # Whether the options are valid. The only requirement is a hostname.
  def valid?
    @hostname ? true : false
  end

  # Updates skip_list with the given argument.
  # If the argument is an array, skip_list is set to the array.
  # If the argument is a string, the argument will be interpreted as the name
  # of a file where each non-empty line is a link to skip.
  #
  # Raises ArgumentError for any other kind of argument and re-raises the
  # error met while reading the file, after printing a warning.
  def update_skip_list(skip_list)
    case skip_list
    when Array
      @skip_list = skip_list
    when String
      begin
        entries = File.readlines(File.expand_path(skip_list)).map(&:strip)
      rescue StandardError
        warn "Couldn't read the skip list"
        raise
      end
      @skip_list = entries.reject(&:empty?)
    else
      raise ArgumentError, "skip_list must be a String or an array of String"
    end
  end

  # Checks all the links.
  #
  # Aborts the process when the configuration is invalid, or when invalid
  # links are found and abort_on_failure is set. Otherwise returns the number
  # of invalid links, or nil when fail_fast stopped the check early.
  def check_links
    # Make sure the configuration is valid
    abort "Invalid configuration" unless valid?

    conn = create_connection
    error_count = 0
    prev_msg_size = 0
    total = links.size

    links.each_with_index do |(link, files), index|
      if verbose
        # Blank out the previous progress message, then write the new one
        # over it; the trailing "\r" leaves the cursor at column 0.
        print " " * prev_msg_size
        msg = "#{link} #{index + 1}/#{total}"
        print "\r#{msg}\r"
        prev_msg_size = msg.size
      end

      # Skip the link if it's in the skip list
      next if @skip_list.include?(link)
      next unless link_broken?(conn, link, files)

      error_count += 1
      next unless fail_fast

      abort if abort_on_failure
      return nil
    end

    puts if verbose
    return error_count if error_count.zero?

    msg = "There were #{error_count} invalid links"
    abort msg if @abort_on_failure

    puts msg
    error_count
  end

  # Find all files in the site folder (memoized).
  def files
    @files ||= Dir[File.join(@site_folder, "**/*")].select { |f| File.file?(f) }
  end

  # Find all the valid internal links for the site (memoized): the path of
  # every file relative to the site folder, rooted at "/", without a trailing
  # "index.html" file name or trailing slash.
  def valid_links
    return @valid_links if @valid_links

    # Wrapped in a Pathname because older Rubies don't accept a String here.
    root = Pathname.new(@site_folder)
    @valid_links = files.map do |file|
      normalize_path("/" + Pathname.new(file).relative_path_from(root).to_s)
    end
  end

  # Find all HTML files (memoized).
  def html_files
    @html_files ||= files.select { |file| HTML.include?(File.extname(file)) }
  end

  # Find all links in html_files (memoized).
  #
  # Returns a Hash mapping each link to the Array of files containing it.
  def links
    return @links if @links

    @links = {}
    html_files.each do |file|
      # File.read closes the file once it has been read
      File.read(file).scan(HREF).map { |match| match.first.strip }.uniq.each do |link|
        (@links[link] ||= []) << file
      end
    end
    @links
  end

  private

  # Create a connection to make requests
  def create_connection
    Faraday.new do |faraday|
      faraday.use FaradayMiddleware::FollowRedirects
      faraday.use :cookie_jar
      faraday.adapter Faraday.default_adapter
    end
  end

  # Reduces a path to the form stored in valid_links, so that "/dir",
  # "/dir/" and "/dir/index.html" all compare equal. Only a whole
  # "index.html" path segment is removed ("/reindex.html" is left alone).
  def normalize_path(path)
    trimmed = path.end_with?("/#{INDEX_FILE}") ? path[0...-INDEX_FILE.size] : path
    trimmed.chomp("/")
  end

  # Checks a single link, printing a message when it is invalid.
  #
  # Returns true when the link is invalid, false when it is valid or when it
  # is of a kind that isn't checked.
  def link_broken?(conn, link, files)
    uri = Addressable::URI.parse(link)
    # NOTE(review): presumably matches URIs made of a scheme without an
    # authority (mailto:, tel:, ...), which can't be requested - confirm.
    return false if uri.site&.end_with?(":")

    if uri.hostname.nil? || uri.hostname == hostname
      internal_link_broken?(uri, link, files)
    elsif uri.scheme.nil? || SCHEMES.include?(uri.scheme)
      external_link_broken?(conn, link, files)
    else
      false
    end
  rescue Addressable::URI::InvalidURIError => e
    # A malformed link is one invalid link, not a reason to stop the run
    puts "Invalid link '#{link}' in #{files}: #{e.message}"
    true
  end

  # Looks the path of an internal link up in the site's files.
  def internal_link_broken?(uri, link, files)
    return false if valid_links.include?(normalize_path(uri.path))

    puts "Invalid internal link '#{link}' is present in:"
    files.each { |file| puts "\t#{file}" }
    true
  end

  # Requests an external link and checks the status of the response.
  def external_link_broken?(conn, link, files)
    status = make_request(conn, link)
    return false if status_allowed?(status)

    puts "Request to #{link} in #{files} returned #{status}"
    true
  rescue Faraday::Error => e
    # An error raised by Faraday means this link is broken; report it and
    # carry on with the remaining links.
    puts "Request to #{link} in #{files} failed: #{e.message}"
    true
  end

  # Make a request on the connection for the URL.
  #
  # Returns the status of the HEAD response when the mode is "head_only" or
  # when that status is successful, otherwise the status of a GET request.
  # With the "get_only" mode no HEAD request is made.
  def make_request(conn, url)
    # Accept the hyphenated spelling too ("get-only", "head-only", ...)
    request_mode = @mode.to_s.tr("-", "_")

    if request_mode != "get_only"
      status = conn.head(url, {}, HEADERS).status
      return status if request_mode == "head_only" || status_allowed?(status)
    end

    conn.get(url, {}, HEADERS).status
  end

  # Returns whether the status is successful (2xx)
  def status_allowed?(status)
    status >= 200 && status < 300
  end
end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace holding the gem's metadata.
module JekyllLinkChecker
  # Version of the gem; printed by the executable's --version option.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-link-checker
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Zakary Kamal Ismail
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-09-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.15'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.15'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday-cookie_jar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.0.6
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.0.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday_middleware
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.13'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.13'
55
+ - !ruby/object:Gem::Dependency
56
+ name: jekyll
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ - - "<"
63
+ - !ruby/object:Gem::Version
64
+ version: '5.0'
65
+ type: :runtime
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '3.0'
72
+ - - "<"
73
+ - !ruby/object:Gem::Version
74
+ version: '5.0'
75
+ description: Verifies that all the links in a Jekyll website are valid. It can also
76
+ work with any static site generator.
77
+ email: zakary.kamal.fs@outlook.com
78
+ executables:
79
+ - jekyll-link-checker
80
+ extensions: []
81
+ extra_rdoc_files: []
82
+ files:
83
+ - Gemfile
84
+ - exe/jekyll-link-checker
85
+ - lib/jekyll-hook.rb
86
+ - lib/jekyll-link-checker.rb
87
+ - lib/link-checker.rb
88
+ - lib/version.rb
89
+ homepage: https://gitlab.com/ZakCodes/jekyll-link-checker
90
+ licenses:
91
+ - MIT
92
+ metadata:
93
+ bug_tracker_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/issues
94
+ changelog_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/-/releases
95
+ homepage_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
96
+ source_code_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubygems_version: 3.0.6
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: Checks all the links of a Jekyll website.
116
+ test_files: []