jekyll-link-checker 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ab9d005b30ac7520a5f7912083966a50948c0d21d4e713bbb9639358dd033f77
4
+ data.tar.gz: ed02ed3ec9066ce54f4f74730c1de5d50206e83e39737eb8dead37ce4ae1f7ac
5
+ SHA512:
6
+ metadata.gz: 35940136ad2ddb9ccac822abf3f1fe3999c52b92052e732c9eb846be0eadc21f9b9b3f6de2fe61b4395c7ded8706c9e57cd22ad86bf1fb6c6bcaa4446785d44a
7
+ data.tar.gz: ef06e898f334b7d6a49f10af2ab9a084e77bf890fca8cd37903083483363c8e81a4b41c8af7da96fb7318eda1701747cfd951295018cb427944c4bee8f5cb977
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
# Resolve gems from the public RubyGems index.
source("https://rubygems.org")

# Take the dependency list from the gem's gemspec file.
gemspec
@@ -0,0 +1,63 @@
1
require "optparse"
require "yaml"
require "jekyll-link-checker"

# Find the config file: the first of `_config.yml` / `_config.yaml` that can
# be loaded wins.
config = nil
%w[.yml .yaml].each do |ext|
  file = "_config#{ext}"
  next unless File.file?(file)

  begin
    config = YAML.load_file(file)
    break
  rescue StandardError => e
    # Best effort: keep going with the next candidate (or the defaults), but
    # tell the user why an existing file was not used.
    warn "Couldn't load `#{file}`: #{e.message}"
  end
end

link_checker = if config
                 LinkChecker.from_config(config)
               else
                 puts "`_config.yml` or `_config.yaml` couldn't be read"
                 puts "The default configuration will be used"
                 LinkChecker.new
               end

# Command line options override the values read from the configuration file.
parser = OptionParser.new do |opts|
  opts.banner = "Usage: jekyll-link-checker [options]"

  opts.on("-n HOSTNAME", "--hostname HOSTNAME", "Hostname of the site. Ex.: example.com") do |hostname|
    link_checker.hostname = hostname
  end
  opts.on("-b BASEURL", "--baseurl BASEURL", "(this option is currently ignored)") do |baseurl|
    link_checker.baseurl = baseurl
  end
  opts.on("-d SITE_FOLDER", "--site-folder SITE_FOLDER", "Path to the site's folder") do |site_folder|
    link_checker.site_folder = site_folder
  end
  opts.on("-s [ARRAY]", "--skip-list [ARRAY]", Array, "Comma separated list of links not to check") do |skip_list|
    # The argument is optional: without a value the skip list is emptied
    # instead of passing nil, which update_skip_list rejects.
    link_checker.update_skip_list(skip_list || [])
  end
  opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
    link_checker.update_skip_list(skip_list)
  end
  opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status\n" \
                                    "head-only: Only tries to do a HEAD request\n" \
                                    "get-only: Only tries to do a GET request") do |mode|
    # The help text documents hyphenated names while the library compares
    # against underscored ones ("get_only", "head_only"); accept both.
    link_checker.mode = mode.tr("-", "_")
  end
  opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do |_|
    link_checker.fail_fast = true
  end
  # `--[no-]abort` is OptionParser's syntax for a negatable switch; the block
  # receives true for -a/--abort and false for --no-abort.
  opts.on("-a", "--[no-]abort", "Exit with a failure status when invalid links are found") do |abort_on_failure|
    link_checker.abort_on_failure = abort_on_failure
  end
  opts.on("-V", "--verbose", "Run with verbose output") do |_|
    link_checker.verbose = true
  end
  opts.on_tail("-v", "--version", "Outputs the version and exit") do |_|
    puts JekyllLinkChecker::VERSION
    exit
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Show the problem and the usage instead of a stack trace.
  abort "#{e.message}\n#{parser}"
end

link_checker.check_links
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "jekyll"
4
+
5
# Once Jekyll has written the site, build a link checker from the site's
# configuration and run it.
Jekyll::Hooks.register(:site, :post_write, priority: 0) do |site|
  LinkChecker.from_config(site.config).check_links
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "link-checker"
4
+ require_relative "jekyll-hook"
5
+ require_relative "version"
@@ -0,0 +1,249 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "faraday_middleware"
5
+ require "faraday-cookie_jar"
6
+ require "addressable"
7
+ require "pathname"
8
+
9
# Checks all the links found in the HTML files of a generated static site.
#
# Links without a hostname, or whose hostname is the configured one, are
# treated as internal and looked up among the files of the site folder. Other
# http(s) links are requested over the network. `baseurl` is stored but is not
# used by `check_links`.
class LinkChecker
  # Headers sent with every request made to an external link.
  HEADERS = {
    "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/41.0.2228.0 Safari/537.36",
    "Accept" => "text/html," \
                "application/xhtml+xml," \
                "application/xml;" \
                "q=0.9,*/*;q=0.8",
    "Accept-Language" => "en-US,en;q=0.5",
    "DNT" => "1",
    "Upgrade-Insecure-Requests" => "1",
    "Pragma" => "no-cache",
    "Cache-Control" => "no-cache"
  }.freeze

  # Captures the value of double-quoted `href` attributes; values starting
  # with `#` are not matched.
  HREF = /href="([^#"\n][^"\n]*)"/.freeze
  # Extensions of the files that are scanned for links.
  HTML = %w[.html .htm].freeze
  # Schemes of the external links that are requested.
  SCHEMES = %w[https http].freeze

  DEFAULT_BASE_URL = "/"
  DEFAULT_SITE_FOLDER = "_site"
  DEFAULT_MODE = "try_head"

  attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
                :fail_fast, :abort_on_failure
  attr_writer :files, :html_files, :links

  # Set default values for all the properties.
  #
  # opts may contain :hostname, :baseurl, :site_folder, :skip_list (Array of
  # links or String filename), :mode, :verbose, :fail_fast and
  # :abort_on_failure. A nil :abort_on_failure means true.
  def initialize(opts = {})
    @hostname = opts[:hostname]
    @baseurl = opts[:baseurl] || DEFAULT_BASE_URL
    @site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER

    update_skip_list(opts[:skip_list] || [])

    @mode = opts[:mode] || DEFAULT_MODE
    @verbose = opts[:verbose]
    @fail_fast = opts[:fail_fast]

    # Only an explicit false disables aborting; nil keeps the default.
    abort_option = opts[:abort_on_failure]
    @abort_on_failure = abort_option.nil? ? true : abort_option
  end

  # Initialize the link checker from a Jekyll configuration (a Hash with
  # String keys). The hostname is taken from "url", and the checker's own
  # settings from the "link-checker" entry.
  def self.from_config(config)
    opts = {}

    url = config["url"]
    opts[:hostname] = Addressable::URI.parse(url).hostname if url

    opts[:baseurl] = config["baseurl"]
    opts[:site_folder] = config["destination"]

    link_checker_config = config["link-checker"]
    if link_checker_config
      opts[:skip_list] = link_checker_config["skip-list"]
      opts[:mode] = link_checker_config["mode"]
      opts[:verbose] = link_checker_config["verbose"]
      opts[:fail_fast] = link_checker_config["fail-fast"]
      opts[:abort_on_failure] = link_checker_config["abort"]
    end

    LinkChecker.new(opts)
  end

  # Whether the options are valid, i.e. whether a hostname is set.
  def valid?
    return false unless @hostname

    true
  end

  # Updates skip_list with the given argument.
  # If the argument is an array, skip_list is set to the array.
  # If the argument is a string, the argument will be interpreted as the name
  # of a file where each non-blank line is a link to skip.
  # Raises ArgumentError for any other argument, and re-raises the error of a
  # file that can't be read after printing a warning.
  def update_skip_list(skip_list)
    case skip_list
    when Array
      @skip_list = skip_list
    when String
      begin
        entries = File.readlines(File.expand_path(skip_list)).map(&:strip)
      rescue StandardError
        warn "Couldn't read the skip list"
        raise
      end
      @skip_list = entries.reject(&:empty?)
    else
      raise ArgumentError, "skip_list must be a String or an array of String"
    end
  end

  # Checks all the links.
  #
  # Returns the number of invalid links. With fail_fast, the check stops at the
  # first invalid link and returns nil. When abort_on_failure is set, the
  # process is aborted instead of returning as soon as the check ends with
  # invalid links. Aborts as well if the configuration isn't valid.
  def check_links
    # Make sure the configuration is valid
    abort "Invalid configuration" unless valid?

    conn = create_connection
    total = links.size
    error_count = 0
    prev_msg_size = 0

    links.each_with_index do |(link, link_files), index|
      if verbose
        # Blank out the previous progress message before writing the new one
        msg = "#{link} #{index + 1}/#{total}"
        print "#{' ' * prev_msg_size}\r#{msg}\r"
        prev_msg_size = msg.size
      end

      # Skip the link if it's in the skip list
      next if @skip_list.include?(link)
      next unless link_broken?(conn, link, link_files)

      error_count += 1
      next unless fail_fast

      abort if abort_on_failure
      return nil
    end

    puts if verbose

    unless error_count.zero?
      msg = "There were #{error_count} invalid links"
      abort msg if @abort_on_failure
      puts msg
    end

    error_count
  end

  # Find all files in the site folder (memoized).
  def files
    @files ||= Dir[File.join(@site_folder, "**/*")].select { |f| File.file?(f) }
  end

  # Find all the valid links for the site: the path of every file relative to
  # the site folder, prefixed with "/" and normalized like internal links are
  # (see normalize_path).
  def valid_links
    @valid_links ||= begin
      # relative_path_from is given a Pathname: older Rubies don't accept a String
      root = Pathname.new(@site_folder)
      files.map do |file|
        normalize_path("/#{Pathname.new(file).relative_path_from(root)}")
      end
    end
  end

  # Find all HTML files (memoized).
  def html_files
    @html_files ||= files.select { |file| HTML.include?(File.extname(file)) }
  end

  # Find all links in html_files.
  # Returns a Hash mapping each link to the list of files containing it.
  def links
    return @links if @links

    @links = html_files.each_with_object({}) do |file, found|
      # File.read closes the file; scrub keeps the regexp from raising on
      # invalid byte sequences.
      hrefs = File.read(file).scrub.scan(HREF).map { |match| match[0].strip }
      hrefs.uniq.each { |link| (found[link] ||= []) << file }
    end
  end

  private

  # Create a connection to make requests
  def create_connection
    Faraday.new do |faraday|
      faraday.use FaradayMiddleware::FollowRedirects
      faraday.use :cookie_jar
      faraday.adapter Faraday.default_adapter
    end
  end

  # Strips a final "/index.html" segment and a trailing "/" so that "/dir",
  # "/dir/" and "/dir/index.html" all compare equal.
  def normalize_path(path)
    path.to_s.sub(%r{/index\.html\z}, "/").chomp("/")
  end

  # Checks a single link, printing a report when it is invalid.
  # Returns true if the link is invalid, false if it is valid or not checked.
  def link_broken?(conn, link, link_files)
    uri = Addressable::URI.parse(link)
    # NOTE(review): presumably filters links that have a scheme but no host,
    # such as "mailto:" links -- confirm against Addressable's URI#site.
    return false if uri.site&.end_with?(":")

    if uri.hostname.nil? || uri.hostname == hostname
      internal_link_broken?(uri, link, link_files)
    elsif uri.scheme.nil? || SCHEMES.include?(uri.scheme)
      external_link_broken?(conn, link, link_files)
    else
      false
    end
  rescue Addressable::URI::InvalidURIError => e
    # A malformed link is reported like any other invalid link
    puts "Invalid link '#{link}' in #{link_files}: #{e.message}"
    true
  end

  # Looks the path of an internal link up among the files of the site. Paths
  # are compared as they are written: a relative path isn't resolved against
  # the file containing it.
  def internal_link_broken?(uri, link, link_files)
    return false if valid_links.include?(normalize_path(uri.path))

    puts "Invalid internal link '#{link}' is present in:"
    link_files.each { |file| puts "\t#{file}" }
    true
  end

  # Requests an external link. A request that raises is reported as an
  # invalid link rather than stopping the whole check.
  def external_link_broken?(conn, link, link_files)
    status = make_request(conn, link)
    return false if status_allowed?(status)

    puts "Request to #{link} in #{link_files} returned #{status}"
    true
  rescue StandardError => e
    puts "Request to #{link} in #{link_files} failed: #{e.class}: #{e.message}"
    true
  end

  # Make a request on the connection for the URL and return its status.
  # The mode is accepted with hyphens or underscores ("get-only", "get_only").
  def make_request(conn, url)
    request_mode = mode.to_s.tr("-", "_")

    if request_mode != "get_only"
      response = conn.head(url, {}, HEADERS)
      return response.status if request_mode == "head_only" ||
                                status_allowed?(response.status)
    end

    conn.get(url, {}, HEADERS).status
  end

  # Returns whether the status is successful (2xx)
  def status_allowed?(status)
    (200..299).cover?(status)
  end
end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Namespace holding the version of the gem.
module JekyllLinkChecker
  # Version string printed by the executable's `--version` option.
  VERSION = "0.1.0"
end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jekyll-link-checker
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Zakary Kamal Ismail
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-09-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.15'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.15'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday-cookie_jar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.0.6
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.0.6
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday_middleware
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.13'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.13'
55
+ - !ruby/object:Gem::Dependency
56
+ name: jekyll
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ - - "<"
63
+ - !ruby/object:Gem::Version
64
+ version: '5.0'
65
+ type: :runtime
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '3.0'
72
+ - - "<"
73
+ - !ruby/object:Gem::Version
74
+ version: '5.0'
75
+ description: Verifies that all the links in a Jekyll website are valid. It can also
76
+ work with any static site generator.
77
+ email: zakary.kamal.fs@outlook.com
78
+ executables:
79
+ - jekyll-link-checker
80
+ extensions: []
81
+ extra_rdoc_files: []
82
+ files:
83
+ - Gemfile
84
+ - exe/jekyll-link-checker
85
+ - lib/jekyll-hook.rb
86
+ - lib/jekyll-link-checker.rb
87
+ - lib/link-checker.rb
88
+ - lib/version.rb
89
+ homepage: https://gitlab.com/ZakCodes/jekyll-link-checker
90
+ licenses:
91
+ - MIT
92
+ metadata:
93
+ bug_tracker_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/issues
94
+ changelog_uri: https://gitlab.com/ZakCodes/jekyll-link-checker/-/releases
95
+ homepage_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
96
+ source_code_uri: https://gitlab.com/ZakCodes/jekyll-link-checker
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubygems_version: 3.0.6
113
+ signing_key:
114
+ specification_version: 4
115
+ summary: Checks all the links of a Jekyll website.
116
+ test_files: []