bookbindery 9.10.0 → 9.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bookbinder.gemspec +2 -3
- data/lib/bookbinder/commands/bind.rb +5 -6
- data/lib/bookbinder/commands/collection.rb +2 -2
- data/lib/bookbinder/css_link_checker.rb +1 -2
- data/lib/bookbinder/ingest/local_filesystem_cloner.rb +5 -2
- data/lib/bookbinder/local_filesystem_accessor.rb +8 -1
- data/lib/bookbinder/postprocessing/link_checker.rb +144 -0
- data/master_middleman/quicklinks_renderer.rb +1 -0
- data/master_middleman/source/layouts/layout.erb +1 -1
- data/master_middleman/source/stylesheets/base.scss +5 -0
- data/template_app/Gemfile +1 -1
- data/template_app/Gemfile.lock +1 -1
- metadata +7 -24
- data/lib/bookbinder/postprocessing/broken_links_checker.rb +0 -43
- data/lib/bookbinder/sieve.rb +0 -67
- data/lib/bookbinder/spider.rb +0 -77
- data/lib/bookbinder/stabilimentum.rb +0 -63
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a71017cf24127c36b2352c29a791a0615505e1e9
|
4
|
+
data.tar.gz: 52f02876f8af186158833b6d5e45abfa4b15a138
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e798bc960b21e114bef43adfc4beccce3f314183121233a8cbe94e6971308e5ebc5c601caff83168c55be95100ecf27c7956f0f73786076153935f3cee8045c
|
7
|
+
data.tar.gz: da0b1eca48164044ceab210bd53d6d5c2b3454a64c2f91dcab7d19bc917716de1ae91942b118ed4b9b0b99194f459c4b3e4f23fa28c7101a832bd60391665372
|
data/bookbinder.gemspec
CHANGED
@@ -2,7 +2,7 @@ require 'base64'
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'bookbindery'
|
5
|
-
s.version = '9.10.0'
|
5
|
+
s.version = '9.11.0'
|
6
6
|
s.summary = 'Markdown to Rackup application documentation generator'
|
7
7
|
s.description = 'A command line utility to be run in Book repositories to stitch together their constituent Markdown repos into a static-HTML-serving application'
|
8
8
|
s.authors = ['Mike Grafton', 'Lucas Marks', 'Gavin Morgan', 'Nikhil Gajwani', 'Dan Wendorf', 'Brenda Chan', 'Matthew Boedicker', 'Andrew Bruce', 'Frank Kotsianas', 'Elena Sharma', 'Christa Hartsock', 'Michael Trestman', 'Alpha Chen', 'Sarah McAlear', 'Gregg Van Hove']
|
@@ -22,7 +22,6 @@ Gem::Specification.new do |s|
|
|
22
22
|
s.add_runtime_dependency 'middleman-syntax', ['~> 2.0']
|
23
23
|
s.add_runtime_dependency 'rouge', '!= 1.9.1'
|
24
24
|
s.add_runtime_dependency 'redcarpet', ['~> 3.2.3']
|
25
|
-
s.add_runtime_dependency 'anemone'
|
26
25
|
s.add_runtime_dependency 'css_parser'
|
27
26
|
s.add_runtime_dependency 'puma'
|
28
27
|
s.add_runtime_dependency 'rack-rewrite'
|
@@ -37,6 +36,6 @@ Gem::Specification.new do |s|
|
|
37
36
|
s.add_development_dependency 'pry-byebug'
|
38
37
|
s.add_development_dependency 'rake'
|
39
38
|
s.add_development_dependency 'rspec'
|
40
|
-
s.add_development_dependency 'sendgrid-ruby'
|
39
|
+
s.add_development_dependency 'sendgrid-ruby', '< 3.0'
|
41
40
|
s.add_development_dependency 'jasmine'
|
42
41
|
end
|
data/lib/bookbinder/commands/bind.rb
CHANGED
@@ -11,7 +11,7 @@ module Bookbinder
|
|
11
11
|
config_decorator: nil,
|
12
12
|
file_system_accessor: nil,
|
13
13
|
middleman_runner: nil,
|
14
|
-
|
14
|
+
link_checker: nil,
|
15
15
|
preprocessor: nil,
|
16
16
|
cloner_factory: nil,
|
17
17
|
section_repository: nil,
|
@@ -23,7 +23,7 @@ module Bookbinder
|
|
23
23
|
@config_decorator = config_decorator
|
24
24
|
@file_system_accessor = file_system_accessor
|
25
25
|
@middleman_runner = middleman_runner
|
26
|
-
@broken_links_checker = broken_links_checker
|
26
|
+
@link_checker = link_checker
|
27
27
|
@preprocessor = preprocessor
|
28
28
|
@cloner_factory = cloner_factory
|
29
29
|
@section_repository = section_repository
|
@@ -75,12 +75,11 @@ module Bookbinder
|
|
75
75
|
if generation_result.success?
|
76
76
|
file_system_accessor.copy(output_locations.build_dir, output_locations.public_dir)
|
77
77
|
|
78
|
-
|
79
|
-
broken_links_checker.announce(bind_options.streams)
|
78
|
+
link_checker.check!(bind_config.broken_link_exclusions)
|
80
79
|
|
81
80
|
bind_options.streams[:success].puts "Bookbinder bound your book into #{output_locations.final_app_dir}"
|
82
81
|
|
83
|
-
|
82
|
+
link_checker.has_errors? ? 1 : 0
|
84
83
|
else
|
85
84
|
bind_options.streams[:err].puts "Your bind failed. Rerun with --verbose to troubleshoot."
|
86
85
|
1
|
@@ -100,7 +99,7 @@ module Bookbinder
|
|
100
99
|
:output_locations,
|
101
100
|
:preprocessor,
|
102
101
|
:section_repository,
|
103
|
-
:broken_links_checker,
|
102
|
+
:link_checker,
|
104
103
|
:middleman_runner,
|
105
104
|
)
|
106
105
|
|
data/lib/bookbinder/commands/collection.rb
CHANGED
@@ -17,7 +17,7 @@ require_relative '../ingest/cloner_factory'
|
|
17
17
|
require_relative '../ingest/section_repository'
|
18
18
|
require_relative '../local_filesystem_accessor'
|
19
19
|
require_relative '../middleman_runner'
|
20
|
-
require_relative '../postprocessing/broken_links_checker'
|
20
|
+
require_relative '../postprocessing/link_checker'
|
21
21
|
require_relative '../preprocessing/dita_html_preprocessor'
|
22
22
|
require_relative '../preprocessing/dita_pdf_preprocessor'
|
23
23
|
require_relative '../preprocessing/link_to_site_gen_dir'
|
@@ -69,7 +69,7 @@ module Bookbinder
|
|
69
69
|
config_fetcher: configuration_fetcher(Config::Configuration),
|
70
70
|
config_decorator: Config::ConfigurationDecorator.new(loader: config_loader, config_filename: 'bookbinder.yml'),
|
71
71
|
file_system_accessor: local_filesystem_accessor,
|
72
|
-
|
72
|
+
link_checker: Postprocessing::LinkChecker.new(local_filesystem_accessor, final_app_directory, streams),
|
73
73
|
preprocessor: Preprocessing::Preprocessor.new(
|
74
74
|
Preprocessing::DitaHTMLPreprocessor.new(
|
75
75
|
local_filesystem_accessor,
|
data/lib/bookbinder/css_link_checker.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'css_parser'
|
2
|
-
require_relative 'spider'
|
3
2
|
|
4
3
|
module Bookbinder
|
5
4
|
class CssLinkChecker
|
@@ -24,7 +23,7 @@ module Bookbinder
|
|
24
23
|
css.each_selector do |s, declaration, sp|
|
25
24
|
contents_of_url_value = /url\((.*?)\)/
|
26
25
|
file_url = declaration.match contents_of_url_value
|
27
|
-
localized_uris << Spider.prepend_location(stylesheet, file_url[1]) if file_url
|
26
|
+
localized_uris << "#{URI(stylesheet).path} => #{file_url[1]}" if file_url
|
28
27
|
end
|
29
28
|
|
30
29
|
localized_uris
|
data/lib/bookbinder/ingest/local_filesystem_cloner.rb
CHANGED
@@ -18,7 +18,8 @@ module Bookbinder
|
|
18
18
|
link!(
|
19
19
|
source_repo_name,
|
20
20
|
Pathname(user_repo_dir).join(source_repo_name.split('/').last),
|
21
|
-
Pathname(destination_parent_dir).join(DestinationDirectory.new(source_repo_name, destination_dir_name))
|
21
|
+
Pathname(destination_parent_dir).join(DestinationDirectory.new(source_repo_name, destination_dir_name)),
|
22
|
+
source_ref
|
22
23
|
)
|
23
24
|
end
|
24
25
|
|
@@ -26,7 +27,7 @@ module Bookbinder
|
|
26
27
|
|
27
28
|
attr_reader :streams, :filesystem, :user_repo_dir
|
28
29
|
|
29
|
-
def link!(source_repo_name, source_dir, dest_dir)
|
30
|
+
def link!(source_repo_name, source_dir, dest_dir, source_ref)
|
30
31
|
source_exists = filesystem.file_exist?(source_dir)
|
31
32
|
|
32
33
|
if source_exists && filesystem.file_exist?(dest_dir)
|
@@ -34,6 +35,7 @@ module Bookbinder
|
|
34
35
|
WorkingCopy.new(
|
35
36
|
copied_to: dest_dir,
|
36
37
|
full_name: source_repo_name,
|
38
|
+
ref: source_ref
|
37
39
|
)
|
38
40
|
elsif source_exists
|
39
41
|
announce(source_dir)
|
@@ -41,6 +43,7 @@ module Bookbinder
|
|
41
43
|
WorkingCopy.new(
|
42
44
|
copied_to: dest_dir,
|
43
45
|
full_name: source_repo_name,
|
46
|
+
ref: source_ref
|
44
47
|
)
|
45
48
|
else
|
46
49
|
streams[:out].puts " skipping (not found) #{source_dir}"
|
data/lib/bookbinder/local_filesystem_accessor.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'find'
|
2
2
|
require 'pathname'
|
3
|
-
require 'nokogiri'
|
4
3
|
require_relative 'errors/programmer_mistake'
|
5
4
|
|
6
5
|
module Bookbinder
|
@@ -10,6 +9,14 @@ module Bookbinder
|
|
10
9
|
File.exist?(path)
|
11
10
|
end
|
12
11
|
|
12
|
+
def is_file?(path)
|
13
|
+
File.file?(path)
|
14
|
+
end
|
15
|
+
|
16
|
+
def is_dir?(path)
|
17
|
+
Dir.exists?(path)
|
18
|
+
end
|
19
|
+
|
13
20
|
def write(to: nil, text: nil)
|
14
21
|
make_directory(File.dirname to)
|
15
22
|
|
data/lib/bookbinder/postprocessing/link_checker.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require_relative '../css_link_checker'
|
3
|
+
|
4
|
+
module Bookbinder
|
5
|
+
module Postprocessing
|
6
|
+
class LinkChecker
|
7
|
+
def initialize(fs, root_path, output_streams)
|
8
|
+
@fs = fs
|
9
|
+
@root_path = root_path
|
10
|
+
@output_streams = output_streams
|
11
|
+
@broken_link_count = 0
|
12
|
+
@redirect_regexes = {}
|
13
|
+
@redirect_strings = {}
|
14
|
+
|
15
|
+
@convert_to_relative = %r{\A.*#{root_path.to_s}/public}
|
16
|
+
@default_link_exclusions = %r{\A(?:https?://|javascript:|mailto:)}
|
17
|
+
@excluded_pages = %r{\A/(?:404\.html|subnavs|javascripts|stylesheets|style_guide)}
|
18
|
+
end
|
19
|
+
|
20
|
+
def check!(link_exclusions = /(?!.*)/)
|
21
|
+
@output_streams[:out].puts "\nChecking for broken links..."
|
22
|
+
load_redirects!
|
23
|
+
load_page_links
|
24
|
+
|
25
|
+
report_broken_links!(link_exclusions)
|
26
|
+
report_orphaned_pages!
|
27
|
+
|
28
|
+
if has_errors?
|
29
|
+
err "\nFound #{@broken_link_count} broken links!"
|
30
|
+
else
|
31
|
+
out "\nNo broken links!"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def has_errors?
|
36
|
+
@broken_link_count > 0
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def report_broken_links!(link_exclusions)
|
42
|
+
@page_links.each do |page, links|
|
43
|
+
links.each do |link|
|
44
|
+
next if skip?(link, link_exclusions)
|
45
|
+
|
46
|
+
absolute_link, fragment = normalize_link(link, page)
|
47
|
+
|
48
|
+
if !page_exists?(absolute_link) && !file_exists?(absolute_link)
|
49
|
+
@broken_link_count += 1
|
50
|
+
err "#{page} => #{absolute_link}#{fragment ? "##{fragment}" : ''}"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
broken_css_links = Dir.chdir(@root_path) { CssLinkChecker.new.broken_links_in_all_stylesheets }
|
56
|
+
|
57
|
+
@broken_link_count += broken_css_links.size
|
58
|
+
broken_css_links.each do |link|
|
59
|
+
err link
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def report_orphaned_pages!
|
64
|
+
linked_pages = @page_links.map do |page, links|
|
65
|
+
links.map do |link|
|
66
|
+
normalize_link(link, page)[0]
|
67
|
+
end
|
68
|
+
end.flatten.uniq
|
69
|
+
|
70
|
+
orphaned_pages = @page_links.keys.reject { |page| page == '/index.html' || linked_pages.include?(page) }
|
71
|
+
if orphaned_pages.size > 0
|
72
|
+
err "\nOrphaned pages"
|
73
|
+
orphaned_pages.each do |page|
|
74
|
+
err "No links to => #{page}"
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def skip?(link_path, link_exclusions)
|
80
|
+
@default_link_exclusions.match(link_path) || link_path.match(link_exclusions)
|
81
|
+
end
|
82
|
+
|
83
|
+
def page_exists?(link)
|
84
|
+
@page_links.has_key?(link) ||
|
85
|
+
@redirect_strings.has_key?(link) ||
|
86
|
+
@redirect_regexes.keys.detect {|reg| reg.match(link)}
|
87
|
+
end
|
88
|
+
|
89
|
+
def normalize_link(link, page)
|
90
|
+
return [page, link.sub(/\A#/, '')] if link[0] == '#'
|
91
|
+
|
92
|
+
absolute_link = link[0] == '/' ? link : File.expand_path(link, File.dirname(page))
|
93
|
+
absolute_link.split('#')
|
94
|
+
end
|
95
|
+
|
96
|
+
def file_exists?(link)
|
97
|
+
full_path = File.join(@root_path, 'public', link)
|
98
|
+
@fs.is_file?(full_path) || (@fs.is_dir?(full_path) && @fs.is_file?(File.join(full_path, 'index.html')))
|
99
|
+
end
|
100
|
+
|
101
|
+
def load_page_links
|
102
|
+
files = @fs.find_files_with_ext('html', File.join(@root_path, 'public'))
|
103
|
+
|
104
|
+
@page_links = files.each.with_object({}) do |file_path, links|
|
105
|
+
public_path = file_path.sub(@convert_to_relative, '')
|
106
|
+
|
107
|
+
if !@excluded_pages.match(public_path)
|
108
|
+
html = Nokogiri::HTML(@fs.read(file_path))
|
109
|
+
|
110
|
+
links[public_path] = html.css('a[href]').map { |link| link['href'] }
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def load_redirects!
|
116
|
+
redirects_path = File.join(@root_path, 'redirects.rb')
|
117
|
+
if @fs.is_file?(redirects_path)
|
118
|
+
contents = @fs.read(redirects_path)
|
119
|
+
instance_eval contents
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def out(str)
|
124
|
+
@output_streams[:out].puts(str)
|
125
|
+
end
|
126
|
+
|
127
|
+
def err(str)
|
128
|
+
@output_streams[:err].puts(str)
|
129
|
+
end
|
130
|
+
|
131
|
+
def r301(source, dest, options={})
|
132
|
+
return if options.has_key?(:if)
|
133
|
+
|
134
|
+
case source
|
135
|
+
when Regexp
|
136
|
+
@redirect_regexes[source] = dest
|
137
|
+
when String
|
138
|
+
@redirect_strings[source] = dest
|
139
|
+
end
|
140
|
+
end
|
141
|
+
alias r302 r301
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
data/master_middleman/quicklinks_renderer.rb
CHANGED
@@ -45,6 +45,7 @@ class QuicklinksRenderer < Redcarpet::Render::Base
|
|
45
45
|
doc = Nokogiri::HTML(text)
|
46
46
|
target_anchor = doc.css('a').first
|
47
47
|
return unless target_anchor && target_anchor['id']
|
48
|
+
return if (target_anchor['class'] || '').match(/\bno-quick-link\b/)
|
48
49
|
|
49
50
|
anchor = Nokogiri::XML::Node.new('a', document)
|
50
51
|
anchor['href'] = "##{target_anchor['id']}"
|
data/master_middleman/source/layouts/layout.erb
CHANGED
@@ -43,7 +43,7 @@
|
|
43
43
|
<%= partial 'layouts/local-header' %>
|
44
44
|
<%= partial 'layouts/title' %>
|
45
45
|
<% if quick_links %>
|
46
|
-
<div id="js-quick-links"
|
46
|
+
<div id="js-quick-links" <%= 'class="list-style-none"' if data.page.list_style_none %>>
|
47
47
|
<%= quick_links %>
|
48
48
|
</div>
|
49
49
|
<% end %>
|
data/master_middleman/source/stylesheets/base.scss
CHANGED
@@ -198,6 +198,10 @@ category: basics
|
|
198
198
|
}
|
199
199
|
}
|
200
200
|
|
201
|
+
.list-style-none .quick-links > ul {
|
202
|
+
list-style-type: none;
|
203
|
+
}
|
204
|
+
|
201
205
|
// ~CONTENT
|
202
206
|
// ===================================================
|
203
207
|
// Designate external links with an icon
|
@@ -344,6 +348,7 @@ category: basics
|
|
344
348
|
bottom: 4em;
|
345
349
|
position: fixed;
|
346
350
|
right: 4em;
|
351
|
+
z-index: 10;
|
347
352
|
&:hover {
|
348
353
|
background: none;
|
349
354
|
}
|
data/template_app/Gemfile
CHANGED
data/template_app/Gemfile.lock
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bookbindery
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 9.10.0
|
4
|
+
version: 9.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Grafton
|
@@ -22,7 +22,7 @@ authors:
|
|
22
22
|
autorequire:
|
23
23
|
bindir: install_bin
|
24
24
|
cert_chain: []
|
25
|
-
date: 2016-06-
|
25
|
+
date: 2016-06-21 00:00:00.000000000 Z
|
26
26
|
dependencies:
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: fog-aws
|
@@ -122,20 +122,6 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: 3.2.3
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: anemone
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - ">="
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0'
|
132
|
-
type: :runtime
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - ">="
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '0'
|
139
125
|
- !ruby/object:Gem::Dependency
|
140
126
|
name: css_parser
|
141
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -322,16 +308,16 @@ dependencies:
|
|
322
308
|
name: sendgrid-ruby
|
323
309
|
requirement: !ruby/object:Gem::Requirement
|
324
310
|
requirements:
|
325
|
-
- - ">="
|
311
|
+
- - "<"
|
326
312
|
- !ruby/object:Gem::Version
|
327
|
-
version: '0'
|
313
|
+
version: '3.0'
|
328
314
|
type: :development
|
329
315
|
prerelease: false
|
330
316
|
version_requirements: !ruby/object:Gem::Requirement
|
331
317
|
requirements:
|
332
|
-
- - ">="
|
318
|
+
- - "<"
|
333
319
|
- !ruby/object:Gem::Version
|
334
|
-
version: '0'
|
320
|
+
version: '3.0'
|
335
321
|
- !ruby/object:Gem::Dependency
|
336
322
|
name: jasmine
|
337
323
|
requirement: !ruby/object:Gem::Requirement
|
@@ -405,16 +391,13 @@ files:
|
|
405
391
|
- lib/bookbinder/ingest/working_copy.rb
|
406
392
|
- lib/bookbinder/local_filesystem_accessor.rb
|
407
393
|
- lib/bookbinder/middleman_runner.rb
|
408
|
-
- lib/bookbinder/postprocessing/broken_links_checker.rb
|
394
|
+
- lib/bookbinder/postprocessing/link_checker.rb
|
409
395
|
- lib/bookbinder/preprocessing/dita_html_preprocessor.rb
|
410
396
|
- lib/bookbinder/preprocessing/dita_pdf_preprocessor.rb
|
411
397
|
- lib/bookbinder/preprocessing/link_to_site_gen_dir.rb
|
412
398
|
- lib/bookbinder/preprocessing/preprocessor.rb
|
413
399
|
- lib/bookbinder/server_director.rb
|
414
400
|
- lib/bookbinder/sheller.rb
|
415
|
-
- lib/bookbinder/sieve.rb
|
416
|
-
- lib/bookbinder/spider.rb
|
417
|
-
- lib/bookbinder/stabilimentum.rb
|
418
401
|
- lib/bookbinder/streams/colorized_stream.rb
|
419
402
|
- lib/bookbinder/streams/filter_stream.rb
|
420
403
|
- lib/bookbinder/subnav/navigation_entries_from_html_toc.rb
|
data/lib/bookbinder/postprocessing/broken_links_checker.rb
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
require_relative '../server_director'
|
2
|
-
require_relative '../spider'
|
3
|
-
require_relative '../../../template_app/rack_app'
|
4
|
-
|
5
|
-
module Bookbinder
|
6
|
-
module Postprocessing
|
7
|
-
class BrokenLinksChecker
|
8
|
-
def self.build(final_app_directory, port)
|
9
|
-
new(
|
10
|
-
Spider.new(app_dir: final_app_directory),
|
11
|
-
ServerDirector.new(
|
12
|
-
app: RackApp.new(Pathname('redirects.rb'), auth_required: false).app,
|
13
|
-
directory: final_app_directory,
|
14
|
-
port: port
|
15
|
-
)
|
16
|
-
)
|
17
|
-
end
|
18
|
-
|
19
|
-
def initialize(spider, server_director)
|
20
|
-
@spider = spider
|
21
|
-
@server_director = server_director
|
22
|
-
end
|
23
|
-
|
24
|
-
def check!(broken_link_exclusions)
|
25
|
-
server_director.use_server { |port|
|
26
|
-
@result = spider.find_broken_links(port, broken_link_exclusions: broken_link_exclusions)
|
27
|
-
}
|
28
|
-
end
|
29
|
-
|
30
|
-
def announce(streams)
|
31
|
-
@result.announce_broken_links(streams)
|
32
|
-
end
|
33
|
-
|
34
|
-
def has_broken_links?
|
35
|
-
@result.has_broken_links?
|
36
|
-
end
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
attr_reader :server_director, :spider
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
data/lib/bookbinder/sieve.rb
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
require_relative 'spider'
|
2
|
-
|
3
|
-
module Bookbinder
|
4
|
-
class Sieve
|
5
|
-
def initialize(domain: ->(){ raise 'You must supply a domain parameter.' }.call)
|
6
|
-
@unverified_fragments_by_url = {}
|
7
|
-
@domain = domain
|
8
|
-
end
|
9
|
-
|
10
|
-
def links_from(page, is_first_pass)
|
11
|
-
if page.not_found? && page.referer
|
12
|
-
working = []
|
13
|
-
broken = [Spider.prepend_location(page.referer, page.url)]
|
14
|
-
elsif page.not_found?
|
15
|
-
working = []
|
16
|
-
broken = []
|
17
|
-
else
|
18
|
-
working = [page.url.to_s]
|
19
|
-
broken = broken_fragments_targeting(page, is_first_pass)
|
20
|
-
store_unverified_fragments_from(page) if is_first_pass
|
21
|
-
end
|
22
|
-
|
23
|
-
return broken, working
|
24
|
-
end
|
25
|
-
|
26
|
-
private
|
27
|
-
|
28
|
-
def store_unverified_fragments_from(page)
|
29
|
-
@unverified_fragments_by_url.merge! fragments_targeting_other_pages_from page
|
30
|
-
end
|
31
|
-
|
32
|
-
def broken_fragments_targeting(page, first_pass)
|
33
|
-
first_pass ? local_fragments_missing_from(page) : remote_fragments_missing_from(page)
|
34
|
-
end
|
35
|
-
|
36
|
-
def local_fragments_missing_from(page)
|
37
|
-
local_fragments = page.fragment_identifiers targeting_locally: true
|
38
|
-
local_fragments.reject { |uri| page.has_target_for?(uri) }.map { |uri| Spider.prepend_location(page.url, uri) }
|
39
|
-
end
|
40
|
-
|
41
|
-
def remote_fragments_missing_from(page)
|
42
|
-
@unverified_fragments_by_url.fetch(page.url, []).reject { |localized_identifier| page.has_target_for? URI(strip_location(localized_identifier)) }
|
43
|
-
end
|
44
|
-
|
45
|
-
def fragments_targeting_other_pages_from(page)
|
46
|
-
uris_with_fragments = page.fragment_identifiers(targeting_locally: false)
|
47
|
-
uris_with_fragments.reduce({}) { |dict, uri| merge_uris_under_targets(dict, page, uri) }
|
48
|
-
end
|
49
|
-
|
50
|
-
def merge_uris_under_targets(dict, page, uri)
|
51
|
-
target_url = URI::join @domain, uri.path
|
52
|
-
localized_identifier = Spider.prepend_location(page.url, "##{uri.fragment}")
|
53
|
-
|
54
|
-
if dict.has_key? target_url
|
55
|
-
dict[target_url] << localized_identifier
|
56
|
-
else
|
57
|
-
dict[target_url] = [localized_identifier]
|
58
|
-
end
|
59
|
-
|
60
|
-
dict
|
61
|
-
end
|
62
|
-
|
63
|
-
def strip_location(id)
|
64
|
-
id.split('=> ').last
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
data/lib/bookbinder/spider.rb
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
require 'anemone'
|
2
|
-
require 'pty'
|
3
|
-
require_relative 'css_link_checker'
|
4
|
-
require_relative 'sieve'
|
5
|
-
require_relative 'stabilimentum'
|
6
|
-
|
7
|
-
module Bookbinder
|
8
|
-
class Spider
|
9
|
-
class Result
|
10
|
-
def initialize(broken_links)
|
11
|
-
@broken_links = broken_links
|
12
|
-
end
|
13
|
-
|
14
|
-
def has_broken_links?
|
15
|
-
@broken_links.any?
|
16
|
-
end
|
17
|
-
|
18
|
-
def announce_broken_links(streams)
|
19
|
-
if @broken_links.none?
|
20
|
-
streams[:out].puts "\nNo broken links!"
|
21
|
-
else
|
22
|
-
streams[:err].puts(<<-MESSAGE)
|
23
|
-
|
24
|
-
Found #{@broken_links.count} broken links!
|
25
|
-
|
26
|
-
#{@broken_links.sort.join("\n")}
|
27
|
-
|
28
|
-
Found #{@broken_links.count} broken links!
|
29
|
-
MESSAGE
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def self.prepend_location(location, url)
|
35
|
-
"#{URI(location).path} => #{url}"
|
36
|
-
end
|
37
|
-
|
38
|
-
def initialize(app_dir: nil)
|
39
|
-
@app_dir = app_dir || raise('Spiders must be initialized with an app directory.')
|
40
|
-
end
|
41
|
-
|
42
|
-
def find_broken_links(port, broken_link_exclusions: /(?!.*)/)
|
43
|
-
temp_host = "localhost:#{port}"
|
44
|
-
sieve = Sieve.new domain: "http://#{temp_host}"
|
45
|
-
broken_links = crawl_from "http://#{temp_host}#{ENV['CUSTOM_ROOT']}/index.html", sieve
|
46
|
-
public_broken_links = broken_links.reject {|l| l.match(broken_link_exclusions)}
|
47
|
-
|
48
|
-
Result.new(public_broken_links)
|
49
|
-
end
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
attr_reader :app_dir
|
54
|
-
|
55
|
-
def crawl_from(url, sieve)
|
56
|
-
broken_links = []
|
57
|
-
2.times do |i|
|
58
|
-
is_first_pass = (i==0)
|
59
|
-
|
60
|
-
Anemone.crawl(url, discard_page_bodies: true) do |anemone|
|
61
|
-
dont_visit_fragments(anemone)
|
62
|
-
anemone.on_every_page do |page|
|
63
|
-
broken, working = sieve.links_from(Stabilimentum.new(page), is_first_pass)
|
64
|
-
broken_links.concat broken
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
broken_links.concat Dir.chdir(@app_dir) { CssLinkChecker.new.broken_links_in_all_stylesheets }
|
70
|
-
broken_links.compact.uniq
|
71
|
-
end
|
72
|
-
|
73
|
-
def dont_visit_fragments(anemone)
|
74
|
-
anemone.focus_crawl { |page| page.links.reject { |link| link.to_s.match(/%23/) } }
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
data/lib/bookbinder/stabilimentum.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
module Bookbinder
|
2
|
-
class Spider
|
3
|
-
class Stabilimentum # Decorates a piece of the web.
|
4
|
-
FudgedUri = Struct.new(:path, :fragment, :to_s)
|
5
|
-
|
6
|
-
def initialize(page)
|
7
|
-
@page = page
|
8
|
-
end
|
9
|
-
|
10
|
-
def referer
|
11
|
-
@page.referer
|
12
|
-
end
|
13
|
-
|
14
|
-
def not_found?
|
15
|
-
@page.not_found?
|
16
|
-
end
|
17
|
-
|
18
|
-
def url
|
19
|
-
@page.url
|
20
|
-
end
|
21
|
-
|
22
|
-
def has_target_for?(uri)
|
23
|
-
id_selector = uri.fragment
|
24
|
-
name_selector = "[name=#{uri.fragment}]"
|
25
|
-
|
26
|
-
if @page.doc
|
27
|
-
@page.doc.css("##{id_selector}").any? || @page.doc.css(name_selector).any?
|
28
|
-
else
|
29
|
-
false
|
30
|
-
end
|
31
|
-
rescue Nokogiri::CSS::SyntaxError
|
32
|
-
false
|
33
|
-
end
|
34
|
-
|
35
|
-
def fragment_identifiers(targeting_locally: false)
|
36
|
-
if targeting_locally
|
37
|
-
fragment_anchor_uris.select { |uri| uri.path.empty? }
|
38
|
-
else
|
39
|
-
fragment_anchor_uris.reject { |uri| uri.path.empty? }
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
private
|
44
|
-
|
45
|
-
def fragment_anchor_uris
|
46
|
-
anchors = @page.doc ? @page.doc.css('a') : []
|
47
|
-
anchors.map { |a| convert_to_uri(a) }.select { |u| u.fragment }
|
48
|
-
end
|
49
|
-
|
50
|
-
def convert_to_uri(anchor)
|
51
|
-
URI anchor['href'].to_s
|
52
|
-
rescue URI::InvalidURIError
|
53
|
-
create_fudged_uri(anchor['href'])
|
54
|
-
end
|
55
|
-
|
56
|
-
def create_fudged_uri(target)
|
57
|
-
path = target.split('#')[0]
|
58
|
-
fragment = target.include?('#') ? '#' + target.split('#')[1] : nil
|
59
|
-
FudgedUri.new(path, fragment, target)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|