site_checker 0.1.1 → 0.2.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rbenv-version +1 -0
- data/.rspec +1 -0
- data/History.md +9 -0
- data/LICENSE +29 -0
- data/README.md +102 -0
- data/gem_tasks/rspec.rake +6 -0
- data/gem_tasks/yard.rake +6 -0
- data/lib/site_checker/dsl.rb +17 -0
- data/lib/site_checker/io/content_from_file_system.rb +43 -0
- data/lib/site_checker/io/content_from_web.rb +36 -0
- data/lib/site_checker/link.rb +60 -0
- data/lib/site_checker/link_collector.rb +153 -0
- data/lib/site_checker/parse/page.rb +82 -0
- data/lib/site_checker.rb +90 -206
- data/site_checker.gemspec +24 -0
- data/spec/dsl_spec.rb +37 -0
- data/spec/integration_spec.rb +191 -0
- data/spec/site_checker/io/content_from_file_system_spec.rb +61 -0
- data/spec/site_checker/io/content_from_web_spec.rb +46 -0
- data/spec/site_checker/io/io_spec_helper.rb +22 -0
- data/spec/site_checker/link_collector_spec.rb +41 -0
- data/spec/site_checker/link_spec.rb +94 -0
- data/spec/site_checker/parse/page_spec.rb +71 -0
- data/spec/site_checker/parse/parse_spec_helper.rb +8 -0
- data/spec/spec_helper.rb +10 -0
- metadata +134 -66
data/lib/site_checker.rb
CHANGED
@@ -1,224 +1,108 @@
|
|
1
|
-
require 'nokogiri'
|
2
1
|
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
private
|
43
|
-
def process_local_page(url, parent_url)
|
44
|
-
links = collect_links(url, parent_url)
|
45
|
-
|
46
|
-
filter_out_working_anchors!(links)
|
47
|
-
report_and_remove_anchors!(links, parent_url)
|
48
|
-
|
49
|
-
links.each do |link, kind|
|
50
|
-
if kind != :anchor
|
51
|
-
visit(kind, url, link) unless visited?(kind, link)
|
52
|
-
else
|
53
|
-
end
|
4
|
+
require 'site_checker/io/content_from_file_system'
|
5
|
+
require 'site_checker/io/content_from_web'
|
6
|
+
require 'site_checker/parse/page'
|
7
|
+
require 'site_checker/link'
|
8
|
+
require 'site_checker/link_collector'
|
9
|
+
require 'site_checker/dsl'
|
10
|
+
|
11
|
+
module SiteChecker
|
12
|
+
class << self
|
13
|
+
attr_accessor :ignore_list
|
14
|
+
attr_accessor :visit_references
|
15
|
+
attr_accessor :max_recursion_depth
|
16
|
+
attr_accessor :dsl_enabled
|
17
|
+
attr_reader :link_collector
|
18
|
+
|
19
|
+
##
|
20
|
+
# The following configuration options, which can be used together, are available:
|
21
|
+
#
|
22
|
+
# - ignore certain links:
|
23
|
+
#
|
24
|
+
# SiteChecker.configure do |config|
|
25
|
+
# config.ignore_list = ["/", "/atom.xml"]
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# - visit the external references as well:
|
29
|
+
#
|
30
|
+
# SiteChecker.configure do |config|
|
31
|
+
# config.visit_references = true
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# - set the depth of the recursion:
|
35
|
+
#
|
36
|
+
# SiteChecker.configure do |config|
|
37
|
+
# config.max_recursion_depth = 3
|
38
|
+
# end
|
39
|
+
def configure
|
40
|
+
yield self
|
54
41
|
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def register_visit(kind, link)
|
58
|
-
@visits[kind] = [] unless @visits.has_key?(kind)
|
59
|
-
@visits[kind] << link
|
60
|
-
end
|
61
42
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
else
|
72
|
-
unless stop_recursion?
|
73
|
-
@recursion_depth += 1
|
74
|
-
process_local_page(link, parent_url)
|
75
|
-
@recursion_depth -= 1
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
def open_reference(kind, link, parent_url)
|
81
|
-
content = nil
|
82
|
-
begin
|
83
|
-
if kind == :local_page
|
84
|
-
if URI(@root).absolute?
|
85
|
-
content = open(link)
|
86
|
-
else
|
87
|
-
link = add_index_html(link)
|
88
|
-
content = File.open(link).read
|
89
|
-
end
|
90
|
-
elsif kind == :local_image
|
91
|
-
if URI(@root).absolute?
|
92
|
-
open(link)
|
93
|
-
else
|
94
|
-
File.open(link)
|
95
|
-
end
|
96
|
-
elsif @visit_references
|
97
|
-
open(link)
|
98
|
-
end
|
99
|
-
rescue OpenURI::HTTPError => e
|
100
|
-
new_problem(strip_root(parent_url), "#{strip_root(link)} (#{e.message.strip})")
|
101
|
-
rescue Errno::ENOENT => e
|
102
|
-
link = remove_index_html(link) if kind == :local_page
|
103
|
-
new_problem(strip_root(parent_url), "#{strip_root(link)} (404 Not Found)")
|
104
|
-
rescue => e
|
105
|
-
new_problem(strip_root(parent_url), "#{strip_root(link)} (#{e.message.strip})")
|
43
|
+
##
|
44
|
+
# Recursively visits the provided url looking for reference problems.
|
45
|
+
#
|
46
|
+
# @param [String] url where the processing starts
|
47
|
+
# @param [String] root the root URL of the site
|
48
|
+
#
|
49
|
+
def check(url, root)
|
50
|
+
create_instance
|
51
|
+
@link_collector.check(url, root)
|
106
52
|
end
|
107
|
-
content
|
108
|
-
end
|
109
53
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
new_problem(strip_root(parent_url), "#{strip_root(anchor)} (404 Not Found)")
|
118
|
-
links.delete(anchor)
|
54
|
+
##
|
55
|
+
# Returns the Array of the visited local pages.
|
56
|
+
#
|
57
|
+
# @return [Array] list of the visited local pages
|
58
|
+
#
|
59
|
+
def local_pages
|
60
|
+
@link_collector.local_pages
|
119
61
|
end
|
120
|
-
end
|
121
|
-
|
122
|
-
def has_anchor?(links, link)
|
123
|
-
anchor = link.gsub(/^.+#/, "")
|
124
|
-
links.has_key?(anchor) && links[anchor] == :anchor
|
125
|
-
end
|
126
|
-
|
127
|
-
|
128
|
-
def absolute_reference?(link)
|
129
|
-
link.start_with?(@root)
|
130
|
-
end
|
131
62
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
if content
|
140
|
-
doc = Nokogiri(content)
|
141
|
-
doc.xpath("//img").reject {|img| ignored?(img['src'])}.each do |img|
|
142
|
-
link_kind = detect_link_and_kind(img['src'], url, :remote_image, :local_image)
|
143
|
-
links.merge!(link_kind) unless link_kind.empty?
|
144
|
-
end
|
145
|
-
doc.xpath("//a").reject {|a| ignored?(a['href'])}.each do |a|
|
146
|
-
link_kind = detect_link_and_kind(a['href'], url, :remote_page, :local_page)
|
147
|
-
links.merge!(link_kind) unless link_kind.empty?
|
148
|
-
end
|
149
|
-
|
150
|
-
doc.xpath("//a").reject {|a| !a['id']}.each do |a|
|
151
|
-
links.merge!({a['id'] => :anchor})
|
152
|
-
end
|
63
|
+
##
|
64
|
+
# Returns the Array of the visited remote (external) pages.
|
65
|
+
#
|
66
|
+
# @return [Array] list of the visited remote pages
|
67
|
+
#
|
68
|
+
def remote_pages
|
69
|
+
@link_collector.remote_pages
|
153
70
|
end
|
154
|
-
links
|
155
|
-
end
|
156
71
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
link_kind[link.to_s] = external_kind
|
165
|
-
else
|
166
|
-
link_kind[create_absolute_reference(link.to_s)] = local_kind
|
167
|
-
end
|
72
|
+
##
|
73
|
+
# Returns the Array of the visited local images.
|
74
|
+
#
|
75
|
+
# @return [Array] list of the visited local images
|
76
|
+
#
|
77
|
+
def local_images
|
78
|
+
@link_collector.local_images
|
168
79
|
end
|
169
|
-
link_kind
|
170
|
-
end
|
171
|
-
|
172
|
-
def strip_trailing_slash(link)
|
173
|
-
link.gsub(/\/$/, "")
|
174
|
-
end
|
175
80
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
81
|
+
##
|
82
|
+
# Returns the Array of the visited remote (external) images.
|
83
|
+
#
|
84
|
+
# @return [Array] list of the visited remote images
|
85
|
+
#
|
86
|
+
def remote_images
|
87
|
+
@link_collector.remote_images
|
181
88
|
end
|
182
|
-
end
|
183
|
-
|
184
|
-
def add_index_html(path)
|
185
|
-
path.end_with?(".html") ? path : File.join(path, "index.html")
|
186
|
-
end
|
187
|
-
|
188
|
-
def remove_index_html(path)
|
189
|
-
path.gsub(/\/index.html$/, "")
|
190
|
-
end
|
191
89
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
90
|
+
##
|
91
|
+
# Returns the problems as a Hash that maps each parent URL to the Array of problematic links found on it.
|
92
|
+
#
|
93
|
+
# @return [Hash] the result of the check
|
94
|
+
#
|
95
|
+
def problems
|
96
|
+
@link_collector.problems
|
198
97
|
end
|
199
|
-
end
|
200
|
-
|
201
|
-
def new_problem(url, message)
|
202
|
-
url = @root if url.empty?
|
203
|
-
@problems[url] = [] unless problems.has_key?(url)
|
204
|
-
@problems[url] << message
|
205
|
-
end
|
206
98
|
|
207
|
-
|
208
|
-
|
209
|
-
@
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
def stop_recursion?
|
216
|
-
if @max_recursion_depth == -1
|
217
|
-
false
|
218
|
-
elsif @max_recursion_depth > @recursion_depth
|
219
|
-
false
|
220
|
-
else
|
221
|
-
true
|
99
|
+
private
|
100
|
+
def create_instance
|
101
|
+
@link_collector = SiteChecker::LinkCollector.new do |config|
|
102
|
+
config.visit_references = @visit_references if @visit_references
|
103
|
+
config.ignore_list = @ignore_list if @ignore_list
|
104
|
+
config.max_recursion_depth = @max_recursion_depth if @max_recursion_depth
|
105
|
+
end
|
222
106
|
end
|
223
107
|
end
|
224
|
-
end
|
108
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
Gem::Specification.new do |s|
|
3
|
+
s.name = 'site_checker'
|
4
|
+
s.version = '0.2.0.pre'
|
5
|
+
s.date = '2012-12-22'
|
6
|
+
s.summary = "site_checker-#{s.version}"
|
7
|
+
s.description = "A simple tool for checking references on your website"
|
8
|
+
s.authors = ["Zsolt Fabok"]
|
9
|
+
s.email = 'me@zsoltfabok.com'
|
10
|
+
s.homepage = 'https://github.com/ZsoltFabok/site_checker'
|
11
|
+
s.license = 'BSD'
|
12
|
+
|
13
|
+
s.files = `git ls-files`.split("\n").reject {|path| path =~ /\.gitignore$/ || path =~ /file$/ }
|
14
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
|
18
|
+
s.add_development_dependency('rspec' , '2.12.0')
|
19
|
+
s.add_development_dependency('webmock', '1.9.0')
|
20
|
+
s.add_development_dependency('rake' , '10.0.3')
|
21
|
+
s.add_development_dependency('yard' , '0.8.3')
|
22
|
+
|
23
|
+
s.add_runtime_dependency('nokogiri', '1.5.6')
|
24
|
+
end
|
data/spec/dsl_spec.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'site_checker/io/io_spec_helper'
|
3
|
+
|
4
|
+
describe "DSL" do
|
5
|
+
include IoSpecHelper
|
6
|
+
|
7
|
+
before(:each) do
|
8
|
+
@test_url = "http://localhost:4000"
|
9
|
+
@root = "http://localhost:4000"
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should forward all the method calls if DSL is enabled" do
|
13
|
+
SiteChecker.configure do |config|
|
14
|
+
config.dsl_enabled = true
|
15
|
+
end
|
16
|
+
|
17
|
+
local_pages = mock()
|
18
|
+
local_images = mock()
|
19
|
+
remote_pages = mock()
|
20
|
+
remote_images = mock()
|
21
|
+
problems = mock()
|
22
|
+
|
23
|
+
SiteChecker.should_receive(:check).with(@test_url, @root)
|
24
|
+
SiteChecker.should_receive(:local_pages).and_return(local_pages)
|
25
|
+
SiteChecker.should_receive(:remote_pages).and_return(remote_pages)
|
26
|
+
SiteChecker.should_receive(:local_images).and_return(local_images)
|
27
|
+
SiteChecker.should_receive(:remote_images).and_return(remote_images)
|
28
|
+
SiteChecker.should_receive(:problems).and_return(problems)
|
29
|
+
|
30
|
+
check_site(@test_url, @root)
|
31
|
+
collected_local_pages.should eql(local_pages)
|
32
|
+
collected_remote_pages.should eql(remote_pages)
|
33
|
+
collected_local_images.should eql(local_images)
|
34
|
+
collected_remote_images.should eql(remote_images)
|
35
|
+
collected_problems.should eql(problems)
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'site_checker/io/io_spec_helper'
|
3
|
+
|
4
|
+
describe "Integration" do
|
5
|
+
include IoSpecHelper
|
6
|
+
|
7
|
+
before(:each) do
|
8
|
+
SiteChecker.configure do |config|
|
9
|
+
config.visit_references = true
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "server based checking" do
|
14
|
+
before(:each) do
|
15
|
+
@test_url = "http://localhost:4000"
|
16
|
+
@root = "http://localhost:4000"
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should visit the page" do
|
20
|
+
content = "<html></html>"
|
21
|
+
webmock(@test_url, 200, content)
|
22
|
+
SiteChecker.check(@test_url, @root)
|
23
|
+
SiteChecker.local_pages.should eql([@test_url])
|
24
|
+
SiteChecker.problems.should be_empty
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should check the link to an external page" do
|
28
|
+
content = "<html>text<a href=\"http://external.org/\"/></html>"
|
29
|
+
webmock(@test_url, 200, content)
|
30
|
+
webmock("http://external.org", 200, "")
|
31
|
+
SiteChecker.check(@test_url, @root)
|
32
|
+
SiteChecker.remote_pages.should eql(["http://external.org/" ])
|
33
|
+
SiteChecker.problems.should be_empty
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should not check the link to an external page if the reference checking is turned off" do
|
37
|
+
SiteChecker.configure do |config|
|
38
|
+
config.visit_references = false
|
39
|
+
end
|
40
|
+
content = "<html>text<a href=\"http://external.org/\"/></html>"
|
41
|
+
webmock(@test_url, 200, content)
|
42
|
+
SiteChecker.check(@test_url, @root)
|
43
|
+
SiteChecker.problems.should be_empty
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should report a problem if the external link is dead" do
|
47
|
+
content = "<html>text<a href=\"http://external.org/\"/></html>"
|
48
|
+
webmock(@test_url, 200, content)
|
49
|
+
webmock("http://external.org", 404, "")
|
50
|
+
SiteChecker.check(@test_url, @root)
|
51
|
+
SiteChecker.problems.should eql({@test_url => ["http://external.org/ (404)"]})
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should check the link to an external image" do
|
55
|
+
content = "<html>text<img src=\"http://external.org/a.png\"/></html>"
|
56
|
+
webmock(@test_url, 200, content)
|
57
|
+
webmock("http://external.org/a.png", 200, "")
|
58
|
+
SiteChecker.check(@test_url, @root)
|
59
|
+
SiteChecker.problems.should be_empty
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should check the link to a local image" do
|
63
|
+
content = "<html>text<img src=\"/a.png\"/></html>"
|
64
|
+
webmock(@test_url, 200, content)
|
65
|
+
webmock("#{@test_url}/a.png", 200, "")
|
66
|
+
SiteChecker.check(@test_url, @root)
|
67
|
+
SiteChecker.problems.should be_empty
|
68
|
+
end
|
69
|
+
|
70
|
+
it "should report a problem if the image cannot be found" do
|
71
|
+
content = "<html>text<img src=\"http://external.org/a.png\"/></html>"
|
72
|
+
webmock(@test_url, 200, content)
|
73
|
+
webmock("http://external.org/a.png", 404, "")
|
74
|
+
SiteChecker.check(@test_url, @root)
|
75
|
+
SiteChecker.problems.should eql({@test_url => ["http://external.org/a.png (404)"]})
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should report a problem for a local page with absolute path" do
|
79
|
+
content = "<html>text<a href=\"#{@test_url}/another\"/></html>"
|
80
|
+
webmock(@test_url, 200, content)
|
81
|
+
webmock("#{@test_url}/another", 200, "")
|
82
|
+
SiteChecker.check(@test_url, @root)
|
83
|
+
SiteChecker.problems.should eql({@test_url => ["#{@test_url}/another (absolute path)"]})
|
84
|
+
end
|
85
|
+
|
86
|
+
it "should report a problem for a local image with absolute path" do
|
87
|
+
content = "<html>text<img src=\"#{@test_url}/a.png\"/></html>"
|
88
|
+
webmock("#{@test_url}/a.png", 200, "")
|
89
|
+
webmock(@test_url, 200, content)
|
90
|
+
SiteChecker.check(@test_url, @root)
|
91
|
+
SiteChecker.problems.should eql({@test_url => ["#{@test_url}/a.png (absolute path)"]})
|
92
|
+
end
|
93
|
+
|
94
|
+
it "should filter out certain links" do
|
95
|
+
SiteChecker.configure do |config|
|
96
|
+
config.ignore_list = ["/atom.xml", "/"]
|
97
|
+
end
|
98
|
+
content = "<html>text<a href=\"/atom.xml\"/><br/><a href=\"/\"/></html>"
|
99
|
+
webmock(@test_url, 200, content)
|
100
|
+
SiteChecker.check(@test_url, @root)
|
101
|
+
SiteChecker.problems.should be_empty
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should not report a valid internal anchor" do
|
105
|
+
content = "<html><a href=\"#goto\">goto</a>text<a id=\"goto\"></a></html>"
|
106
|
+
webmock(@test_url, 200, content)
|
107
|
+
SiteChecker.check(@test_url, @root)
|
108
|
+
SiteChecker.problems.should be_empty
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should report an invalid internal anchor" do
|
112
|
+
content = "<html><a href=\"#goto\">goto</a>text<a id=\"got\"></a></html>"
|
113
|
+
webmock(@test_url, 200, content)
|
114
|
+
SiteChecker.check(@test_url, @root)
|
115
|
+
SiteChecker.problems.should eql({@test_url => ["#goto (404 Not Found)"]})
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should follow an external anchor to the external page" do
|
119
|
+
content = "<html><a href=\"http://example.org#goto\">goto</a></html>"
|
120
|
+
webmock(@test_url, 200, content)
|
121
|
+
webmock("http://example.org", 200, content)
|
122
|
+
SiteChecker.check(@test_url, @root)
|
123
|
+
SiteChecker.problems.should be_empty
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should go down one level down for an internal page" do
|
127
|
+
content = "<html>text<a href=\"/one-level-down\"/></html>"
|
128
|
+
webmock(@test_url, 200, content)
|
129
|
+
webmock("#{@root}/one-level-down", 200, "<html></html>")
|
130
|
+
SiteChecker.check(@test_url, @root)
|
131
|
+
SiteChecker.problems.should be_empty
|
132
|
+
end
|
133
|
+
|
134
|
+
it "should report a problem with a linked local page" do
|
135
|
+
content = "<html>text<a href=\"/one-level-down\"/></html>"
|
136
|
+
webmock(@test_url, 200, content)
|
137
|
+
webmock("#{@root}/one-level-down", 404, "<html></html>")
|
138
|
+
SiteChecker.check(@test_url, @root)
|
139
|
+
SiteChecker.problems.should eql({@test_url => ["/one-level-down (404)"]})
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
describe "file system based checking" do
|
144
|
+
before(:each) do
|
145
|
+
@root = fs_test_path
|
146
|
+
clean_fs_test_path
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should find a referenced page" do
|
150
|
+
@root = fs_test_path
|
151
|
+
content = "<html>text<a href=\"/one-level-down\"/></html>"
|
152
|
+
filesystemmock("index.html", content)
|
153
|
+
filesystemmock("/one-level-down/index.html", content)
|
154
|
+
SiteChecker.check(fs_test_path, @root)
|
155
|
+
SiteChecker.local_pages.should eql([fs_test_path, "/one-level-down"])
|
156
|
+
SiteChecker.problems.should be_empty
|
157
|
+
end
|
158
|
+
|
159
|
+
it "should report a problem when the local page cannot be found" do
|
160
|
+
content = "<html>text<a href=\"/one-level-down\"/></html>"
|
161
|
+
filesystemmock("index.html", content)
|
162
|
+
SiteChecker.check(fs_test_path, @root)
|
163
|
+
SiteChecker.problems.should eql({fs_test_path => ["/one-level-down (404 Not Found)"]})
|
164
|
+
end
|
165
|
+
|
166
|
+
it "should use the local images" do
|
167
|
+
content = "<html>text<img src=\"/a.png\"/></html>"
|
168
|
+
filesystemmock("index.html", content)
|
169
|
+
filesystemmock("a.png", "")
|
170
|
+
SiteChecker.check(fs_test_path, @root)
|
171
|
+
SiteChecker.local_images.should eql(["/a.png"])
|
172
|
+
SiteChecker.problems.should be_empty
|
173
|
+
end
|
174
|
+
|
175
|
+
it "should report a problem when the local image cannot be found" do
|
176
|
+
content = "<html>text<img src=\"/a.png\"/></html>"
|
177
|
+
filesystemmock("index.html", content)
|
178
|
+
SiteChecker.check(fs_test_path, @root)
|
179
|
+
SiteChecker.problems.should eql({fs_test_path => ["/a.png (404 Not Found)"]})
|
180
|
+
end
|
181
|
+
|
182
|
+
it "should be able to handle anchors in other files" do
|
183
|
+
content = "<html><a href=\"/other#goto\">goto</a>text<a id=\"goto\"></a></html>"
|
184
|
+
content2 = "<html><a id=\"goto\">goto</a>"
|
185
|
+
filesystemmock("index.html", content)
|
186
|
+
filesystemmock("other/index.html", content2)
|
187
|
+
SiteChecker.check(fs_test_path, @root)
|
188
|
+
SiteChecker.problems.should be_empty
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe SiteChecker::IO::ContentFromFileSystem do
|
4
|
+
context "#get" do
|
5
|
+
before(:each) do
|
6
|
+
@root = "/home/test/web/public"
|
7
|
+
@link = SiteChecker::Link.create({:url => "link", :kind => :page, :location => :local})
|
8
|
+
@file = mock(File)
|
9
|
+
@content = mock()
|
10
|
+
@content_reader = SiteChecker::IO::ContentFromFileSystem.new(false, @root)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should return the content of a link using the local index.html" do
|
14
|
+
File.should_receive(:open).with("#{@root}/#{@link.url}/index.html") {@file}
|
15
|
+
@file.should_receive(:read) {@content}
|
16
|
+
@content_reader.get(@link).should eql(@content)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should return the content of a link which points to a real .html file" do
|
20
|
+
@link.url = "/about.html"
|
21
|
+
File.should_receive(:open).with("#{@root}/about.html") {@file}
|
22
|
+
@file.should_receive(:read) {@content}
|
23
|
+
@content_reader.get(@link).should eql(@content)
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should return the content of a link with anchor" do
|
27
|
+
@link.url = "/about#something"
|
28
|
+
File.should_receive(:open).with("#{@root}/about/index.html") {@file}
|
29
|
+
@file.should_receive(:read) {@content}
|
30
|
+
@content_reader.get(@link).should eql(@content)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should raise error if the link is broken" do
|
34
|
+
File.should_receive(:open).with("#{@root}/#{@link.url}/index.html").and_raise(Errno::ENOENT)
|
35
|
+
expect {@content_reader.get(@link)}.to raise_error(RuntimeError, "(404 Not Found)")
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should check the existence of a local image" do
|
39
|
+
@link.kind = :image
|
40
|
+
@link.url = "img/image1"
|
41
|
+
File.should_receive(:open).with("#{@root}/#{@link.url}") {@file}
|
42
|
+
@file.should_not_receive(:read)
|
43
|
+
@content_reader.get(@link) # return value is not asserted; the File.open / no-read mock expectations above are the check
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should not open a remote reference if opt-out" do
|
47
|
+
@link.location = :remote
|
48
|
+
File.should_not_receive(:open)
|
49
|
+
@content_reader.get(@link)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "should open a remote reference if opt-in" do
|
53
|
+
@content_reader = SiteChecker::IO::ContentFromFileSystem.new(true, @root)
|
54
|
+
@link.location = :remote
|
55
|
+
@link.url = "http://example.org"
|
56
|
+
File.should_not_receive(:open)
|
57
|
+
@content_reader.should_receive(:open)
|
58
|
+
@content_reader.get(@link)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require_relative 'io_spec_helper'
|
3
|
+
|
4
|
+
describe SiteChecker::IO::ContentFromWeb do
|
5
|
+
include IoSpecHelper
|
6
|
+
context "#get" do
|
7
|
+
before(:each) do
|
8
|
+
@root = "http://localhost:4000"
|
9
|
+
@link = SiteChecker::Link.create({:url => "link", :kind => :page, :location => :local})
|
10
|
+
@content = mock()
|
11
|
+
@content_reader = SiteChecker::IO::ContentFromWeb.new(false, @root)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should return the content of a link" do
|
15
|
+
@content_reader.should_receive(:open).with(URI("#{@root}/#{@link.url}")).and_return(@content)
|
16
|
+
@content_reader.get(@link).should eql(@content)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should raise error if the link is broken" do
|
20
|
+
@content_reader.should_receive(:open).with(URI("#{@root}/#{@link.url}")).
|
21
|
+
and_raise(OpenURI::HTTPError.new("404 Not Found", nil))
|
22
|
+
expect {@content_reader.get(@link)}.to raise_error(RuntimeError, "(404 Not Found)")
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should check the existence of an image" do
|
26
|
+
@link.kind = :image
|
27
|
+
@link.url = "img/image1"
|
28
|
+
@content_reader.should_receive(:open).with(URI("#{@root}/#{@link.url}"))
|
29
|
+
@content_reader.get(@link)
|
30
|
+
end
|
31
|
+
|
32
|
+
it "should not open a remote reference if opt-out" do
|
33
|
+
@link.location = :remote
|
34
|
+
@content_reader.should_not_receive(:open).with(URI("#{@root}/#{@link.url}"))
|
35
|
+
@content_reader.get(@link)
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should open a remote reference if opt-in" do
|
39
|
+
@content_reader = SiteChecker::IO::ContentFromWeb.new(true, @root)
|
40
|
+
@link.location = :remote
|
41
|
+
@link.url = "http://example.org"
|
42
|
+
@content_reader.should_receive(:open).with(URI(@link.url))
|
43
|
+
@content_reader.get(@link)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|