bad_link_finder 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/bad_link_finder/csv_builder.rb +3 -3
- data/lib/bad_link_finder/page.rb +14 -3
- data/lib/bad_link_finder/site_checker.rb +9 -1
- data/lib/bad_link_finder/version.rb +1 -1
- data/test/fixtures/www.example.com/example/relative-example.html +17 -7
- data/test/integration/bad_link_finder_test.rb +2 -1
- data/test/unit/csv_builder_test.rb +11 -5
- data/test/unit/page_test.rb +37 -27
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NDQ1NTEwMzllMDZmMTJiNWUxYWYwMTcyNGI4NTg1YWVjMDQ1ZDNlYg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZTA2ZWJkMGI3N2Q5ZTgyODc0ZDU3YjBmYWNjODc2ODkzMzBhMDFlYg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YWUwODk5YTZjYTg4NWEyZjQ5MzdkMTJlOTIyZmMzYjA3NDcyNGFjYjBkMDdl
|
10
|
+
YTMyYWFlMjI3MWQ3NjkyOTliZDMzZTJkZmIxY2MwZjk3MzIyM2EwNzNjYzU0
|
11
|
+
ZGJhODYxYzk5MmY0MGFlMDIwNWRlNDg3YmI2ZWY0YzI5MWEzN2I=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NjZlNGE5YzgwY2NiMjA5YTNiNjc5NTcyZTE3ZjY5ZWFlNTU2M2EyYjgzZmEz
|
14
|
+
NTBiZmY3YzQwNTM5YmFjNWI4OTA1NzY3N2E5Mjk5OWU5ZTA3MjRhMjA1MDMz
|
15
|
+
ZjM3NGU3NzkyYjkxYmNkMWEzMGQzYzRlOTEyMDZhZDAyODk5ZjA=
|
data/lib/bad_link_finder/csv_builder.rb
CHANGED
@@ -8,12 +8,12 @@ module BadLinkFinder
|
|
8
8
|
|
9
9
|
def to_s
|
10
10
|
@to_s ||= CSV.generate(encoding: 'UTF-8') do |csv|
|
11
|
-
csv << ['page_url', 'link', 'error_message', 'raw_error_message']
|
11
|
+
csv << ['page_url', 'page_id', 'link', 'error_message', 'raw_error_message']
|
12
12
|
|
13
|
-
@bad_link_map.each do |
|
13
|
+
@bad_link_map.each do |page_info, bad_links|
|
14
14
|
bad_links.each do |bad_link|
|
15
15
|
exception_message = bad_link.exception.message if bad_link.exception
|
16
|
-
csv << [
|
16
|
+
csv << [page_info[:url], page_info[:id], bad_link.link, bad_link.error_message, exception_message]
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
data/lib/bad_link_finder/page.rb
CHANGED
@@ -6,13 +6,24 @@ module BadLinkFinder
|
|
6
6
|
@path = strip_html_ending(path)
|
7
7
|
|
8
8
|
file = mirror_dir + path
|
9
|
-
doc = Nokogiri::HTML(file.read)
|
10
|
-
|
9
|
+
@doc = Nokogiri::HTML(file.read)
|
10
|
+
end
|
11
|
+
|
12
|
+
attr_reader :path
|
13
|
+
|
14
|
+
def links
|
15
|
+
@links ||= @doc.css('a').map do |a|
|
11
16
|
strip_html_ending(a['href']) unless ignore_link?(a['href'])
|
12
17
|
end.compact
|
13
18
|
end
|
14
19
|
|
15
|
-
|
20
|
+
def id
|
21
|
+
@id ||= begin
|
22
|
+
if (article = @doc.xpath('(//article[not(ancestor::article)])').first)
|
23
|
+
article['id']
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
16
27
|
|
17
28
|
protected
|
18
29
|
|
data/lib/bad_link_finder/site_checker.rb
CHANGED
@@ -17,7 +17,15 @@ module BadLinkFinder
|
|
17
17
|
puts "Checking page #{page.path} as #{page_checker.page_url}"
|
18
18
|
|
19
19
|
bad_links = page_checker.bad_links
|
20
|
-
|
20
|
+
|
21
|
+
if bad_links.any?
|
22
|
+
page_info = {
|
23
|
+
id: page.id,
|
24
|
+
url: page_checker.page_url
|
25
|
+
}
|
26
|
+
|
27
|
+
bad_link_map[page_info] = bad_links
|
28
|
+
end
|
21
29
|
end
|
22
30
|
|
23
31
|
return bad_link_map
|
data/test/fixtures/www.example.com/example/relative-example.html
CHANGED
@@ -2,13 +2,23 @@
|
|
2
2
|
<html>
|
3
3
|
<head><title>Example site</title></head>
|
4
4
|
<body>
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
<article id='correct-article-id'>
|
6
|
+
<!-- Included -->
|
7
|
+
<a href='/example/index.html?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1'></a>
|
8
|
+
<a href=''></a>
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
<article id='incorrect-sub-article-id'>
|
11
|
+
<p>Maecenas sed diam eget risus varius blandit sit amet non magna.</p>
|
12
|
+
</article>
|
13
|
+
|
14
|
+
<!-- Excluded -->
|
15
|
+
<a></a>
|
16
|
+
<a href='#section-2'></a>
|
17
|
+
<a href='mailto:test@example.com'></a>
|
18
|
+
</article>
|
19
|
+
|
20
|
+
<article id='incorrect-second-article-id'>
|
21
|
+
Praesent commodo cursus magna, vel scelerisque nisl consectetur et.
|
22
|
+
</article>
|
13
23
|
</body>
|
14
24
|
</html>
|
data/test/integration/bad_link_finder_test.rb
CHANGED
@@ -20,7 +20,8 @@ describe BadLinkFinder do
|
|
20
20
|
|
21
21
|
csv_string = File.read(ENV['REPORT_OUTPUT_FILE'])
|
22
22
|
|
23
|
-
assert_match 'http://www.example.com/example/', csv_string
|
23
|
+
assert_match 'http://www.example.com/example/relative-example', csv_string
|
24
|
+
assert_match 'correct-article-id', csv_string
|
24
25
|
end
|
25
26
|
|
26
27
|
it "complains if key variables are missing" do
|
data/test/unit/csv_builder_test.rb
CHANGED
@@ -7,11 +7,11 @@ describe BadLinkFinder::CSVBuilder do
|
|
7
7
|
|
8
8
|
it "flattens out the bad links map into a CSV structure" do
|
9
9
|
bad_link_map = {
|
10
|
-
'http://www.example.com/example/' => [
|
10
|
+
{url: 'http://www.example.com/example/', id: 'some-article-id'} => [
|
11
11
|
mock_link(link: 'https://www.example.net/external-example.html', error_message: "This link returned a 404", exception: TestException.new('404 not found')),
|
12
12
|
mock_link(link: 'relative-example', error_message: "Nope")
|
13
13
|
],
|
14
|
-
'http://www.example.com/example/relative-example' => [
|
14
|
+
{url: 'http://www.example.com/example/relative-example'} => [
|
15
15
|
mock_link(
|
16
16
|
link: '/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1',
|
17
17
|
error_message: "What even is this?",
|
@@ -25,13 +25,19 @@ describe BadLinkFinder::CSVBuilder do
|
|
25
25
|
parsed_csv = CSV.parse(csv_builder.to_s)
|
26
26
|
|
27
27
|
headers = parsed_csv.shift
|
28
|
-
assert_equal ['page_url', 'link', 'error_message', 'raw_error_message'], headers
|
28
|
+
assert_equal ['page_url', 'page_id', 'link', 'error_message', 'raw_error_message'], headers
|
29
29
|
|
30
30
|
assert_equal bad_link_map.values.flatten.count, parsed_csv.count
|
31
31
|
|
32
|
-
bad_link_map.each do |
|
32
|
+
bad_link_map.each do |page_info, links|
|
33
33
|
links.each do |link|
|
34
|
-
assert parsed_csv.include?([
|
34
|
+
assert parsed_csv.include?([
|
35
|
+
page_info[:url],
|
36
|
+
page_info[:id],
|
37
|
+
link.link,
|
38
|
+
link.error_message,
|
39
|
+
(link.exception.message if link.exception),
|
40
|
+
])
|
35
41
|
end
|
36
42
|
end
|
37
43
|
end
|
data/test/unit/page_test.rb
CHANGED
@@ -3,43 +3,53 @@ require 'bad_link_finder/page'
|
|
3
3
|
|
4
4
|
describe BadLinkFinder::Page do
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
describe '#path' do
|
7
|
+
it "strips index.html and .html" do
|
8
|
+
assert_equal '', build_page('index.html').path.to_s
|
9
|
+
assert_equal 'example/', build_page('example/index.html').path.to_s
|
10
|
+
assert_equal 'example/relative-example', build_page('example/relative-example.html').path.to_s
|
11
|
+
end
|
10
12
|
end
|
11
13
|
|
12
|
-
|
13
|
-
|
14
|
-
|
14
|
+
describe '#links' do
|
15
|
+
it "finds absolute paths, stripping index.html and .html" do
|
16
|
+
assert_equal ['/example/'], build_page('index.html').links.map(&:to_s)
|
17
|
+
end
|
15
18
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
+
it "finds relative paths, stripping index.html and .html" do
|
20
|
+
assert build_page('example/index.html').links.map(&:to_s).include?('relative-example')
|
21
|
+
end
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
it "finds and preserves external URLs" do
|
24
|
+
assert build_page('example/index.html').links.map(&:to_s).include?('https://www.example.net/external-example.html')
|
25
|
+
end
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
27
|
+
it "preserves params and anchors on internal links" do
|
28
|
+
page = build_page('example/relative-example.html')
|
29
|
+
assert page.links.map(&:to_s).include?('/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1')
|
30
|
+
end
|
28
31
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
+
it "includes links with empty href" do
|
33
|
+
assert build_page('example/relative-example.html').links.map(&:to_s).include?('')
|
34
|
+
end
|
32
35
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
+
it "excludes links with no href" do
|
37
|
+
refute build_page('example/relative-example.html').links.include?(nil)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "excludes links with an href containing only an anchor reference" do
|
41
|
+
refute build_page('example/relative-example.html').links.map(&:to_s).include?('#section-2')
|
42
|
+
end
|
36
43
|
|
37
|
-
|
38
|
-
|
44
|
+
it "excludes mailto links" do
|
45
|
+
refute build_page('example/relative-example.html').links.map(&:to_s).include?('mailto:test@example.com')
|
46
|
+
end
|
39
47
|
end
|
40
48
|
|
41
|
-
|
42
|
-
|
49
|
+
describe '#page_id' do
|
50
|
+
it "returns the id of the first topmost article" do
|
51
|
+
assert_equal 'correct-article-id', build_page('example/relative-example.html').id
|
52
|
+
end
|
43
53
|
end
|
44
54
|
|
45
55
|
def build_page(path)
|