bad_link_finder 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- N2ZkYWM1MjViZmM0M2E3ZDUyZDQ1NmU2MjQ4NDU5Yzk4YjMwZTY3Yg==
4
+ NDQ1NTEwMzllMDZmMTJiNWUxYWYwMTcyNGI4NTg1YWVjMDQ1ZDNlYg==
5
5
  data.tar.gz: !binary |-
6
- MTljZDc2YzUxNmFkZmZjMWNhMzlkYWE1MDg1OWQ2YzU2OWFkYzUzNw==
6
+ ZTA2ZWJkMGI3N2Q5ZTgyODc0ZDU3YjBmYWNjODc2ODkzMzBhMDFlYg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NjM3MjRkOTVlMGRlYWI0NTliMDViMDI2ZjlkZjI1MGZjNDhjYTMwYWIyNjUy
10
- OGJjODRiZmJlYzMzMzI2NzcyZGJhNjE4ZmY4ZjQzZjFlZTMyOWQ4ZDk5MzZm
11
- MmRiZWYxYmViYmMwYzJjMjdmNTQyNWU2MTIzMDUzYWE0MmFiYmY=
9
+ YWUwODk5YTZjYTg4NWEyZjQ5MzdkMTJlOTIyZmMzYjA3NDcyNGFjYjBkMDdl
10
+ YTMyYWFlMjI3MWQ3NjkyOTliZDMzZTJkZmIxY2MwZjk3MzIyM2EwNzNjYzU0
11
+ ZGJhODYxYzk5MmY0MGFlMDIwNWRlNDg3YmI2ZWY0YzI5MWEzN2I=
12
12
  data.tar.gz: !binary |-
13
- MTBlYjc4OTU5OWFkOWM5YTE4MDVhNjgzNDI0ZmQxNmQwODcwZWU3NzhkMTlh
14
- MzQ0YjhhMzExY2JmZGFmN2M2YmYxOTNhNmRmNDg3M2I3MTQ3NjljMmU2NTg4
15
- NmNlZWNkYzIwOTc1ZWRlMjU4ODE2MDI3NDgxODc4NTZkODA4YTc=
13
+ NjZlNGE5YzgwY2NiMjA5YTNiNjc5NTcyZTE3ZjY5ZWFlNTU2M2EyYjgzZmEz
14
+ NTBiZmY3YzQwNTM5YmFjNWI4OTA1NzY3N2E5Mjk5OWU5ZTA3MjRhMjA1MDMz
15
+ ZjM3NGU3NzkyYjkxYmNkMWEzMGQzYzRlOTEyMDZhZDAyODk5ZjA=
@@ -8,12 +8,12 @@ module BadLinkFinder
8
8
 
9
9
  def to_s
10
10
  @to_s ||= CSV.generate(encoding: 'UTF-8') do |csv|
11
- csv << ['page_url', 'link', 'error_message', 'raw_error_message']
11
+ csv << ['page_url', 'page_id', 'link', 'error_message', 'raw_error_message']
12
12
 
13
- @bad_link_map.each do |page_url, bad_links|
13
+ @bad_link_map.each do |page_info, bad_links|
14
14
  bad_links.each do |bad_link|
15
15
  exception_message = bad_link.exception.message if bad_link.exception
16
- csv << [page_url, bad_link.link, bad_link.error_message, exception_message]
16
+ csv << [page_info[:url], page_info[:id], bad_link.link, bad_link.error_message, exception_message]
17
17
  end
18
18
  end
19
19
  end
@@ -6,13 +6,24 @@ module BadLinkFinder
6
6
  @path = strip_html_ending(path)
7
7
 
8
8
  file = mirror_dir + path
9
- doc = Nokogiri::HTML(file.read)
10
- @links = doc.css('a').map do |a|
9
+ @doc = Nokogiri::HTML(file.read)
10
+ end
11
+
12
+ attr_reader :path
13
+
14
+ def links
15
+ @links ||= @doc.css('a').map do |a|
11
16
  strip_html_ending(a['href']) unless ignore_link?(a['href'])
12
17
  end.compact
13
18
  end
14
19
 
15
- attr_reader :path, :links
20
+ def id
21
+ @id ||= begin
22
+ if (article = @doc.xpath('(//article[not(ancestor::article)])').first)
23
+ article['id']
24
+ end
25
+ end
26
+ end
16
27
 
17
28
  protected
18
29
 
@@ -17,7 +17,15 @@ module BadLinkFinder
17
17
  puts "Checking page #{page.path} as #{page_checker.page_url}"
18
18
 
19
19
  bad_links = page_checker.bad_links
20
- bad_link_map[page_checker.page_url] = bad_links if bad_links.any?
20
+
21
+ if bad_links.any?
22
+ page_info = {
23
+ id: page.id,
24
+ url: page_checker.page_url
25
+ }
26
+
27
+ bad_link_map[page_info] = bad_links
28
+ end
21
29
  end
22
30
 
23
31
  return bad_link_map
@@ -1,3 +1,3 @@
1
1
  module BadLinkFinder
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -2,13 +2,23 @@
2
2
  <html>
3
3
  <head><title>Example site</title></head>
4
4
  <body>
5
- <!-- Included -->
6
- <a href='/example/index.html?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1'></a>
7
- <a href=''></a>
5
+ <article id='correct-article-id'>
6
+ <!-- Included -->
7
+ <a href='/example/index.html?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1'></a>
8
+ <a href=''></a>
8
9
 
9
- <!-- Excluded -->
10
- <a></a>
11
- <a href='#section-2'></a>
12
- <a href='mailto:test@example.com'></a>
10
+ <article id='incorrect-sub-article-id'>
11
+ <p>Maecenas sed diam eget risus varius blandit sit amet non magna.</p>
12
+ </article>
13
+
14
+ <!-- Excluded -->
15
+ <a></a>
16
+ <a href='#section-2'></a>
17
+ <a href='mailto:test@example.com'></a>
18
+ </article>
19
+
20
+ <article id='incorrect-second-article-id'>
21
+ Praesent commodo cursus magna, vel scelerisque nisl consectetur et.
22
+ </article>
13
23
  </body>
14
24
  </html>
@@ -20,7 +20,8 @@ describe BadLinkFinder do
20
20
 
21
21
  csv_string = File.read(ENV['REPORT_OUTPUT_FILE'])
22
22
 
23
- assert_match 'http://www.example.com/example/', csv_string
23
+ assert_match 'http://www.example.com/example/relative-example', csv_string
24
+ assert_match 'correct-article-id', csv_string
24
25
  end
25
26
 
26
27
  it "complains if key variables are missing" do
@@ -7,11 +7,11 @@ describe BadLinkFinder::CSVBuilder do
7
7
 
8
8
  it "flattens out the bad links map into a CSV structure" do
9
9
  bad_link_map = {
10
- 'http://www.example.com/example/' => [
10
+ {url: 'http://www.example.com/example/', id: 'some-article-id'} => [
11
11
  mock_link(link: 'https://www.example.net/external-example.html', error_message: "This link returned a 404", exception: TestException.new('404 not found')),
12
12
  mock_link(link: 'relative-example', error_message: "Nope")
13
13
  ],
14
- 'http://www.example.com/example/relative-example' => [
14
+ {url: 'http://www.example.com/example/relative-example'} => [
15
15
  mock_link(
16
16
  link: '/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1',
17
17
  error_message: "What even is this?",
@@ -25,13 +25,19 @@ describe BadLinkFinder::CSVBuilder do
25
25
  parsed_csv = CSV.parse(csv_builder.to_s)
26
26
 
27
27
  headers = parsed_csv.shift
28
- assert_equal ['page_url', 'link', 'error_message', 'raw_error_message'], headers
28
+ assert_equal ['page_url', 'page_id', 'link', 'error_message', 'raw_error_message'], headers
29
29
 
30
30
  assert_equal bad_link_map.values.flatten.count, parsed_csv.count
31
31
 
32
- bad_link_map.each do |page_url, links|
32
+ bad_link_map.each do |page_info, links|
33
33
  links.each do |link|
34
- assert parsed_csv.include?([page_url, link.link, link.error_message, (link.exception.message if link.exception)])
34
+ assert parsed_csv.include?([
35
+ page_info[:url],
36
+ page_info[:id],
37
+ link.link,
38
+ link.error_message,
39
+ (link.exception.message if link.exception),
40
+ ])
35
41
  end
36
42
  end
37
43
  end
@@ -3,43 +3,53 @@ require 'bad_link_finder/page'
3
3
 
4
4
  describe BadLinkFinder::Page do
5
5
 
6
- it "strips index.html and .html from the page path" do
7
- assert_equal '', build_page('index.html').path.to_s
8
- assert_equal 'example/', build_page('example/index.html').path.to_s
9
- assert_equal 'example/relative-example', build_page('example/relative-example.html').path.to_s
6
+ describe '#path' do
7
+ it "strips index.html and .html" do
8
+ assert_equal '', build_page('index.html').path.to_s
9
+ assert_equal 'example/', build_page('example/index.html').path.to_s
10
+ assert_equal 'example/relative-example', build_page('example/relative-example.html').path.to_s
11
+ end
10
12
  end
11
13
 
12
- it "finds absolute paths, stripping index.html and .html" do
13
- assert_equal ['/example/'], build_page('index.html').links.map(&:to_s)
14
- end
14
+ describe '#links' do
15
+ it "finds absolute paths, stripping index.html and .html" do
16
+ assert_equal ['/example/'], build_page('index.html').links.map(&:to_s)
17
+ end
15
18
 
16
- it "finds relative paths, stripping index.html and .html" do
17
- assert build_page('example/index.html').links.map(&:to_s).include?('relative-example')
18
- end
19
+ it "finds relative paths, stripping index.html and .html" do
20
+ assert build_page('example/index.html').links.map(&:to_s).include?('relative-example')
21
+ end
19
22
 
20
- it "finds and preserves external URLs" do
21
- assert build_page('example/index.html').links.map(&:to_s).include?('https://www.example.net/external-example.html')
22
- end
23
+ it "finds and preserves external URLs" do
24
+ assert build_page('example/index.html').links.map(&:to_s).include?('https://www.example.net/external-example.html')
25
+ end
23
26
 
24
- it "preserves params and anchors on internal links" do
25
- page = build_page('example/relative-example.html')
26
- assert page.links.map(&:to_s).include?('/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1')
27
- end
27
+ it "preserves params and anchors on internal links" do
28
+ page = build_page('example/relative-example.html')
29
+ assert page.links.map(&:to_s).include?('/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1')
30
+ end
28
31
 
29
- it "includes links with empty href" do
30
- assert build_page('example/relative-example.html').links.map(&:to_s).include?('')
31
- end
32
+ it "includes links with empty href" do
33
+ assert build_page('example/relative-example.html').links.map(&:to_s).include?('')
34
+ end
32
35
 
33
- it "excludes links with no href" do
34
- refute build_page('example/relative-example.html').links.include?(nil)
35
- end
36
+ it "excludes links with no href" do
37
+ refute build_page('example/relative-example.html').links.include?(nil)
38
+ end
39
+
40
+ it "excludes links with an href containing only an anchor reference" do
41
+ refute build_page('example/relative-example.html').links.map(&:to_s).include?('#section-2')
42
+ end
36
43
 
37
- it "excludes links with an href containing only an anchor reference" do
38
- refute build_page('example/relative-example.html').links.map(&:to_s).include?('#section-2')
44
+ it "excludes mailto links" do
45
+ refute build_page('example/relative-example.html').links.map(&:to_s).include?('mailto:test@example.com')
46
+ end
39
47
  end
40
48
 
41
- it "excludes mailto links" do
42
- refute build_page('example/relative-example.html').links.map(&:to_s).include?('mailto:test@example.com')
49
+ describe '#page_id' do
50
+ it "returns the id of the first topmost article" do
51
+ assert_equal 'correct-article-id', build_page('example/relative-example.html').id
52
+ end
43
53
  end
44
54
 
45
55
  def build_page(path)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bad_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elliot Crosby-McCullough