bad_link_finder 0.0.1 → 0.1.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- N2ZkYWM1MjViZmM0M2E3ZDUyZDQ1NmU2MjQ4NDU5Yzk4YjMwZTY3Yg==
4
+ NDQ1NTEwMzllMDZmMTJiNWUxYWYwMTcyNGI4NTg1YWVjMDQ1ZDNlYg==
5
5
  data.tar.gz: !binary |-
6
- MTljZDc2YzUxNmFkZmZjMWNhMzlkYWE1MDg1OWQ2YzU2OWFkYzUzNw==
6
+ ZTA2ZWJkMGI3N2Q5ZTgyODc0ZDU3YjBmYWNjODc2ODkzMzBhMDFlYg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NjM3MjRkOTVlMGRlYWI0NTliMDViMDI2ZjlkZjI1MGZjNDhjYTMwYWIyNjUy
10
- OGJjODRiZmJlYzMzMzI2NzcyZGJhNjE4ZmY4ZjQzZjFlZTMyOWQ4ZDk5MzZm
11
- MmRiZWYxYmViYmMwYzJjMjdmNTQyNWU2MTIzMDUzYWE0MmFiYmY=
9
+ YWUwODk5YTZjYTg4NWEyZjQ5MzdkMTJlOTIyZmMzYjA3NDcyNGFjYjBkMDdl
10
+ YTMyYWFlMjI3MWQ3NjkyOTliZDMzZTJkZmIxY2MwZjk3MzIyM2EwNzNjYzU0
11
+ ZGJhODYxYzk5MmY0MGFlMDIwNWRlNDg3YmI2ZWY0YzI5MWEzN2I=
12
12
  data.tar.gz: !binary |-
13
- MTBlYjc4OTU5OWFkOWM5YTE4MDVhNjgzNDI0ZmQxNmQwODcwZWU3NzhkMTlh
14
- MzQ0YjhhMzExY2JmZGFmN2M2YmYxOTNhNmRmNDg3M2I3MTQ3NjljMmU2NTg4
15
- NmNlZWNkYzIwOTc1ZWRlMjU4ODE2MDI3NDgxODc4NTZkODA4YTc=
13
+ NjZlNGE5YzgwY2NiMjA5YTNiNjc5NTcyZTE3ZjY5ZWFlNTU2M2EyYjgzZmEz
14
+ NTBiZmY3YzQwNTM5YmFjNWI4OTA1NzY3N2E5Mjk5OWU5ZTA3MjRhMjA1MDMz
15
+ ZjM3NGU3NzkyYjkxYmNkMWEzMGQzYzRlOTEyMDZhZDAyODk5ZjA=
@@ -8,12 +8,12 @@ module BadLinkFinder
8
8
 
9
9
  def to_s
10
10
  @to_s ||= CSV.generate(encoding: 'UTF-8') do |csv|
11
- csv << ['page_url', 'link', 'error_message', 'raw_error_message']
11
+ csv << ['page_url', 'page_id', 'link', 'error_message', 'raw_error_message']
12
12
 
13
- @bad_link_map.each do |page_url, bad_links|
13
+ @bad_link_map.each do |page_info, bad_links|
14
14
  bad_links.each do |bad_link|
15
15
  exception_message = bad_link.exception.message if bad_link.exception
16
- csv << [page_url, bad_link.link, bad_link.error_message, exception_message]
16
+ csv << [page_info[:url], page_info[:id], bad_link.link, bad_link.error_message, exception_message]
17
17
  end
18
18
  end
19
19
  end
@@ -6,13 +6,24 @@ module BadLinkFinder
6
6
  @path = strip_html_ending(path)
7
7
 
8
8
  file = mirror_dir + path
9
- doc = Nokogiri::HTML(file.read)
10
- @links = doc.css('a').map do |a|
9
+ @doc = Nokogiri::HTML(file.read)
10
+ end
11
+
12
+ attr_reader :path
13
+
14
+ def links
15
+ @links ||= @doc.css('a').map do |a|
11
16
  strip_html_ending(a['href']) unless ignore_link?(a['href'])
12
17
  end.compact
13
18
  end
14
19
 
15
- attr_reader :path, :links
20
+ def id
21
+ @id ||= begin
22
+ if (article = @doc.xpath('(//article[not(ancestor::article)])').first)
23
+ article['id']
24
+ end
25
+ end
26
+ end
16
27
 
17
28
  protected
18
29
 
@@ -17,7 +17,15 @@ module BadLinkFinder
17
17
  puts "Checking page #{page.path} as #{page_checker.page_url}"
18
18
 
19
19
  bad_links = page_checker.bad_links
20
- bad_link_map[page_checker.page_url] = bad_links if bad_links.any?
20
+
21
+ if bad_links.any?
22
+ page_info = {
23
+ id: page.id,
24
+ url: page_checker.page_url
25
+ }
26
+
27
+ bad_link_map[page_info] = bad_links
28
+ end
21
29
  end
22
30
 
23
31
  return bad_link_map
@@ -1,3 +1,3 @@
1
1
  module BadLinkFinder
2
- VERSION = "0.0.1"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -2,13 +2,23 @@
2
2
  <html>
3
3
  <head><title>Example site</title></head>
4
4
  <body>
5
- <!-- Included -->
6
- <a href='/example/index.html?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1'></a>
7
- <a href=''></a>
5
+ <article id='correct-article-id'>
6
+ <!-- Included -->
7
+ <a href='/example/index.html?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1'></a>
8
+ <a href=''></a>
8
9
 
9
- <!-- Excluded -->
10
- <a></a>
11
- <a href='#section-2'></a>
12
- <a href='mailto:test@example.com'></a>
10
+ <article id='incorrect-sub-article-id'>
11
+ <p>Maecenas sed diam eget risus varius blandit sit amet non magna.</p>
12
+ </article>
13
+
14
+ <!-- Excluded -->
15
+ <a></a>
16
+ <a href='#section-2'></a>
17
+ <a href='mailto:test@example.com'></a>
18
+ </article>
19
+
20
+ <article id='incorrect-second-article-id'>
21
+ Praesent commodo cursus magna, vel scelerisque nisl consectetur et.
22
+ </article>
13
23
  </body>
14
24
  </html>
@@ -20,7 +20,8 @@ describe BadLinkFinder do
20
20
 
21
21
  csv_string = File.read(ENV['REPORT_OUTPUT_FILE'])
22
22
 
23
- assert_match 'http://www.example.com/example/', csv_string
23
+ assert_match 'http://www.example.com/example/relative-example', csv_string
24
+ assert_match 'correct-article-id', csv_string
24
25
  end
25
26
 
26
27
  it "complains if key variables are missing" do
@@ -7,11 +7,11 @@ describe BadLinkFinder::CSVBuilder do
7
7
 
8
8
  it "flattens out the bad links map into a CSV structure" do
9
9
  bad_link_map = {
10
- 'http://www.example.com/example/' => [
10
+ {url: 'http://www.example.com/example/', id: 'some-article-id'} => [
11
11
  mock_link(link: 'https://www.example.net/external-example.html', error_message: "This link returned a 404", exception: TestException.new('404 not found')),
12
12
  mock_link(link: 'relative-example', error_message: "Nope")
13
13
  ],
14
- 'http://www.example.com/example/relative-example' => [
14
+ {url: 'http://www.example.com/example/relative-example'} => [
15
15
  mock_link(
16
16
  link: '/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1',
17
17
  error_message: "What even is this?",
@@ -25,13 +25,19 @@ describe BadLinkFinder::CSVBuilder do
25
25
  parsed_csv = CSV.parse(csv_builder.to_s)
26
26
 
27
27
  headers = parsed_csv.shift
28
- assert_equal ['page_url', 'link', 'error_message', 'raw_error_message'], headers
28
+ assert_equal ['page_url', 'page_id', 'link', 'error_message', 'raw_error_message'], headers
29
29
 
30
30
  assert_equal bad_link_map.values.flatten.count, parsed_csv.count
31
31
 
32
- bad_link_map.each do |page_url, links|
32
+ bad_link_map.each do |page_info, links|
33
33
  links.each do |link|
34
- assert parsed_csv.include?([page_url, link.link, link.error_message, (link.exception.message if link.exception)])
34
+ assert parsed_csv.include?([
35
+ page_info[:url],
36
+ page_info[:id],
37
+ link.link,
38
+ link.error_message,
39
+ (link.exception.message if link.exception),
40
+ ])
35
41
  end
36
42
  end
37
43
  end
@@ -3,43 +3,53 @@ require 'bad_link_finder/page'
3
3
 
4
4
  describe BadLinkFinder::Page do
5
5
 
6
- it "strips index.html and .html from the page path" do
7
- assert_equal '', build_page('index.html').path.to_s
8
- assert_equal 'example/', build_page('example/index.html').path.to_s
9
- assert_equal 'example/relative-example', build_page('example/relative-example.html').path.to_s
6
+ describe '#path' do
7
+ it "strips index.html and .html" do
8
+ assert_equal '', build_page('index.html').path.to_s
9
+ assert_equal 'example/', build_page('example/index.html').path.to_s
10
+ assert_equal 'example/relative-example', build_page('example/relative-example.html').path.to_s
11
+ end
10
12
  end
11
13
 
12
- it "finds absolute paths, stripping index.html and .html" do
13
- assert_equal ['/example/'], build_page('index.html').links.map(&:to_s)
14
- end
14
+ describe '#links' do
15
+ it "finds absolute paths, stripping index.html and .html" do
16
+ assert_equal ['/example/'], build_page('index.html').links.map(&:to_s)
17
+ end
15
18
 
16
- it "finds relative paths, stripping index.html and .html" do
17
- assert build_page('example/index.html').links.map(&:to_s).include?('relative-example')
18
- end
19
+ it "finds relative paths, stripping index.html and .html" do
20
+ assert build_page('example/index.html').links.map(&:to_s).include?('relative-example')
21
+ end
19
22
 
20
- it "finds and preserves external URLs" do
21
- assert build_page('example/index.html').links.map(&:to_s).include?('https://www.example.net/external-example.html')
22
- end
23
+ it "finds and preserves external URLs" do
24
+ assert build_page('example/index.html').links.map(&:to_s).include?('https://www.example.net/external-example.html')
25
+ end
23
26
 
24
- it "preserves params and anchors on internal links" do
25
- page = build_page('example/relative-example.html')
26
- assert page.links.map(&:to_s).include?('/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1')
27
- end
27
+ it "preserves params and anchors on internal links" do
28
+ page = build_page('example/relative-example.html')
29
+ assert page.links.map(&:to_s).include?('/example/?test=true&redirect=http://www.example.com/in-param-url/index.html#section-1')
30
+ end
28
31
 
29
- it "includes links with empty href" do
30
- assert build_page('example/relative-example.html').links.map(&:to_s).include?('')
31
- end
32
+ it "includes links with empty href" do
33
+ assert build_page('example/relative-example.html').links.map(&:to_s).include?('')
34
+ end
32
35
 
33
- it "excludes links with no href" do
34
- refute build_page('example/relative-example.html').links.include?(nil)
35
- end
36
+ it "excludes links with no href" do
37
+ refute build_page('example/relative-example.html').links.include?(nil)
38
+ end
39
+
40
+ it "excludes links with an href containing only an anchor reference" do
41
+ refute build_page('example/relative-example.html').links.map(&:to_s).include?('#section-2')
42
+ end
36
43
 
37
- it "excludes links with an href containing only an anchor reference" do
38
- refute build_page('example/relative-example.html').links.map(&:to_s).include?('#section-2')
44
+ it "excludes mailto links" do
45
+ refute build_page('example/relative-example.html').links.map(&:to_s).include?('mailto:test@example.com')
46
+ end
39
47
  end
40
48
 
41
- it "excludes mailto links" do
42
- refute build_page('example/relative-example.html').links.map(&:to_s).include?('mailto:test@example.com')
49
+ describe '#page_id' do
50
+ it "returns the id of the first topmost article" do
51
+ assert_equal 'correct-article-id', build_page('example/relative-example.html').id
52
+ end
43
53
  end
44
54
 
45
55
  def build_page(path)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bad_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elliot Crosby-McCullough