govspeak 5.4.0 → 5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/govspeak/link_extractor.rb +12 -7
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_link_extractor_test.rb +20 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 52ab560c9f4e178a3079553808221fed51396b32
|
4
|
+
data.tar.gz: 371d78e8b275ca0c482e925a9e8a0bce71f791e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 81bfefe2e2923ef22e97898e1c9183833b220de4ceaec5d53a711dbf674ea513c081fa9bfb4b9633212278b06ab5ec8b3e9366ac0acf88f4f639e5a982b89aad
|
7
|
+
data.tar.gz: 122a67b0d9182529e010c14cc3129b0c81b72182cb272ca6bcd167c337a965f2351398e0c2abac8595501ea1355a102fef37cee070ec0312963833606a0485b7
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 5.5.0
|
2
|
+
* Ignore links with blank or missing `href`s when extracting links from a document with `Govspeak::Document#extracted_links` [#124](https://github.com/alphagov/govspeak/pull/124)
|
3
|
+
|
1
4
|
## 5.4.0
|
2
5
|
* Add an optional `website_root` argument to `Govspeak::Document#extracted_links` in order to get all links as fully qualified URLs [#122](https://github.com/alphagov/govspeak/pull/122)
|
3
6
|
|
data/lib/govspeak/link_extractor.rb
CHANGED
@@ -14,17 +14,22 @@ module Govspeak
|
|
14
14
|
attr_reader :document, :website_root
|
15
15
|
|
16
16
|
def extract_links
|
17
|
-
document_anchors.
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
document_anchors.
|
18
|
+
map { |link| extract_href_from_link(link) }.
|
19
|
+
reject(&:blank?)
|
20
|
+
end
|
21
|
+
|
22
|
+
def extract_href_from_link(link)
|
23
|
+
href = link['href'] || ''
|
24
|
+
if website_root && href.start_with?('/')
|
25
|
+
"#{website_root}#{href}"
|
26
|
+
else
|
27
|
+
href
|
23
28
|
end
|
24
29
|
end
|
25
30
|
|
26
31
|
def document_anchors
|
27
|
-
processed_govspeak.css('a:not([href^="mailto"])').css('a:not([href^="#"])')
|
32
|
+
processed_govspeak.css('a[href]').css('a:not([href^="mailto"])').css('a:not([href^="#"])')
|
28
33
|
end
|
29
34
|
|
30
35
|
def processed_govspeak
|
data/lib/govspeak/version.rb
CHANGED
data/test/govspeak_link_extractor_test.rb
CHANGED
@@ -14,6 +14,20 @@ class GovspeakLinkExtractorTest < Minitest::Test
|
|
14
14
|
[mailto:](mailto:someone@www.example.com)
|
15
15
|
|
16
16
|
[absolute_path](/cais-trwydded-yrru-dros-dro)
|
17
|
+
|
18
|
+
<a href="http://www.example.com/from/html">raw_html_link</a>
|
19
|
+
|
20
|
+
[empty_markdown_link]()
|
21
|
+
|
22
|
+
[a_different_empty_markdown_link]( )
|
23
|
+
|
24
|
+
<a>empty_raw_html_link</a>
|
25
|
+
|
26
|
+
<a href="">a_second_empty_raw_link</a>
|
27
|
+
|
28
|
+
<a href=" ">a_third_empty_raw_link</a>
|
29
|
+
|
30
|
+
<a href>a_fourth_empty_raw_link</a>
|
17
31
|
}
|
18
32
|
end
|
19
33
|
|
@@ -26,7 +40,7 @@ class GovspeakLinkExtractorTest < Minitest::Test
|
|
26
40
|
end
|
27
41
|
|
28
42
|
test "Links are extracted from the body" do
|
29
|
-
expected_links = %w{http://www.example.com http://www.gov.com /cais-trwydded-yrru-dros-dro}
|
43
|
+
expected_links = %w{http://www.example.com http://www.gov.com /cais-trwydded-yrru-dros-dro http://www.example.com/from/html}
|
30
44
|
assert_equal expected_links, links
|
31
45
|
end
|
32
46
|
|
@@ -42,6 +56,11 @@ class GovspeakLinkExtractorTest < Minitest::Test
|
|
42
56
|
refute_includes ["mailto:someone@www.example.com"], links
|
43
57
|
end
|
44
58
|
|
59
|
+
test "Links are not extracted if they are blank" do
|
60
|
+
refute_includes [""], links
|
61
|
+
refute_includes [nil], links
|
62
|
+
end
|
63
|
+
|
45
64
|
test "Absolute links are transformed to a url when website_root passed in" do
|
46
65
|
urls = doc.extracted_links(website_root: "http://www.example.com")
|
47
66
|
assert urls.include?("http://www.example.com/cais-trwydded-yrru-dros-dro")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.4.0
|
4
|
+
version: 5.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GOV.UK Dev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kramdown
|