govspeak 5.3.0 → 5.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/govspeak.rb +2 -2
- data/lib/govspeak/link_extractor.rb +12 -3
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_link_extractor_test.rb +8 -1
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 40e1c3a28d1f53eab54a104139116e0405c4450a
|
4
|
+
data.tar.gz: 89520dfd370e31fd803fa8d127fb546ab35521e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0afe56bcf4cd1160adb293500706b3ee3d2198b53792d18b7e8ad263ba7b81c3a770bfd63bd25e41b7ea9063d9243e31679e2fafa0b70e9a06e74f46eeb740f9
|
7
|
+
data.tar.gz: ff7509b60e910c01f83a8fbb7a0326b008898903186167609221c9f38fa805624f214139cea19afd2feb8181043aaf11faf455db51f5ca975078d7946c8b4bea
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 5.4.0
|
2
|
+
* Add an optional `website_root` argument to `Govspeak::Document#extracted_links` in order to get all links as fully qualified URLs [#122](https://github.com/alphagov/govspeak/pull/122)
|
3
|
+
|
1
4
|
## 5.3.0
|
2
5
|
* Add a link extraction class for finding links in documents [#120](https://github.com/alphagov/govspeak/pull/120)
|
3
6
|
|
data/lib/govspeak.rb
CHANGED
@@ -95,8 +95,8 @@ module Govspeak
|
|
95
95
|
Govspeak::StructuredHeaderExtractor.new(self).call
|
96
96
|
end
|
97
97
|
|
98
|
-
def extracted_links
|
99
|
-
Govspeak::LinkExtractor.new(self).call
|
98
|
+
def extracted_links(website_root: nil)
|
99
|
+
Govspeak::LinkExtractor.new(self, website_root: website_root).call
|
100
100
|
end
|
101
101
|
|
102
102
|
def preprocess(source)
|
data/lib/govspeak/link_extractor.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
module Govspeak
|
2
2
|
class LinkExtractor
|
3
|
-
def initialize(document)
|
3
|
+
def initialize(document, website_root: nil)
|
4
4
|
@document = document
|
5
|
+
@website_root = website_root
|
5
6
|
end
|
6
7
|
|
7
8
|
def call
|
@@ -10,8 +11,16 @@ module Govspeak
|
|
10
11
|
|
11
12
|
private
|
12
13
|
|
14
|
+
attr_reader :document, :website_root
|
15
|
+
|
13
16
|
def extract_links
|
14
|
-
document_anchors.map { |link| link['href'] }
|
17
|
+
document_anchors.map do |link|
|
18
|
+
if website_root && link['href'].start_with?('/')
|
19
|
+
"#{website_root}#{link['href']}"
|
20
|
+
else
|
21
|
+
link['href']
|
22
|
+
end
|
23
|
+
end
|
15
24
|
end
|
16
25
|
|
17
26
|
def document_anchors
|
@@ -22,7 +31,7 @@ module Govspeak
|
|
22
31
|
doc = Nokogiri::HTML::Document.new
|
23
32
|
doc.encoding = "UTF-8"
|
24
33
|
|
25
|
-
doc.fragment(@document.to_html)
|
34
|
+
doc.fragment(document.to_html)
|
26
35
|
end
|
27
36
|
end
|
28
37
|
end
|
data/lib/govspeak/version.rb
CHANGED
data/test/govspeak_link_extractor_test.rb
CHANGED
@@ -12,6 +12,8 @@ class GovspeakLinkExtractorTest < Minitest::Test
|
|
12
12
|
[not_a_link](#somepage)
|
13
13
|
|
14
14
|
[mailto:](mailto:someone@www.example.com)
|
15
|
+
|
16
|
+
[absolute_path](/cais-trwydded-yrru-dros-dro)
|
15
17
|
}
|
16
18
|
end
|
17
19
|
|
@@ -24,7 +26,7 @@ class GovspeakLinkExtractorTest < Minitest::Test
|
|
24
26
|
end
|
25
27
|
|
26
28
|
test "Links are extracted from the body" do
|
27
|
-
expected_links =
|
29
|
+
expected_links = %w{http://www.example.com http://www.gov.com /cais-trwydded-yrru-dros-dro}
|
28
30
|
assert_equal expected_links, links
|
29
31
|
end
|
30
32
|
|
@@ -39,4 +41,9 @@ class GovspeakLinkExtractorTest < Minitest::Test
|
|
39
41
|
test "Links are not extracted if they begin with mailto:" do
|
40
42
|
refute_includes ["mailto:someone@www.example.com"], links
|
41
43
|
end
|
44
|
+
|
45
|
+
test "Absolute links are transformed to a url when website_root passed in" do
|
46
|
+
urls = doc.extracted_links(website_root: "http://www.example.com")
|
47
|
+
assert urls.include?("http://www.example.com/cais-trwydded-yrru-dros-dro")
|
48
|
+
end
|
42
49
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.3.0
|
4
|
+
version: 5.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GOV.UK Dev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-
|
11
|
+
date: 2018-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kramdown
|
@@ -320,19 +320,19 @@ signing_key:
|
|
320
320
|
specification_version: 4
|
321
321
|
summary: Markup language for single domain
|
322
322
|
test_files:
|
323
|
-
- test/govspeak_attachments_test.rb
|
324
|
-
- test/govspeak_attachments_inline_test.rb
|
325
|
-
- test/govspeak_link_test.rb
|
326
323
|
- test/govspeak_link_extractor_test.rb
|
324
|
+
- test/govspeak_structured_headers_test.rb
|
325
|
+
- test/govspeak_button_test.rb
|
326
|
+
- test/with_deep_merge_test.rb
|
327
|
+
- test/blockquote_extra_quote_remover_test.rb
|
328
|
+
- test/govspeak_test_helper.rb
|
329
|
+
- test/govspeak_link_test.rb
|
327
330
|
- test/govspeak_contacts_test.rb
|
328
331
|
- test/test_helper.rb
|
332
|
+
- test/html_validator_test.rb
|
333
|
+
- test/html_sanitizer_test.rb
|
334
|
+
- test/govspeak_attachments_inline_test.rb
|
329
335
|
- test/govspeak_test.rb
|
336
|
+
- test/govspeak_attachments_test.rb
|
330
337
|
- test/govspeak_attachments_image_test.rb
|
331
|
-
- test/with_deep_merge_test.rb
|
332
|
-
- test/html_validator_test.rb
|
333
|
-
- test/govspeak_structured_headers_test.rb
|
334
|
-
- test/blockquote_extra_quote_remover_test.rb
|
335
338
|
- test/presenters/h_card_presenter_test.rb
|
336
|
-
- test/govspeak_button_test.rb
|
337
|
-
- test/govspeak_test_helper.rb
|
338
|
-
- test/html_sanitizer_test.rb
|