govspeak 5.2.2 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/govspeak.rb +5 -0
- data/lib/govspeak/link_extractor.rb +28 -0
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_link_extractor_test.rb +42 -0
- metadata +17 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 117026a2b98a5909e89263c2d9f05091b492e8b2
|
4
|
+
data.tar.gz: b6f5a662a795899e4beb399d87ade54181f40e81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da21f89b2844d3174d1820ecd0aa991ea6a2aac40a58a5967549cdc128f71a46bfd3776e7667f7a28f0add49173c0c2ad0f50f8c7dfb4a9292f87ea2b7698c20
|
7
|
+
data.tar.gz: ae41a3cdb7664e6694f23c96d9f67f24304b813371eea173523627dd695f7e44ce78b00602791c0b8baf106b9e999336da6efe39c23a7334fc5875ed25a112f4
|
data/CHANGELOG.md
CHANGED
data/lib/govspeak.rb
CHANGED
@@ -11,6 +11,7 @@ require 'govspeak/html_sanitizer'
|
|
11
11
|
require 'govspeak/kramdown_overrides'
|
12
12
|
require 'govspeak/blockquote_extra_quote_remover'
|
13
13
|
require 'govspeak/post_processor'
|
14
|
+
require 'govspeak/link_extractor'
|
14
15
|
require 'govspeak/presenters/attachment_presenter'
|
15
16
|
require 'govspeak/presenters/contact_presenter'
|
16
17
|
require 'govspeak/presenters/h_card_presenter'
|
@@ -94,6 +95,10 @@ module Govspeak
|
|
94
95
|
Govspeak::StructuredHeaderExtractor.new(self).call
|
95
96
|
end
|
96
97
|
|
98
|
+
def extracted_links
|
99
|
+
Govspeak::LinkExtractor.new(self).call
|
100
|
+
end
|
101
|
+
|
97
102
|
def preprocess(source)
|
98
103
|
source = Govspeak::BlockquoteExtraQuoteRemover.remove(source)
|
99
104
|
@@extensions.each do |title,regexp,block|
|
data/lib/govspeak/link_extractor.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Govspeak
|
2
|
+
class LinkExtractor
|
3
|
+
def initialize(document)
|
4
|
+
@document = document
|
5
|
+
end
|
6
|
+
|
7
|
+
def call
|
8
|
+
@links ||= extract_links
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def extract_links
|
14
|
+
document_anchors.map { |link| link['href'] }
|
15
|
+
end
|
16
|
+
|
17
|
+
def document_anchors
|
18
|
+
processed_govspeak.css('a:not([href^="mailto"])').css('a:not([href^="#"])')
|
19
|
+
end
|
20
|
+
|
21
|
+
def processed_govspeak
|
22
|
+
doc = Nokogiri::HTML::Document.new
|
23
|
+
doc.encoding = "UTF-8"
|
24
|
+
|
25
|
+
doc.fragment(@document.to_html)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/govspeak/version.rb
CHANGED
data/test/govspeak_link_extractor_test.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
class GovspeakLinkExtractorTest < Minitest::Test
|
4
|
+
def document_body
|
5
|
+
%{
|
6
|
+
## Heading
|
7
|
+
|
8
|
+
[link](http://www.example.com)
|
9
|
+
|
10
|
+
[link_two](http://www.gov.com)
|
11
|
+
|
12
|
+
[not_a_link](#somepage)
|
13
|
+
|
14
|
+
[mailto:](mailto:someone@www.example.com)
|
15
|
+
}
|
16
|
+
end
|
17
|
+
|
18
|
+
def doc
|
19
|
+
@doc ||= Govspeak::Document.new(document_body)
|
20
|
+
end
|
21
|
+
|
22
|
+
def links
|
23
|
+
doc.extracted_links
|
24
|
+
end
|
25
|
+
|
26
|
+
test "Links are extracted from the body" do
|
27
|
+
expected_links = ["http://www.example.com", "http://www.gov.com"]
|
28
|
+
assert_equal expected_links, links
|
29
|
+
end
|
30
|
+
|
31
|
+
test "Other content is not extracted from the body" do
|
32
|
+
refute_includes ["Heading"], links
|
33
|
+
end
|
34
|
+
|
35
|
+
test "Links are not extracted if they begin with #" do
|
36
|
+
refute_includes ["#somepage"], links
|
37
|
+
end
|
38
|
+
|
39
|
+
test "Links are not extracted if they begin with mailto:" do
|
40
|
+
refute_includes ["mailto:someone@www.example.com"], links
|
41
|
+
end
|
42
|
+
end
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.2.2
|
4
|
+
version: 5.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
8
|
-
- James Stewart
|
7
|
+
- GOV.UK Dev
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2018-01-04 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: kramdown
|
@@ -251,8 +250,7 @@ description: |-
|
|
251
250
|
A set of extensions to markdown layered on top of the kramdown
|
252
251
|
library for use in the UK Government Single Domain project
|
253
252
|
email:
|
254
|
-
-
|
255
|
-
- james.stewart@digital.cabinet-office.gov.uk
|
253
|
+
- govuk-dev@digital.cabinet-office.gov.uk
|
256
254
|
executables:
|
257
255
|
- govspeak
|
258
256
|
extensions: []
|
@@ -270,6 +268,7 @@ files:
|
|
270
268
|
- lib/govspeak/html_sanitizer.rb
|
271
269
|
- lib/govspeak/html_validator.rb
|
272
270
|
- lib/govspeak/kramdown_overrides.rb
|
271
|
+
- lib/govspeak/link_extractor.rb
|
273
272
|
- lib/govspeak/post_processor.rb
|
274
273
|
- lib/govspeak/presenters/attachment_presenter.rb
|
275
274
|
- lib/govspeak/presenters/contact_presenter.rb
|
@@ -287,6 +286,7 @@ files:
|
|
287
286
|
- test/govspeak_attachments_test.rb
|
288
287
|
- test/govspeak_button_test.rb
|
289
288
|
- test/govspeak_contacts_test.rb
|
289
|
+
- test/govspeak_link_extractor_test.rb
|
290
290
|
- test/govspeak_link_test.rb
|
291
291
|
- test/govspeak_structured_headers_test.rb
|
292
292
|
- test/govspeak_test.rb
|
@@ -320,18 +320,19 @@ signing_key:
|
|
320
320
|
specification_version: 4
|
321
321
|
summary: Markup language for single domain
|
322
322
|
test_files:
|
323
|
-
- test/blockquote_extra_quote_remover_test.rb
|
324
|
-
- test/govspeak_test_helper.rb
|
325
|
-
- test/govspeak_structured_headers_test.rb
|
326
|
-
- test/govspeak_attachments_image_test.rb
|
327
323
|
- test/govspeak_attachments_test.rb
|
328
|
-
- test/test_helper.rb
|
329
324
|
- test/govspeak_attachments_inline_test.rb
|
330
|
-
- test/html_sanitizer_test.rb
|
331
|
-
- test/govspeak_button_test.rb
|
332
|
-
- test/with_deep_merge_test.rb
|
333
|
-
- test/html_validator_test.rb
|
334
|
-
- test/govspeak_test.rb
|
335
325
|
- test/govspeak_link_test.rb
|
326
|
+
- test/govspeak_link_extractor_test.rb
|
336
327
|
- test/govspeak_contacts_test.rb
|
328
|
+
- test/test_helper.rb
|
329
|
+
- test/govspeak_test.rb
|
330
|
+
- test/govspeak_attachments_image_test.rb
|
331
|
+
- test/with_deep_merge_test.rb
|
332
|
+
- test/html_validator_test.rb
|
333
|
+
- test/govspeak_structured_headers_test.rb
|
334
|
+
- test/blockquote_extra_quote_remover_test.rb
|
337
335
|
- test/presenters/h_card_presenter_test.rb
|
336
|
+
- test/govspeak_button_test.rb
|
337
|
+
- test/govspeak_test_helper.rb
|
338
|
+
- test/html_sanitizer_test.rb
|