govspeak 5.2.2 → 5.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/govspeak.rb +5 -0
- data/lib/govspeak/link_extractor.rb +28 -0
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_link_extractor_test.rb +42 -0
- metadata +17 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 117026a2b98a5909e89263c2d9f05091b492e8b2
|
4
|
+
data.tar.gz: b6f5a662a795899e4beb399d87ade54181f40e81
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da21f89b2844d3174d1820ecd0aa991ea6a2aac40a58a5967549cdc128f71a46bfd3776e7667f7a28f0add49173c0c2ad0f50f8c7dfb4a9292f87ea2b7698c20
|
7
|
+
data.tar.gz: ae41a3cdb7664e6694f23c96d9f67f24304b813371eea173523627dd695f7e44ce78b00602791c0b8baf106b9e999336da6efe39c23a7334fc5875ed25a112f4
|
data/CHANGELOG.md
CHANGED
data/lib/govspeak.rb
CHANGED
@@ -11,6 +11,7 @@ require 'govspeak/html_sanitizer'
|
|
11
11
|
require 'govspeak/kramdown_overrides'
|
12
12
|
require 'govspeak/blockquote_extra_quote_remover'
|
13
13
|
require 'govspeak/post_processor'
|
14
|
+
require 'govspeak/link_extractor'
|
14
15
|
require 'govspeak/presenters/attachment_presenter'
|
15
16
|
require 'govspeak/presenters/contact_presenter'
|
16
17
|
require 'govspeak/presenters/h_card_presenter'
|
@@ -94,6 +95,10 @@ module Govspeak
|
|
94
95
|
Govspeak::StructuredHeaderExtractor.new(self).call
|
95
96
|
end
|
96
97
|
|
98
|
+
# Returns the links found in this document.
#
# Builds a Govspeak::LinkExtractor around this object and returns whatever
# its #call method produces.
def extracted_links
  link_extractor = Govspeak::LinkExtractor.new(self)
  link_extractor.call
end
|
101
|
+
|
97
102
|
def preprocess(source)
|
98
103
|
source = Govspeak::BlockquoteExtraQuoteRemover.remove(source)
|
99
104
|
@@extensions.each do |title,regexp,block|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Govspeak
  # Pulls the href targets out of a document's rendered HTML.
  #
  # The document's HTML (as returned by its #to_html) is parsed as a Nokogiri
  # fragment. Anchors whose href starts with "mailto" or "#" are skipped, and
  # the href of every remaining anchor is returned.
  class LinkExtractor
    # @param document [#to_html] object whose rendered HTML will be scanned
    def initialize(document)
      @document = document
    end

    # Extracts the links, memoising the result so the HTML is parsed only once
    # per extractor instance.
    #
    # @return [Array<String>] href values in document order
    def call
      @links ||= extract_links
    end

    private

    # Maps each selected anchor to its href value.
    #
    # An anchor with no href attribute at all (for example <a name="top">)
    # is not excluded by either :not() selector, and reading its 'href'
    # gives nil. Those nils are dropped so callers only ever see strings.
    def extract_links
      document_anchors.map { |link| link['href'] }.compact
    end

    # Anchors that are neither mailto links nor in-page fragment links.
    def document_anchors
      processed_govspeak.css('a:not([href^="mailto"])').css('a:not([href^="#"])')
    end

    # Parses the rendered HTML as a fragment of a fresh UTF-8 HTML document.
    def processed_govspeak
      doc = Nokogiri::HTML::Document.new
      doc.encoding = "UTF-8"

      doc.fragment(@document.to_html)
    end
  end
end
|
data/lib/govspeak/version.rb
CHANGED
@@ -0,0 +1,42 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
# Checks which hrefs Govspeak::Document#extracted_links returns for a small
# markdown body containing a heading, two absolute links, an in-page anchor
# link and a mailto link.
class GovspeakLinkExtractorTest < Minitest::Test
  # Markdown fixture shared by every test in this class.
  def document_body
    %{
## Heading

[link](http://www.example.com)

[link_two](http://www.gov.com)

[not_a_link](#somepage)

[mailto:](mailto:someone@www.example.com)
    }
  end

  def doc
    @doc ||= Govspeak::Document.new(document_body)
  end

  def links
    doc.extracted_links
  end

  test "Links are extracted from the body" do
    expected_links = ["http://www.example.com", "http://www.gov.com"]
    assert_equal expected_links, links
  end

  # refute_includes takes (collection, object). The collection must be the
  # extracted links; passing them the other way round asks whether a
  # one-element array contains the whole links array, which can never fail.
  test "Other content is not extracted from the body" do
    refute_includes links, "Heading"
  end

  test "Links are not extracted if they begin with #" do
    refute_includes links, "#somepage"
  end

  test "Links are not extracted if they begin with mailto:" do
    refute_includes links, "mailto:someone@www.example.com"
  end
end
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.2.2
|
4
|
+
version: 5.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
8
|
-
- James Stewart
|
7
|
+
- GOV.UK Dev
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2018-01-04 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: kramdown
|
@@ -251,8 +250,7 @@ description: |-
|
|
251
250
|
A set of extensions to markdown layered on top of the kramdown
|
252
251
|
library for use in the UK Government Single Domain project
|
253
252
|
email:
|
254
|
-
-
|
255
|
-
- james.stewart@digital.cabinet-office.gov.uk
|
253
|
+
- govuk-dev@digital.cabinet-office.gov.uk
|
256
254
|
executables:
|
257
255
|
- govspeak
|
258
256
|
extensions: []
|
@@ -270,6 +268,7 @@ files:
|
|
270
268
|
- lib/govspeak/html_sanitizer.rb
|
271
269
|
- lib/govspeak/html_validator.rb
|
272
270
|
- lib/govspeak/kramdown_overrides.rb
|
271
|
+
- lib/govspeak/link_extractor.rb
|
273
272
|
- lib/govspeak/post_processor.rb
|
274
273
|
- lib/govspeak/presenters/attachment_presenter.rb
|
275
274
|
- lib/govspeak/presenters/contact_presenter.rb
|
@@ -287,6 +286,7 @@ files:
|
|
287
286
|
- test/govspeak_attachments_test.rb
|
288
287
|
- test/govspeak_button_test.rb
|
289
288
|
- test/govspeak_contacts_test.rb
|
289
|
+
- test/govspeak_link_extractor_test.rb
|
290
290
|
- test/govspeak_link_test.rb
|
291
291
|
- test/govspeak_structured_headers_test.rb
|
292
292
|
- test/govspeak_test.rb
|
@@ -320,18 +320,19 @@ signing_key:
|
|
320
320
|
specification_version: 4
|
321
321
|
summary: Markup language for single domain
|
322
322
|
test_files:
|
323
|
-
- test/blockquote_extra_quote_remover_test.rb
|
324
|
-
- test/govspeak_test_helper.rb
|
325
|
-
- test/govspeak_structured_headers_test.rb
|
326
|
-
- test/govspeak_attachments_image_test.rb
|
327
323
|
- test/govspeak_attachments_test.rb
|
328
|
-
- test/test_helper.rb
|
329
324
|
- test/govspeak_attachments_inline_test.rb
|
330
|
-
- test/html_sanitizer_test.rb
|
331
|
-
- test/govspeak_button_test.rb
|
332
|
-
- test/with_deep_merge_test.rb
|
333
|
-
- test/html_validator_test.rb
|
334
|
-
- test/govspeak_test.rb
|
335
325
|
- test/govspeak_link_test.rb
|
326
|
+
- test/govspeak_link_extractor_test.rb
|
336
327
|
- test/govspeak_contacts_test.rb
|
328
|
+
- test/test_helper.rb
|
329
|
+
- test/govspeak_test.rb
|
330
|
+
- test/govspeak_attachments_image_test.rb
|
331
|
+
- test/with_deep_merge_test.rb
|
332
|
+
- test/html_validator_test.rb
|
333
|
+
- test/govspeak_structured_headers_test.rb
|
334
|
+
- test/blockquote_extra_quote_remover_test.rb
|
337
335
|
- test/presenters/h_card_presenter_test.rb
|
336
|
+
- test/govspeak_button_test.rb
|
337
|
+
- test/govspeak_test_helper.rb
|
338
|
+
- test/html_sanitizer_test.rb
|