govspeak 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md ADDED
@@ -0,0 +1,4 @@
1
+ ## 1.4.0
2
+
3
+ Added `#structured_headers` method to provide hierarchically structured
4
+ headers extracted from markdown text heading tags.
data/lib/govspeak.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'kramdown'
2
2
  require 'govspeak/header_extractor'
3
+ require 'govspeak/structured_header_extractor'
3
4
  require 'govspeak/html_validator'
4
5
  require 'govspeak/html_sanitizer'
5
6
  require 'kramdown/parser/kramdown_with_automatic_external_links'
@@ -55,6 +56,10 @@ module Govspeak
55
56
  Govspeak::HeaderExtractor.convert(kramdown_doc).first
56
57
  end
57
58
 
59
# Returns the result of Govspeak::StructuredHeaderExtractor#call for this
# document: the extractor is constructed with `self` and invoked immediately.
# NOTE(review): the extractor reads `headers` from the object it is given;
# that method is presumably defined elsewhere on this class -- confirm.
+ def structured_headers
60
+ Govspeak::StructuredHeaderExtractor.new(self).call
61
+ end
62
+
58
63
  def preprocess(source)
59
64
  @@extensions.each do |title,regexp,block|
60
65
  source.gsub!(regexp) {|match|
@@ -0,0 +1,97 @@
1
+ module Govspeak
2
+
3
# Value object for a single heading in the nested header structure.
#
# Members:
#   text    - heading text
#   level   - heading level as an integer (compared against top_level)
#   id      - heading id
#   headers - collection of nested StructuredHeader children
+ StructuredHeader = Struct.new(:text, :level, :id, :headers) do
4
# Level that counts as the root of the structure (always 2).
+ def top_level
5
+ 2
6
+ end
7
+
8
# True when this header sits exactly at the root level.
+ def top_level?
9
+ level == top_level
10
+ end
11
+ end
12
+
13
# Turns the flat list returned by `document.headers` into a nested list of
# StructuredHeader objects.
#
# A stack tracks the chain of headers from the current top-level header down
# to the most recently placed header, so `stack.last` is always "the previous
# header" when the next one is examined.
+ class StructuredHeaderExtractor
14
# document - object responding to `headers`; each element must respond to
#            `text`, `level` and `id` (see headers_list).
+ def initialize(document)
15
+ @doc = document
16
+ @structured_headers = []
17
+ reset_stack
18
+ end
19
+
20
# Walks the headers in order and places each one relative to the previous
# placed header. Returns the array of top-level headers, each carrying its
# descendants in `headers`.
#
# Headers above the top level, and headers that match none of the placement
# rules (including any header seen while the stack is empty), are skipped
# and never pushed onto the stack.
+ def call
21
+ headers_list.each do |header|
22
+ next if header_higher_than_top_level?(header)
23
+
24
+ if header.top_level?
25
+ add_top_level(header)
26
+ elsif header_at_same_level_as_prev?(header)
27
+ add_sibling(header)
28
+ elsif header_one_level_lower_than_prev?(header)
29
+ add_child(header)
30
+ elsif header_at_higher_level_than_prev?(header)
31
+ add_uncle_or_aunt(header)
32
+ else
33
+ next # ignore semantically invalid headers
34
+ end
35
+
36
# Only reached for headers that were placed; the `next` calls above skip it.
+ stack.push(header)
37
+ end
38
+
39
+ structured_headers
40
+ end
41
+
42
# NOTE(review): `private` is called with explicit names, so only these three
# readers become private; the helper methods defined below remain public.
+ attr_reader :doc, :stack, :structured_headers
43
+ private :doc, :stack, :structured_headers
44
+
45
# Memoised list of StructuredHeader objects built from `doc.headers`, each
# starting with its own empty children array.
+ def headers_list
46
+ @headers_list ||= doc.headers.map { |h|
47
+ StructuredHeader.new(h.text, h.level, h.id, [])
48
+ }
49
+ end
50
+
51
# Appends a root header to the result and clears the stack; `call` then
# pushes the header, making it the sole stack entry.
+ def add_top_level(header)
52
+ structured_headers.push(header)
53
+ reset_stack
54
+ end
55
+
56
# Drops the previous header (the sibling) so that stack.last is the shared
# parent, then attaches the header to that parent.
+ def add_sibling(header)
57
+ stack.pop
58
+ stack.last.headers << header
59
+ end
60
+
61
# Attaches the header beneath the previous header.
+ def add_child(header)
62
+ stack.last.headers << header
63
+ end
64
+
65
# Attaches a header that is shallower than the previous one: unwinds the
# stack to the header's parent, then attaches it there.
+ def add_uncle_or_aunt(header)
66
+ pop_stack_to_level(header)
67
+ stack.last.headers << header
68
+ end
69
+
70
# True when the header's level integer is below the top level.
+ def header_higher_than_top_level?(header)
71
+ header.level < header.top_level
72
+ end
73
+
74
# Falsy when the stack is empty (no previous header to compare against).
+ def header_at_same_level_as_prev?(header)
75
+ stack.last && stack.last.level == header.level
76
+ end
77
+
78
# Falsy when the stack is empty or the header is more than one level deeper.
+ def header_one_level_lower_than_prev?(header)
79
+ # lower level means level integer is higher
80
+ stack.last && (stack.last.level - header.level == -1)
81
+ end
82
+
83
# Falsy when the stack is empty.
+ def header_at_higher_level_than_prev?(header)
84
+ # higher level means level integer is lower
85
+ stack.last && (stack.last.level > header.level)
86
+ end
87
+
88
# Pops every entry at the header's level or deeper, leaving the header's
# parent on top. The +1 removes the entry at the header's own level as well.
# This count is right because the stack holds consecutive levels: `call`
# only nests a header exactly one level below the previous one.
+ def pop_stack_to_level(header)
89
+ times_to_pop = stack.last.level - header.level + 1
90
+ times_to_pop.times { stack.pop }
91
+ end
92
+
93
# Replaces the stack with a fresh empty array.
+ def reset_stack
94
+ @stack = []
95
+ end
96
+ end
97
+ end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "1.3.0"
2
+ VERSION = "1.4.0"
3
3
  end
@@ -0,0 +1,103 @@
1
+ require 'test_helper'
2
+
3
# Tests for Govspeak::Document#structured_headers, covering a well-nested
# document and a document containing headings that should be ignored.
+ class GovspeakStructuredHeadersTest < Test::Unit::TestCase
4
+
5
# Fixture: markdown whose headings (h2 down to h5) are nested one level at a
# time, including a jump back up from h5 to h3.
+ def document_body
6
+ %{
7
+ ## Heading 1
8
+
9
+ ## Heading 2
10
+
11
+ ### Sub heading 2.1
12
+
13
+ ### Sub heading 2.2
14
+
15
+ #### Sub sub heading 2.2.1
16
+
17
+ ### Sub heading 2.3
18
+
19
+ ## Heading 3
20
+
21
+ ## Heading 4
22
+
23
+ ### Sub heading 4.1
24
+
25
+ #### Sub heading 4.1.1
26
+
27
+ ##### Sub heading 4.1.1.1
28
+
29
+ ### Sub heading 4.2
30
+
31
+ ## Heading 5
32
+
33
+ }
34
+ end
35
+
36
# Memoised document built from the valid fixture.
+ def doc
37
+ @doc ||= Govspeak::Document.new(document_body)
38
+ end
39
+
40
# Structured headers of the valid fixture document.
+ def structured_headers
41
+ doc.structured_headers
42
+ end
43
+
44
+ test "Headings with no sub-headings have an empty headings collection" do
45
+ assert_empty structured_headers.first.headers
46
+ end
47
+
48
+ test "h2s are extracted as top level headings" do
49
+ expected_headings = ["Heading 1", "Heading 2", "Heading 3", "Heading 4", "Heading 5"]
50
+
51
+ assert_equal expected_headings, structured_headers.map(&:text)
52
+ end
53
+
54
+ test "headings can have multiple sub-headings" do
55
+ expected_heading_texts = ["Sub heading 2.1", "Sub heading 2.2", "Sub heading 2.3"]
56
+ assert_equal expected_heading_texts, structured_headers[1].headers.map(&:text)
57
+ end
58
+
59
+ test "h3 following h2s are nested within them" do
60
+ assert_equal "Sub heading 2.1", structured_headers[1].headers[0].text
61
+ end
62
+
63
+ test "h4 following h3s are nested within them" do
64
+ assert_equal "Sub sub heading 2.2.1", structured_headers[1].headers[1].headers[0].text
65
+ end
66
+
67
# Exercises unwinding the stack by more than one level (h5 back up to h3).
+ test "h3 can follow an h5" do
68
+ assert_equal "Sub heading 4.2", structured_headers[3].headers[1].text
69
+ end
70
+
71
# Fixture: an h3 before any h2, an h4 directly under an h2 (skipping h3),
# and a trailing h1 -- all expected to be dropped -- around one valid
# h2 / h3 pair.
+ def invalid_document_body
72
+ %{
73
+ ### Invalid heading (h3)
74
+
75
+ ## Heading 1
76
+
77
+ #### Invalid heading (h4)
78
+
79
+ ### Sub heading 1.1
80
+
81
+ # Invalid heading (h1)
82
+
83
+ }
84
+ end
85
+
86
# Memoised document built from the invalid fixture.
+ def invalid_doc
87
+ @invalid_doc ||= Govspeak::Document.new(invalid_document_body)
88
+ end
89
+
90
# Structured headers of the invalid fixture document.
+ def invalid_structured_headers
91
+ invalid_doc.structured_headers
92
+ end
93
+
94
+ test "semantically invalid headers are ignored" do
95
+ assert_equal ["Heading 1"], invalid_structured_headers.map(&:text)
96
+
97
+ assert_equal ["Sub heading 1.1"], invalid_structured_headers.first.headers.map(&:text)
98
+ end
99
+
100
+ test "document with single h1 produces no headers" do
101
+ assert_equal [], Govspeak::Document.new("# Heading\n").structured_headers
102
+ end
103
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-02-03 00:00:00.000000000 Z
13
+ date: 2014-03-05 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: kramdown
@@ -140,13 +140,16 @@ files:
140
140
  - lib/govspeak/version.rb
141
141
  - lib/govspeak/html_sanitizer.rb
142
142
  - lib/govspeak/header_extractor.rb
143
+ - lib/govspeak/structured_header_extractor.rb
143
144
  - README.md
145
+ - CHANGELOG.md
144
146
  - Gemfile
145
147
  - Rakefile
146
148
  - test/govspeak_test_helper.rb
147
149
  - test/html_validator_test.rb
148
150
  - test/govspeak_test.rb
149
151
  - test/html_sanitizer_test.rb
152
+ - test/govspeak_structured_headers_test.rb
150
153
  - test/test_helper.rb
151
154
  homepage: http://github.com/alphagov/govspeak
152
155
  licenses: []
@@ -162,7 +165,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
162
165
  version: '0'
163
166
  segments:
164
167
  - 0
165
- hash: 3587010685364545485
168
+ hash: -3957971095230574253
166
169
  required_rubygems_version: !ruby/object:Gem::Requirement
167
170
  none: false
168
171
  requirements:
@@ -171,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
174
  version: '0'
172
175
  segments:
173
176
  - 0
174
- hash: 3587010685364545485
177
+ hash: -3957971095230574253
175
178
  requirements: []
176
179
  rubyforge_project:
177
180
  rubygems_version: 1.8.23
@@ -183,4 +186,5 @@ test_files:
183
186
  - test/html_validator_test.rb
184
187
  - test/govspeak_test.rb
185
188
  - test/html_sanitizer_test.rb
189
+ - test/govspeak_structured_headers_test.rb
186
190
  - test/test_helper.rb