govspeak 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md ADDED
@@ -0,0 +1,4 @@
1
+ ## 1.4.0
2
+
3
+ Added `#structured_headers` method to provide hierarchically structured
4
+ headers extracted from markdown text heading tags.
data/lib/govspeak.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'kramdown'
2
2
  require 'govspeak/header_extractor'
3
+ require 'govspeak/structured_header_extractor'
3
4
  require 'govspeak/html_validator'
4
5
  require 'govspeak/html_sanitizer'
5
6
  require 'kramdown/parser/kramdown_with_automatic_external_links'
@@ -55,6 +56,10 @@ module Govspeak
55
56
  Govspeak::HeaderExtractor.convert(kramdown_doc).first
56
57
  end
57
58
 
59
# Returns the document's headings arranged as a nested outline. The work is
# delegated to Govspeak::StructuredHeaderExtractor, which is handed this
# document and asked for its result.
def structured_headers
  extractor = Govspeak::StructuredHeaderExtractor.new(self)
  extractor.call
end
62
+
58
63
  def preprocess(source)
59
64
  @@extensions.each do |title,regexp,block|
60
65
  source.gsub!(regexp) {|match|
@@ -0,0 +1,97 @@
1
module Govspeak

  # One heading in the nested outline produced by StructuredHeaderExtractor.
  #
  # text    - the heading's text
  # level   - the heading level as an integer (2 for an h2, 3 for an h3, ...)
  # id      - the heading's id
  # headers - Array of StructuredHeader children nested under this heading
  StructuredHeader = Struct.new(:text, :level, :id, :headers) do
    # The heading level that forms the root of the outline (h2).
    def top_level
      2
    end

    # True when this heading sits at the root of the outline.
    def top_level?
      level == top_level
    end
  end

  # Turns the flat list of headers exposed by a document into a tree.
  #
  # The document only needs to respond to #headers, returning objects that
  # respond to #text, #level and #id.
  class StructuredHeaderExtractor
    def initialize(document)
      @doc = document
      @structured_headers = []
      @extracted = false
      reset_stack
    end

    # Builds and returns the Array of top-level StructuredHeader objects, each
    # carrying its descendants in #headers.
    #
    # Headers above the top level (h1) are skipped, as are headers that do not
    # fit the hierarchy (for example an h4 directly after an h2, or an h3
    # before any h2).
    #
    # The outline is built once per extractor. Later calls return the same
    # result instead of walking the memoized header list again, which would
    # append every heading (and every child) a second time.
    def call
      return structured_headers if @extracted

      headers_list.each do |header|
        next if header_higher_than_top_level?(header)

        if header.top_level?
          add_top_level(header)
        elsif header_at_same_level_as_prev?(header)
          add_sibling(header)
        elsif header_one_level_lower_than_prev?(header)
          add_child(header)
        elsif header_at_higher_level_than_prev?(header)
          add_uncle_or_aunt(header)
        else
          next # ignore semantically invalid headers
        end

        # The stack holds the chain of ancestors ending in the header that was
        # attached most recently.
        stack.push(header)
      end

      @extracted = true
      structured_headers
    end

    attr_reader :doc, :stack, :structured_headers
    private :doc, :stack, :structured_headers

    # The document's headers wrapped as StructuredHeader objects, each starting
    # with an empty list of children. Memoized.
    def headers_list
      @headers_list ||= doc.headers.map { |h|
        StructuredHeader.new(h.text, h.level, h.id, [])
      }
    end

    # Starts a new top-level branch; the previous ancestor chain is discarded.
    def add_top_level(header)
      structured_headers.push(header)
      reset_stack
    end

    # Drops the previous header from the stack and attaches this one to the
    # same parent.
    def add_sibling(header)
      stack.pop
      stack.last.headers << header
    end

    # Attaches the header beneath the previous header.
    def add_child(header)
      stack.last.headers << header
    end

    # Climbs back up the ancestor chain and attaches the header to the
    # ancestor one level above it.
    def add_uncle_or_aunt(header)
      pop_stack_to_level(header)
      stack.last.headers << header
    end

    def header_higher_than_top_level?(header)
      header.level < header.top_level
    end

    def header_at_same_level_as_prev?(header)
      stack.last && stack.last.level == header.level
    end

    def header_one_level_lower_than_prev?(header)
      # lower level means level integer is higher
      stack.last && (stack.last.level - header.level == -1)
    end

    def header_at_higher_level_than_prev?(header)
      # higher level means level integer is lower
      stack.last && (stack.last.level > header.level)
    end

    # Pops every stack entry at the header's level or deeper, leaving the
    # header's parent on top. Stack entries increase by one level at a time,
    # so the number of pops is the level difference plus one.
    def pop_stack_to_level(header)
      times_to_pop = stack.last.level - header.level + 1
      times_to_pop.times { stack.pop }
    end

    def reset_stack
      @stack = []
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "1.3.0"
2
+ VERSION = "1.4.0"
3
3
  end
@@ -0,0 +1,103 @@
1
+ require 'test_helper'
2
+
3
# Exercises Govspeak::Document#structured_headers against a well-formed
# outline and against a document containing headings that do not fit the
# hierarchy.
class GovspeakStructuredHeadersTest < Test::Unit::TestCase

  # Markdown fixture whose headings nest correctly from h2 down to h5.
  def document_body
    %{
## Heading 1

## Heading 2

### Sub heading 2.1

### Sub heading 2.2

#### Sub sub heading 2.2.1

### Sub heading 2.3

## Heading 3

## Heading 4

### Sub heading 4.1

#### Sub heading 4.1.1

##### Sub heading 4.1.1.1

### Sub heading 4.2

## Heading 5

}
  end

  # Document built from the well-formed fixture, created once per test.
  def doc
    @doc ||= Govspeak::Document.new(document_body)
  end

  # Outline extracted from the well-formed document.
  def structured_headers
    doc.structured_headers
  end

  test "Headings with no sub-headings have an empty headings collection" do
    first_heading = structured_headers[0]
    assert_empty first_heading.headers
  end

  test "h2s are extracted as top level headings" do
    actual_headings = structured_headers.map { |heading| heading.text }

    assert_equal ["Heading 1", "Heading 2", "Heading 3", "Heading 4", "Heading 5"], actual_headings
  end

  test "headings can have multiple sub-headings" do
    second_heading = structured_headers[1]
    actual_heading_texts = second_heading.headers.map { |heading| heading.text }
    assert_equal ["Sub heading 2.1", "Sub heading 2.2", "Sub heading 2.3"], actual_heading_texts
  end

  test "h3 following h2s are nested within them" do
    second_heading = structured_headers[1]
    assert_equal "Sub heading 2.1", second_heading.headers.first.text
  end

  test "h4 following h3s are nested within them" do
    sub_heading = structured_headers[1].headers[1]
    assert_equal "Sub sub heading 2.2.1", sub_heading.headers.first.text
  end

  test "h3 can follow an h5" do
    fourth_heading = structured_headers[3]
    assert_equal "Sub heading 4.2", fourth_heading.headers[1].text
  end

  # Markdown fixture with headings that break the hierarchy: an h3 before any
  # h2, an h4 directly under an h2, and an h1.
  def invalid_document_body
    %{
### Invalid heading (h3)

## Heading 1

#### Invalid heading (h4)

### Sub heading 1.1

# Invalid heading (h1)

}
  end

  # Document built from the invalid fixture, created once per test.
  def invalid_doc
    @invalid_doc ||= Govspeak::Document.new(invalid_document_body)
  end

  # Outline extracted from the invalid document.
  def invalid_structured_headers
    invalid_doc.structured_headers
  end

  test "semantically invalid headers are ignored" do
    top_level_texts = invalid_structured_headers.map { |heading| heading.text }
    assert_equal ["Heading 1"], top_level_texts

    nested_texts = invalid_structured_headers[0].headers.map { |heading| heading.text }
    assert_equal ["Sub heading 1.1"], nested_texts
  end

  test "document with single h1 produces no headers" do
    h1_only = Govspeak::Document.new("# Heading\n")
    assert_equal [], h1_only.structured_headers
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-02-03 00:00:00.000000000 Z
13
+ date: 2014-03-05 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: kramdown
@@ -140,13 +140,16 @@ files:
140
140
  - lib/govspeak/version.rb
141
141
  - lib/govspeak/html_sanitizer.rb
142
142
  - lib/govspeak/header_extractor.rb
143
+ - lib/govspeak/structured_header_extractor.rb
143
144
  - README.md
145
+ - CHANGELOG.md
144
146
  - Gemfile
145
147
  - Rakefile
146
148
  - test/govspeak_test_helper.rb
147
149
  - test/html_validator_test.rb
148
150
  - test/govspeak_test.rb
149
151
  - test/html_sanitizer_test.rb
152
+ - test/govspeak_structured_headers_test.rb
150
153
  - test/test_helper.rb
151
154
  homepage: http://github.com/alphagov/govspeak
152
155
  licenses: []
@@ -162,7 +165,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
162
165
  version: '0'
163
166
  segments:
164
167
  - 0
165
- hash: 3587010685364545485
168
+ hash: -3957971095230574253
166
169
  required_rubygems_version: !ruby/object:Gem::Requirement
167
170
  none: false
168
171
  requirements:
@@ -171,7 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
174
  version: '0'
172
175
  segments:
173
176
  - 0
174
- hash: 3587010685364545485
177
+ hash: -3957971095230574253
175
178
  requirements: []
176
179
  rubyforge_project:
177
180
  rubygems_version: 1.8.23
@@ -183,4 +186,5 @@ test_files:
183
186
  - test/html_validator_test.rb
184
187
  - test/govspeak_test.rb
185
188
  - test/html_sanitizer_test.rb
189
+ - test/govspeak_structured_headers_test.rb
186
190
  - test/test_helper.rb