slaw 0.8.3 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -0
- data/README.md +10 -0
- data/lib/slaw/parse/blocklists.rb +19 -1
- data/lib/slaw/parse/builder.rb +7 -6
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act.treetop +54 -57
- data/lib/slaw/za/act_nodes.rb +24 -53
- data/spec/parse/builder_spec.rb +135 -10
- data/spec/za/act_spec.rb +225 -129
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08d012c5c08856227cf9c203f5c932d076115522
|
4
|
+
data.tar.gz: 742fa6d2ff279945b3ed3cdff468c62a9567cd17
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f831cd0db6d4d05a80634645b248cfb8555122e85f59af07cc848cdb32330d3f97eab5d59b883802209eb4d9ebe3079a7d29ad80df20a73b2ec340cd1568d2d
|
7
|
+
data.tar.gz: 21285db937fb61a1d81ef7a8a8359219e2b10c066e36e9d01ec6091d107bdd3f2d8ccb5e5fb06f1cb394ff4b8cf52b78c20cedcaab36ef19b8b271623ef946c0
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -216,6 +216,16 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
|
|
216
216
|
|
217
217
|
## Changelog
|
218
218
|
|
219
|
+
### 0.9.0
|
220
|
+
|
221
|
+
* This release makes reasonably significant changes to generated XML, particularly
|
222
|
+
for sections without explicit subsections.
|
223
|
+
* Blocklist items numbered (aa) that follow (z) are now treated as using the same numbering format.
|
224
|
+
* Change how blockList listIntroduction elements are created to be more generic
|
225
|
+
* Support for sections that dive straight into lists without subsections
|
226
|
+
* Simplify grammar
|
227
|
+
* Fix elements with potentially duplicate ids
|
228
|
+
|
219
229
|
### 0.8.3
|
220
230
|
|
221
231
|
* During cleanup, break lines on section titles that don't have a space after the number, eg: "New section title 4.(1) The content..."
|
@@ -36,7 +36,7 @@ module Slaw
|
|
36
36
|
def self.nest_blocklists(doc)
|
37
37
|
doc.xpath('//a:blockList', a: NS).each do |blocklist|
|
38
38
|
items = blocklist.xpath('a:item', a: NS)
|
39
|
-
nest_blocklist_items(items.to_a, guess_number_format(items.first), nil, nil)
|
39
|
+
nest_blocklist_items(items.to_a, guess_number_format(items.first), nil, nil) unless items.empty?
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -47,11 +47,18 @@ module Slaw
|
|
47
47
|
item = items.shift
|
48
48
|
|
49
49
|
sublist_count = 0
|
50
|
+
number_format = our_number_format
|
50
51
|
|
51
52
|
while item and item.name == 'item'
|
52
53
|
number_format = guess_number_format(item, number_format)
|
53
54
|
break unless number_format
|
54
55
|
|
56
|
+
# (aa) after (z) is the same numbering type, so pretend we've always
|
57
|
+
# been this format
|
58
|
+
if item.num == "(aa)" and item.previous_element and item.previous_element.num == "(z)"
|
59
|
+
our_number_format = number_format
|
60
|
+
end
|
61
|
+
|
55
62
|
if number_format != our_number_format
|
56
63
|
# new sublist, or back to the old list?
|
57
64
|
if number_format < our_number_format
|
@@ -158,6 +165,17 @@ module Slaw
|
|
158
165
|
end
|
159
166
|
end
|
160
167
|
|
168
|
+
# Change p tags preceding a blocklist into listIntroductions within the blocklist
|
169
|
+
def self.fix_intros(doc)
|
170
|
+
doc.xpath('//a:blockList', a: NS).each do |blocklist|
|
171
|
+
prev = blocklist.previous
|
172
|
+
if prev and prev.name == 'p'
|
173
|
+
prev.name = 'listIntroduction'
|
174
|
+
blocklist.prepend_child(prev)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
161
179
|
class NumberingFormat
|
162
180
|
include Comparable
|
163
181
|
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -152,7 +152,7 @@ module Slaw
|
|
152
152
|
def postprocess(doc)
|
153
153
|
normalise_headings(doc)
|
154
154
|
find_short_title(doc)
|
155
|
-
|
155
|
+
adjust_blocklists(doc)
|
156
156
|
|
157
157
|
doc
|
158
158
|
end
|
@@ -353,16 +353,17 @@ module Slaw
|
|
353
353
|
end
|
354
354
|
end
|
355
355
|
|
356
|
-
#
|
356
|
+
# Adjust blocklists:
|
357
357
|
#
|
358
|
-
#
|
359
|
-
#
|
358
|
+
# - nest them correctly
|
359
|
+
# - change preceding p tags into listIntroductions
|
360
360
|
#
|
361
361
|
# @param doc [Nokogiri::XML::Document]
|
362
|
-
def
|
363
|
-
logger.info("
|
362
|
+
def adjust_blocklists(doc)
|
363
|
+
logger.info("Adjusting blocklists")
|
364
364
|
|
365
365
|
Slaw::Parse::Blocklists.nest_blocklists(doc)
|
366
|
+
Slaw::Parse::Blocklists.fix_intros(doc)
|
366
367
|
end
|
367
368
|
|
368
369
|
protected
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act.treetop
CHANGED
@@ -22,38 +22,39 @@ module Slaw
|
|
22
22
|
rule preface
|
23
23
|
!'PREAMBLE'
|
24
24
|
('PREFACE'i space? eol)?
|
25
|
-
statements:(!'PREAMBLE'
|
25
|
+
statements:(!'PREAMBLE' pre_body_statement)* <Preface>
|
26
26
|
end
|
27
27
|
|
28
28
|
rule preamble
|
29
29
|
'PREAMBLE'i space? eol
|
30
|
-
statements:
|
30
|
+
statements:pre_body_statement* <Preamble>
|
31
31
|
end
|
32
32
|
|
33
33
|
rule body
|
34
|
-
children:(chapter / part / section /
|
34
|
+
children:(chapter / part / section / subsection / block_paragraphs )+ <Body>
|
35
35
|
end
|
36
36
|
|
37
37
|
rule chapter
|
38
38
|
heading:chapter_heading
|
39
|
-
children:(part / section /
|
39
|
+
children:(part / section / subsection / block_paragraphs)*
|
40
40
|
<Chapter>
|
41
41
|
end
|
42
42
|
|
43
43
|
rule part
|
44
44
|
heading:part_heading
|
45
|
-
children:(section /
|
45
|
+
children:(section / subsection / block_paragraphs)*
|
46
46
|
<Part>
|
47
47
|
end
|
48
48
|
|
49
49
|
rule section
|
50
50
|
section_title
|
51
|
-
|
51
|
+
children:(subsection / block_paragraphs)* <Section>
|
52
52
|
end
|
53
53
|
|
54
54
|
rule subsection
|
55
|
-
|
56
|
-
|
55
|
+
# TODO: does it make sense to allow an eol here?
|
56
|
+
space? subsection_prefix whitespace eol?
|
57
|
+
children:block_element* <Subsection>
|
57
58
|
end
|
58
59
|
|
59
60
|
rule schedules_container
|
@@ -70,14 +71,6 @@ module Slaw
|
|
70
71
|
<Schedule>
|
71
72
|
end
|
72
73
|
|
73
|
-
rule block_paragraphs
|
74
|
-
block_element+ <BlockParagraph>
|
75
|
-
end
|
76
|
-
|
77
|
-
rule block_element
|
78
|
-
(table / blocklist / naked_statement)
|
79
|
-
end
|
80
|
-
|
81
74
|
##########
|
82
75
|
# group elements
|
83
76
|
#
|
@@ -132,7 +125,7 @@ module Slaw
|
|
132
125
|
end
|
133
126
|
|
134
127
|
rule section_title_content
|
135
|
-
space !
|
128
|
+
space !subsection_prefix content eol
|
136
129
|
end
|
137
130
|
|
138
131
|
rule schedule_title
|
@@ -142,36 +135,29 @@ module Slaw
|
|
142
135
|
end
|
143
136
|
|
144
137
|
##########
|
145
|
-
#
|
138
|
+
# blocks of content inside containers
|
146
139
|
|
147
|
-
rule
|
148
|
-
|
140
|
+
rule block_paragraphs
|
141
|
+
block_element+ <BlockParagraph>
|
149
142
|
end
|
150
143
|
|
151
|
-
rule
|
152
|
-
|
153
|
-
<NakedStatement>
|
144
|
+
rule block_element
|
145
|
+
(table / blocklist / naked_statement)
|
154
146
|
end
|
155
147
|
|
156
|
-
rule
|
157
|
-
|
158
|
-
<ScheduleStatement>
|
148
|
+
rule blocklist
|
149
|
+
blocklist_item+ <Blocklist>
|
159
150
|
end
|
160
151
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
<Clauses>
|
152
|
+
rule blocklist_item
|
153
|
+
space? blocklist_item_prefix whitespace item_content:(!blocklist_item_prefix content eol)? eol?
|
154
|
+
<BlocklistItem>
|
165
155
|
end
|
166
156
|
|
167
|
-
rule
|
168
|
-
'
|
169
|
-
<Remark>
|
157
|
+
rule blocklist_item_prefix
|
158
|
+
('(' letter_ordinal ')') / dotted_number_3
|
170
159
|
end
|
171
160
|
|
172
|
-
##########
|
173
|
-
# tables
|
174
|
-
|
175
161
|
rule table
|
176
162
|
space? table_start eol? (!table_end content eol)* table_end eol
|
177
163
|
<Table>
|
@@ -185,6 +171,33 @@ module Slaw
|
|
185
171
|
space? '|}'
|
186
172
|
end
|
187
173
|
|
174
|
+
##########
|
175
|
+
# statements - single lines of content
|
176
|
+
|
177
|
+
rule naked_statement
|
178
|
+
space? !(chapter_heading / part_heading / section_title / schedule_title / subsection_prefix) clauses eol
|
179
|
+
<NakedStatement>
|
180
|
+
end
|
181
|
+
|
182
|
+
rule pre_body_statement
|
183
|
+
space? !(chapter_heading / part_heading / section_title / schedule_title) clauses eol
|
184
|
+
<NakedStatement>
|
185
|
+
end
|
186
|
+
|
187
|
+
##########
|
188
|
+
# inline content
|
189
|
+
|
190
|
+
# one or more words, allowing inline elements
|
191
|
+
rule clauses
|
192
|
+
((remark / [^ \t\n]+) [ \t]*)+
|
193
|
+
<Clauses>
|
194
|
+
end
|
195
|
+
|
196
|
+
rule remark
|
197
|
+
'[[' content:(!']]' .)+ ']]'
|
198
|
+
<Remark>
|
199
|
+
end
|
200
|
+
|
188
201
|
##########
|
189
202
|
# prefixes
|
190
203
|
|
@@ -204,7 +217,7 @@ module Slaw
|
|
204
217
|
number_letter '.'?
|
205
218
|
end
|
206
219
|
|
207
|
-
rule
|
220
|
+
rule subsection_prefix
|
208
221
|
# there are two subsection handling syntaxes:
|
209
222
|
#
|
210
223
|
# (1) foo
|
@@ -228,26 +241,6 @@ module Slaw
|
|
228
241
|
num:dotted_number_2 '.'? space
|
229
242
|
end
|
230
243
|
|
231
|
-
##########
|
232
|
-
# blocklists
|
233
|
-
|
234
|
-
rule blocklist
|
235
|
-
blocklist_item+ <Blocklist>
|
236
|
-
end
|
237
|
-
|
238
|
-
rule blocklist_item
|
239
|
-
space? blocklist_item_prefix whitespace item_content:(!blocklist_item_prefix content eol)?
|
240
|
-
<BlocklistItem>
|
241
|
-
end
|
242
|
-
|
243
|
-
rule blocklist_item_prefix
|
244
|
-
('(' letter_ordinal ')') / dotted_number_3
|
245
|
-
end
|
246
|
-
|
247
|
-
rule letter_ordinal
|
248
|
-
letter (letter / digit)*
|
249
|
-
end
|
250
|
-
|
251
244
|
#########
|
252
245
|
## one line of basic content
|
253
246
|
|
@@ -265,6 +258,10 @@ module Slaw
|
|
265
258
|
number letter*
|
266
259
|
end
|
267
260
|
|
261
|
+
rule letter_ordinal
|
262
|
+
letter (letter / digit)*
|
263
|
+
end
|
264
|
+
|
268
265
|
rule dotted_number_3
|
269
266
|
number '.' number ('.' number)+
|
270
267
|
end
|
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -79,7 +79,7 @@ module Slaw
|
|
79
79
|
class Body < Treetop::Runtime::SyntaxNode
|
80
80
|
def to_xml(b)
|
81
81
|
b.body { |b|
|
82
|
-
children.elements.
|
82
|
+
children.elements.each_with_index { |e, i| e.to_xml(b, '', i) }
|
83
83
|
}
|
84
84
|
end
|
85
85
|
end
|
@@ -207,7 +207,7 @@ module Slaw
|
|
207
207
|
|
208
208
|
idprefix = "#{id}."
|
209
209
|
|
210
|
-
|
210
|
+
children.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
211
211
|
}
|
212
212
|
end
|
213
213
|
end
|
@@ -245,70 +245,39 @@ module Slaw
|
|
245
245
|
end
|
246
246
|
end
|
247
247
|
|
248
|
-
class Subsection < Treetop::Runtime::SyntaxNode
|
249
|
-
def to_xml(b, idprefix, i=0)
|
250
|
-
if statement.is_a?(NumberedStatement)
|
251
|
-
attribs = {id: idprefix + statement.num.gsub(/[()]/, '')}
|
252
|
-
else
|
253
|
-
attribs = {id: idprefix + "subsection-#{i}"}
|
254
|
-
end
|
255
|
-
|
256
|
-
idprefix = attribs[:id] + "."
|
257
|
-
|
258
|
-
b.subsection(attribs) { |b|
|
259
|
-
b.num(statement.num) if statement.is_a?(NumberedStatement)
|
260
|
-
|
261
|
-
b.content { |b|
|
262
|
-
if blocklist and blocklist.is_a?(Blocklist)
|
263
|
-
if statement.content
|
264
|
-
# provide the statement as the list introduction to the block list
|
265
|
-
blocklist.to_xml(b, idprefix, i) { |b| statement.content.to_xml(b, idprefix) }
|
266
|
-
else
|
267
|
-
blocklist.to_xml(b, idprefix, i)
|
268
|
-
end
|
269
|
-
else
|
270
|
-
# raw content
|
271
|
-
statement.to_xml(b, idprefix)
|
272
|
-
end
|
273
|
-
}
|
274
|
-
}
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
248
|
class BlockParagraph < Treetop::Runtime::SyntaxNode
|
279
249
|
def to_xml(b, idprefix='', i=0)
|
280
|
-
|
250
|
+
id = "#{idprefix}paragraph-0"
|
251
|
+
idprefix = "#{id}."
|
252
|
+
|
253
|
+
b.paragraph(id: id) { |b|
|
281
254
|
b.content { |b|
|
282
|
-
elements.each_with_index { |e, i| e.to_xml(b, idprefix) }
|
255
|
+
elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
283
256
|
}
|
284
257
|
}
|
285
258
|
end
|
286
259
|
end
|
287
260
|
|
288
|
-
class
|
261
|
+
class Subsection < Treetop::Runtime::SyntaxNode
|
289
262
|
def num
|
290
|
-
|
291
|
-
end
|
292
|
-
|
293
|
-
def parentheses?
|
294
|
-
!numbered_statement_prefix.respond_to? :dotted_number_2
|
263
|
+
subsection_prefix.num.text_value
|
295
264
|
end
|
296
265
|
|
297
|
-
def
|
298
|
-
|
299
|
-
|
300
|
-
else
|
301
|
-
elements[3].clauses
|
302
|
-
end
|
303
|
-
end
|
266
|
+
def to_xml(b, idprefix, i)
|
267
|
+
id = idprefix + num.gsub(/[()]/, '')
|
268
|
+
idprefix = id + "."
|
304
269
|
|
305
|
-
|
306
|
-
|
270
|
+
b.subsection(id: id) { |b|
|
271
|
+
b.num(num)
|
272
|
+
b.content { |b|
|
273
|
+
children.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
274
|
+
}
|
275
|
+
}
|
307
276
|
end
|
308
277
|
end
|
309
278
|
|
310
279
|
class NakedStatement < Treetop::Runtime::SyntaxNode
|
311
|
-
def to_xml(b, idprefix)
|
280
|
+
def to_xml(b, idprefix, i=0)
|
312
281
|
b.p { |b| clauses.to_xml(b, idprefix) } if clauses
|
313
282
|
end
|
314
283
|
|
@@ -371,7 +340,7 @@ module Slaw
|
|
371
340
|
end
|
372
341
|
|
373
342
|
class Table < Treetop::Runtime::SyntaxNode
|
374
|
-
def to_xml(b, idprefix)
|
343
|
+
def to_xml(b, idprefix, i=0)
|
375
344
|
# parse the table using wikicloth
|
376
345
|
html = WikiCloth::Parser.new({data: self.text_value}).to_html
|
377
346
|
|
@@ -379,7 +348,7 @@ module Slaw
|
|
379
348
|
# an id to the table
|
380
349
|
html = Nokogiri::HTML(html)
|
381
350
|
table = html.css("table").first
|
382
|
-
table['id'] = "#{idprefix}
|
351
|
+
table['id'] = "#{idprefix}table#{i}"
|
383
352
|
|
384
353
|
# wrap td and th content in p tags
|
385
354
|
table.css("td, th").each do |cell|
|
@@ -470,11 +439,13 @@ module Slaw
|
|
470
439
|
}
|
471
440
|
|
472
441
|
b.mainBody { |b|
|
442
|
+
idprefix = "#{id}."
|
443
|
+
|
473
444
|
# there is no good AKN hierarchy container for schedules, so we
|
474
445
|
# just use article because we don't use it anywhere else.
|
475
446
|
b.article(id: id) { |b|
|
476
447
|
b.heading(heading) if heading
|
477
|
-
body.children.elements.
|
448
|
+
body.children.elements.each_with_index { |e| e.to_xml(b, idprefix, i) } if body.is_a? Body
|
478
449
|
}
|
479
450
|
}
|
480
451
|
}
|
data/spec/parse/builder_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe Slaw::Parse::Builder do
|
|
7
7
|
let(:parser) { double("parser") }
|
8
8
|
subject { Slaw::Parse::Builder.new(parser: parser) }
|
9
9
|
|
10
|
-
describe '#
|
10
|
+
describe '#adjust_blocklists' do
|
11
11
|
it 'should nest simple blocks' do
|
12
12
|
doc = xml2doc(subsection(<<XML
|
13
13
|
<blockList id="section-10.1.lst0">
|
@@ -39,7 +39,7 @@ describe Slaw::Parse::Builder do
|
|
39
39
|
XML
|
40
40
|
))
|
41
41
|
|
42
|
-
subject.
|
42
|
+
subject.adjust_blocklists(doc)
|
43
43
|
doc.to_s.should == subsection(<<XML
|
44
44
|
<blockList id="section-10.1.lst0">
|
45
45
|
<item id="section-10.1.lst0.a">
|
@@ -100,7 +100,7 @@ XML
|
|
100
100
|
XML
|
101
101
|
))
|
102
102
|
|
103
|
-
subject.
|
103
|
+
subject.adjust_blocklists(doc)
|
104
104
|
doc.to_s.should == subsection(<<XML
|
105
105
|
<blockList id="section-10.1.lst0">
|
106
106
|
<item id="section-10.1.lst0.a">
|
@@ -147,7 +147,7 @@ XML
|
|
147
147
|
XML
|
148
148
|
))
|
149
149
|
|
150
|
-
subject.
|
150
|
+
subject.adjust_blocklists(doc)
|
151
151
|
doc.to_s.should == subsection(<<XML
|
152
152
|
<blockList id="section-10.1.lst0">
|
153
153
|
<item id="section-10.1.lst0.h">
|
@@ -196,7 +196,7 @@ XML
|
|
196
196
|
XML
|
197
197
|
))
|
198
198
|
|
199
|
-
subject.
|
199
|
+
subject.adjust_blocklists(doc)
|
200
200
|
doc.to_s.should == subsection(<<XML
|
201
201
|
<blockList id="section-10.1.lst0">
|
202
202
|
<item id="section-10.1.lst0.t">
|
@@ -262,7 +262,7 @@ XML
|
|
262
262
|
XML
|
263
263
|
))
|
264
264
|
|
265
|
-
subject.
|
265
|
+
subject.adjust_blocklists(doc)
|
266
266
|
doc.to_s.should == subsection(<<XML
|
267
267
|
<blockList id="section-28.3.list2">
|
268
268
|
<item id="section-28.3.list2.g">
|
@@ -344,7 +344,7 @@ XML
|
|
344
344
|
XML
|
345
345
|
))
|
346
346
|
|
347
|
-
subject.
|
347
|
+
subject.adjust_blocklists(doc)
|
348
348
|
doc.to_s.should == subsection(<<XML
|
349
349
|
<blockList id="section-28.3.list2">
|
350
350
|
<item id="section-28.3.list2.g">
|
@@ -394,6 +394,106 @@ XML
|
|
394
394
|
|
395
395
|
# -------------------------------------------------------------------------
|
396
396
|
|
397
|
+
it 'should treat (aa) after (z) as siblings' do
|
398
|
+
doc = xml2doc(subsection(<<XML
|
399
|
+
<blockList id="list0">
|
400
|
+
<item id="list0.y">
|
401
|
+
<num>(y)</num>
|
402
|
+
<p>foo</p>
|
403
|
+
</item>
|
404
|
+
<item id="list0.z">
|
405
|
+
<num>(z)</num>
|
406
|
+
<p>item-z</p>
|
407
|
+
</item>
|
408
|
+
<item id="list0.aa">
|
409
|
+
<num>(aa)</num>
|
410
|
+
<p>item-aa</p>
|
411
|
+
</item>
|
412
|
+
<item id="list0.bb">
|
413
|
+
<num>(bb)</num>
|
414
|
+
<p>item-bb</p>
|
415
|
+
</item>
|
416
|
+
</blockList>
|
417
|
+
XML
|
418
|
+
))
|
419
|
+
|
420
|
+
subject.adjust_blocklists(doc)
|
421
|
+
doc.to_s.should == subsection(<<XML
|
422
|
+
<blockList id="list0">
|
423
|
+
<item id="list0.y">
|
424
|
+
<num>(y)</num>
|
425
|
+
<p>foo</p>
|
426
|
+
</item>
|
427
|
+
<item id="list0.z">
|
428
|
+
<num>(z)</num>
|
429
|
+
<p>item-z</p>
|
430
|
+
</item>
|
431
|
+
<item id="list0.aa">
|
432
|
+
<num>(aa)</num>
|
433
|
+
<p>item-aa</p>
|
434
|
+
</item>
|
435
|
+
<item id="list0.bb">
|
436
|
+
<num>(bb)</num>
|
437
|
+
<p>item-bb</p>
|
438
|
+
</item>
|
439
|
+
</blockList>
|
440
|
+
XML
|
441
|
+
)
|
442
|
+
end
|
443
|
+
|
444
|
+
# -------------------------------------------------------------------------
|
445
|
+
|
446
|
+
it 'should treat (AA) after (z) a sublist' do
|
447
|
+
doc = xml2doc(subsection(<<XML
|
448
|
+
<blockList id="list0">
|
449
|
+
<item id="list0.y">
|
450
|
+
<num>(y)</num>
|
451
|
+
<p>foo</p>
|
452
|
+
</item>
|
453
|
+
<item id="list0.z">
|
454
|
+
<num>(z)</num>
|
455
|
+
<p>item-z</p>
|
456
|
+
</item>
|
457
|
+
<item id="list0.AA">
|
458
|
+
<num>(AA)</num>
|
459
|
+
<p>item-AA</p>
|
460
|
+
</item>
|
461
|
+
<item id="list0.BB">
|
462
|
+
<num>(BB)</num>
|
463
|
+
<p>item-BB</p>
|
464
|
+
</item>
|
465
|
+
</blockList>
|
466
|
+
XML
|
467
|
+
))
|
468
|
+
|
469
|
+
subject.adjust_blocklists(doc)
|
470
|
+
doc.to_s.should == subsection(<<XML
|
471
|
+
<blockList id="list0">
|
472
|
+
<item id="list0.y">
|
473
|
+
<num>(y)</num>
|
474
|
+
<p>foo</p>
|
475
|
+
</item>
|
476
|
+
<item id="list0.z">
|
477
|
+
<num>(z)</num>
|
478
|
+
<blockList id="list0.z.list0">
|
479
|
+
<listIntroduction>item-z</listIntroduction>
|
480
|
+
<item id="list0.z.list0.AA">
|
481
|
+
<num>(AA)</num>
|
482
|
+
<p>item-AA</p>
|
483
|
+
</item>
|
484
|
+
<item id="list0.z.list0.BB">
|
485
|
+
<num>(BB)</num>
|
486
|
+
<p>item-BB</p>
|
487
|
+
</item>
|
488
|
+
</blockList>
|
489
|
+
</item>
|
490
|
+
</blockList>
|
491
|
+
XML
|
492
|
+
)
|
493
|
+
end
|
494
|
+
|
495
|
+
# -------------------------------------------------------------------------
|
496
|
+
|
397
497
|
it 'should handle deeply nested lists' do
|
398
498
|
doc = xml2doc(subsection(<<XML
|
399
499
|
<blockList id="list0">
|
@@ -441,7 +541,7 @@ XML
|
|
441
541
|
XML
|
442
542
|
))
|
443
543
|
|
444
|
-
subject.
|
544
|
+
subject.adjust_blocklists(doc)
|
445
545
|
doc.to_s.should == subsection(<<XML
|
446
546
|
<blockList id="list0">
|
447
547
|
<item id="list0.a">
|
@@ -520,7 +620,7 @@ XML
|
|
520
620
|
XML
|
521
621
|
))
|
522
622
|
|
523
|
-
subject.
|
623
|
+
subject.adjust_blocklists(doc)
|
524
624
|
doc.to_s.should == subsection(<<XML
|
525
625
|
<blockList id="section-10.1.lst0">
|
526
626
|
<item id="section-10.1.lst0.h">
|
@@ -567,7 +667,7 @@ XML
|
|
567
667
|
XML
|
568
668
|
))
|
569
669
|
|
570
|
-
subject.
|
670
|
+
subject.adjust_blocklists(doc)
|
571
671
|
doc.to_s.should == subsection(<<XML
|
572
672
|
<blockList id="section-9.subsection-2.list2">
|
573
673
|
<item id="section-9.subsection-2.list2.9.2.1">
|
@@ -585,6 +685,31 @@ XML
|
|
585
685
|
</blockList>
|
586
686
|
</item>
|
587
687
|
</blockList>
|
688
|
+
XML
|
689
|
+
)
|
690
|
+
end
|
691
|
+
|
692
|
+
it 'should handle p tags just before' do
|
693
|
+
doc = xml2doc(subsection(<<XML
|
694
|
+
<p>intro</p>
|
695
|
+
<blockList id="section-10.1.lst0">
|
696
|
+
<item id="section-10.1.lst0.a">
|
697
|
+
<num>(a)</num>
|
698
|
+
<p>foo</p>
|
699
|
+
</item>
|
700
|
+
</blockList>
|
701
|
+
XML
|
702
|
+
))
|
703
|
+
|
704
|
+
subject.adjust_blocklists(doc)
|
705
|
+
doc.to_s.should == subsection(<<XML
|
706
|
+
<blockList id="section-10.1.lst0">
|
707
|
+
<listIntroduction>intro</listIntroduction>
|
708
|
+
<item id="section-10.1.lst0.a">
|
709
|
+
<num>(a)</num>
|
710
|
+
<p>foo</p>
|
711
|
+
</item>
|
712
|
+
</blockList>
|
588
713
|
XML
|
589
714
|
)
|
590
715
|
end
|