slaw 0.8.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -0
- data/README.md +10 -0
- data/lib/slaw/parse/blocklists.rb +19 -1
- data/lib/slaw/parse/builder.rb +7 -6
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act.treetop +54 -57
- data/lib/slaw/za/act_nodes.rb +24 -53
- data/spec/parse/builder_spec.rb +135 -10
- data/spec/za/act_spec.rb +225 -129
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08d012c5c08856227cf9c203f5c932d076115522
|
4
|
+
data.tar.gz: 742fa6d2ff279945b3ed3cdff468c62a9567cd17
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f831cd0db6d4d05a80634645b248cfb8555122e85f59af07cc848cdb32330d3f97eab5d59b883802209eb4d9ebe3079a7d29ad80df20a73b2ec340cd1568d2d
|
7
|
+
data.tar.gz: 21285db937fb61a1d81ef7a8a8359219e2b10c066e36e9d01ec6091d107bdd3f2d8ccb5e5fb06f1cb394ff4b8cf52b78c20cedcaab36ef19b8b271623ef946c0
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -216,6 +216,16 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
|
|
216
216
|
|
217
217
|
## Changelog
|
218
218
|
|
219
|
+
### 0.9.0
|
220
|
+
|
221
|
+
* This release makes reasonably significant changes to generated XML, particularly
|
222
|
+
for sections without explicit subsections.
|
223
|
+
* Blocklist items numbered (aa) that directly follow (z) are treated as part of the same list, using the same numbering format.
|
224
|
+
* Change how blockList listIntroduction elements are created to be more generic
|
225
|
+
* Support for sections that dive straight into lists without subsections
|
226
|
+
* Simplify grammar
|
227
|
+
* Fix elements with potentially duplicate ids
|
228
|
+
|
219
229
|
### 0.8.3
|
220
230
|
|
221
231
|
* During cleanup, break lines on section titles that don't have a space after the number, eg: "New section title 4.(1) The content..."
|
@@ -36,7 +36,7 @@ module Slaw
|
|
36
36
|
def self.nest_blocklists(doc)
|
37
37
|
doc.xpath('//a:blockList', a: NS).each do |blocklist|
|
38
38
|
items = blocklist.xpath('a:item', a: NS)
|
39
|
-
nest_blocklist_items(items.to_a, guess_number_format(items.first), nil, nil)
|
39
|
+
nest_blocklist_items(items.to_a, guess_number_format(items.first), nil, nil) unless items.empty?
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -47,11 +47,18 @@ module Slaw
|
|
47
47
|
item = items.shift
|
48
48
|
|
49
49
|
sublist_count = 0
|
50
|
+
number_format = our_number_format
|
50
51
|
|
51
52
|
while item and item.name == 'item'
|
52
53
|
number_format = guess_number_format(item, number_format)
|
53
54
|
break unless number_format
|
54
55
|
|
56
|
+
# (aa) after (z) is the same numbering type, so pretend we've always
|
57
|
+
# been this format
|
58
|
+
if item.num == "(aa)" and item.previous_element and item.previous_element.num == "(z)"
|
59
|
+
our_number_format = number_format
|
60
|
+
end
|
61
|
+
|
55
62
|
if number_format != our_number_format
|
56
63
|
# new sublist, or back to the old list?
|
57
64
|
if number_format < our_number_format
|
@@ -158,6 +165,17 @@ module Slaw
|
|
158
165
|
end
|
159
166
|
end
|
160
167
|
|
168
|
+
# Change p tags preceding a blocklist into listIntroductions within the blocklist
|
169
|
+
def self.fix_intros(doc)
|
170
|
+
doc.xpath('//a:blockList', a: NS).each do |blocklist|
|
171
|
+
prev = blocklist.previous
|
172
|
+
if prev and prev.name == 'p'
|
173
|
+
prev.name = 'listIntroduction'
|
174
|
+
blocklist.prepend_child(prev)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
161
179
|
class NumberingFormat
|
162
180
|
include Comparable
|
163
181
|
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -152,7 +152,7 @@ module Slaw
|
|
152
152
|
def postprocess(doc)
|
153
153
|
normalise_headings(doc)
|
154
154
|
find_short_title(doc)
|
155
|
-
|
155
|
+
adjust_blocklists(doc)
|
156
156
|
|
157
157
|
doc
|
158
158
|
end
|
@@ -353,16 +353,17 @@ module Slaw
|
|
353
353
|
end
|
354
354
|
end
|
355
355
|
|
356
|
-
#
|
356
|
+
# Adjust blocklists:
|
357
357
|
#
|
358
|
-
#
|
359
|
-
#
|
358
|
+
# - nest them correctly
|
359
|
+
# - change preceding p tags into listIntroductions
|
360
360
|
#
|
361
361
|
# @param doc [Nokogiri::XML::Document]
|
362
|
-
def
|
363
|
-
logger.info("
|
362
|
+
def adjust_blocklists(doc)
|
363
|
+
logger.info("Adjusting blocklists")
|
364
364
|
|
365
365
|
Slaw::Parse::Blocklists.nest_blocklists(doc)
|
366
|
+
Slaw::Parse::Blocklists.fix_intros(doc)
|
366
367
|
end
|
367
368
|
|
368
369
|
protected
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act.treetop
CHANGED
@@ -22,38 +22,39 @@ module Slaw
|
|
22
22
|
rule preface
|
23
23
|
!'PREAMBLE'
|
24
24
|
('PREFACE'i space? eol)?
|
25
|
-
statements:(!'PREAMBLE'
|
25
|
+
statements:(!'PREAMBLE' pre_body_statement)* <Preface>
|
26
26
|
end
|
27
27
|
|
28
28
|
rule preamble
|
29
29
|
'PREAMBLE'i space? eol
|
30
|
-
statements:
|
30
|
+
statements:pre_body_statement* <Preamble>
|
31
31
|
end
|
32
32
|
|
33
33
|
rule body
|
34
|
-
children:(chapter / part / section /
|
34
|
+
children:(chapter / part / section / subsection / block_paragraphs )+ <Body>
|
35
35
|
end
|
36
36
|
|
37
37
|
rule chapter
|
38
38
|
heading:chapter_heading
|
39
|
-
children:(part / section /
|
39
|
+
children:(part / section / subsection / block_paragraphs)*
|
40
40
|
<Chapter>
|
41
41
|
end
|
42
42
|
|
43
43
|
rule part
|
44
44
|
heading:part_heading
|
45
|
-
children:(section /
|
45
|
+
children:(section / subsection / block_paragraphs)*
|
46
46
|
<Part>
|
47
47
|
end
|
48
48
|
|
49
49
|
rule section
|
50
50
|
section_title
|
51
|
-
|
51
|
+
children:(subsection / block_paragraphs)* <Section>
|
52
52
|
end
|
53
53
|
|
54
54
|
rule subsection
|
55
|
-
|
56
|
-
|
55
|
+
# TODO: does it make sense to allow an eol here?
|
56
|
+
space? subsection_prefix whitespace eol?
|
57
|
+
children:block_element* <Subsection>
|
57
58
|
end
|
58
59
|
|
59
60
|
rule schedules_container
|
@@ -70,14 +71,6 @@ module Slaw
|
|
70
71
|
<Schedule>
|
71
72
|
end
|
72
73
|
|
73
|
-
rule block_paragraphs
|
74
|
-
block_element+ <BlockParagraph>
|
75
|
-
end
|
76
|
-
|
77
|
-
rule block_element
|
78
|
-
(table / blocklist / naked_statement)
|
79
|
-
end
|
80
|
-
|
81
74
|
##########
|
82
75
|
# group elements
|
83
76
|
#
|
@@ -132,7 +125,7 @@ module Slaw
|
|
132
125
|
end
|
133
126
|
|
134
127
|
rule section_title_content
|
135
|
-
space !
|
128
|
+
space !subsection_prefix content eol
|
136
129
|
end
|
137
130
|
|
138
131
|
rule schedule_title
|
@@ -142,36 +135,29 @@ module Slaw
|
|
142
135
|
end
|
143
136
|
|
144
137
|
##########
|
145
|
-
#
|
138
|
+
# blocks of content inside containers
|
146
139
|
|
147
|
-
rule
|
148
|
-
|
140
|
+
rule block_paragraphs
|
141
|
+
block_element+ <BlockParagraph>
|
149
142
|
end
|
150
143
|
|
151
|
-
rule
|
152
|
-
|
153
|
-
<NakedStatement>
|
144
|
+
rule block_element
|
145
|
+
(table / blocklist / naked_statement)
|
154
146
|
end
|
155
147
|
|
156
|
-
rule
|
157
|
-
|
158
|
-
<ScheduleStatement>
|
148
|
+
rule blocklist
|
149
|
+
blocklist_item+ <Blocklist>
|
159
150
|
end
|
160
151
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
<Clauses>
|
152
|
+
rule blocklist_item
|
153
|
+
space? blocklist_item_prefix whitespace item_content:(!blocklist_item_prefix content eol)? eol?
|
154
|
+
<BlocklistItem>
|
165
155
|
end
|
166
156
|
|
167
|
-
rule
|
168
|
-
'
|
169
|
-
<Remark>
|
157
|
+
rule blocklist_item_prefix
|
158
|
+
('(' letter_ordinal ')') / dotted_number_3
|
170
159
|
end
|
171
160
|
|
172
|
-
##########
|
173
|
-
# tables
|
174
|
-
|
175
161
|
rule table
|
176
162
|
space? table_start eol? (!table_end content eol)* table_end eol
|
177
163
|
<Table>
|
@@ -185,6 +171,33 @@ module Slaw
|
|
185
171
|
space? '|}'
|
186
172
|
end
|
187
173
|
|
174
|
+
##########
|
175
|
+
# statements - single lines of content
|
176
|
+
|
177
|
+
rule naked_statement
|
178
|
+
space? !(chapter_heading / part_heading / section_title / schedule_title / subsection_prefix) clauses eol
|
179
|
+
<NakedStatement>
|
180
|
+
end
|
181
|
+
|
182
|
+
rule pre_body_statement
|
183
|
+
space? !(chapter_heading / part_heading / section_title / schedule_title) clauses eol
|
184
|
+
<NakedStatement>
|
185
|
+
end
|
186
|
+
|
187
|
+
##########
|
188
|
+
# inline content
|
189
|
+
|
190
|
+
# one or more words, allowing inline elements
|
191
|
+
rule clauses
|
192
|
+
((remark / [^ \t\n]+) [ \t]*)+
|
193
|
+
<Clauses>
|
194
|
+
end
|
195
|
+
|
196
|
+
rule remark
|
197
|
+
'[[' content:(!']]' .)+ ']]'
|
198
|
+
<Remark>
|
199
|
+
end
|
200
|
+
|
188
201
|
##########
|
189
202
|
# prefixes
|
190
203
|
|
@@ -204,7 +217,7 @@ module Slaw
|
|
204
217
|
number_letter '.'?
|
205
218
|
end
|
206
219
|
|
207
|
-
rule
|
220
|
+
rule subsection_prefix
|
208
221
|
# there are two subsection handling syntaxes:
|
209
222
|
#
|
210
223
|
# (1) foo
|
@@ -228,26 +241,6 @@ module Slaw
|
|
228
241
|
num:dotted_number_2 '.'? space
|
229
242
|
end
|
230
243
|
|
231
|
-
##########
|
232
|
-
# blocklists
|
233
|
-
|
234
|
-
rule blocklist
|
235
|
-
blocklist_item+ <Blocklist>
|
236
|
-
end
|
237
|
-
|
238
|
-
rule blocklist_item
|
239
|
-
space? blocklist_item_prefix whitespace item_content:(!blocklist_item_prefix content eol)?
|
240
|
-
<BlocklistItem>
|
241
|
-
end
|
242
|
-
|
243
|
-
rule blocklist_item_prefix
|
244
|
-
('(' letter_ordinal ')') / dotted_number_3
|
245
|
-
end
|
246
|
-
|
247
|
-
rule letter_ordinal
|
248
|
-
letter (letter / digit)*
|
249
|
-
end
|
250
|
-
|
251
244
|
#########
|
252
245
|
## one line of basic content
|
253
246
|
|
@@ -265,6 +258,10 @@ module Slaw
|
|
265
258
|
number letter*
|
266
259
|
end
|
267
260
|
|
261
|
+
rule letter_ordinal
|
262
|
+
letter (letter / digit)*
|
263
|
+
end
|
264
|
+
|
268
265
|
rule dotted_number_3
|
269
266
|
number '.' number ('.' number)+
|
270
267
|
end
|
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -79,7 +79,7 @@ module Slaw
|
|
79
79
|
class Body < Treetop::Runtime::SyntaxNode
|
80
80
|
def to_xml(b)
|
81
81
|
b.body { |b|
|
82
|
-
children.elements.
|
82
|
+
children.elements.each_with_index { |e, i| e.to_xml(b, '', i) }
|
83
83
|
}
|
84
84
|
end
|
85
85
|
end
|
@@ -207,7 +207,7 @@ module Slaw
|
|
207
207
|
|
208
208
|
idprefix = "#{id}."
|
209
209
|
|
210
|
-
|
210
|
+
children.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
211
211
|
}
|
212
212
|
end
|
213
213
|
end
|
@@ -245,70 +245,39 @@ module Slaw
|
|
245
245
|
end
|
246
246
|
end
|
247
247
|
|
248
|
-
class Subsection < Treetop::Runtime::SyntaxNode
|
249
|
-
def to_xml(b, idprefix, i=0)
|
250
|
-
if statement.is_a?(NumberedStatement)
|
251
|
-
attribs = {id: idprefix + statement.num.gsub(/[()]/, '')}
|
252
|
-
else
|
253
|
-
attribs = {id: idprefix + "subsection-#{i}"}
|
254
|
-
end
|
255
|
-
|
256
|
-
idprefix = attribs[:id] + "."
|
257
|
-
|
258
|
-
b.subsection(attribs) { |b|
|
259
|
-
b.num(statement.num) if statement.is_a?(NumberedStatement)
|
260
|
-
|
261
|
-
b.content { |b|
|
262
|
-
if blocklist and blocklist.is_a?(Blocklist)
|
263
|
-
if statement.content
|
264
|
-
# provide the statement as the list introduction to the block list
|
265
|
-
blocklist.to_xml(b, idprefix, i) { |b| statement.content.to_xml(b, idprefix) }
|
266
|
-
else
|
267
|
-
blocklist.to_xml(b, idprefix, i)
|
268
|
-
end
|
269
|
-
else
|
270
|
-
# raw content
|
271
|
-
statement.to_xml(b, idprefix)
|
272
|
-
end
|
273
|
-
}
|
274
|
-
}
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
248
|
class BlockParagraph < Treetop::Runtime::SyntaxNode
|
279
249
|
def to_xml(b, idprefix='', i=0)
|
280
|
-
|
250
|
+
id = "#{idprefix}paragraph-0"
|
251
|
+
idprefix = "#{id}."
|
252
|
+
|
253
|
+
b.paragraph(id: id) { |b|
|
281
254
|
b.content { |b|
|
282
|
-
elements.each_with_index { |e, i| e.to_xml(b, idprefix) }
|
255
|
+
elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
283
256
|
}
|
284
257
|
}
|
285
258
|
end
|
286
259
|
end
|
287
260
|
|
288
|
-
class
|
261
|
+
class Subsection < Treetop::Runtime::SyntaxNode
|
289
262
|
def num
|
290
|
-
|
291
|
-
end
|
292
|
-
|
293
|
-
def parentheses?
|
294
|
-
!numbered_statement_prefix.respond_to? :dotted_number_2
|
263
|
+
subsection_prefix.num.text_value
|
295
264
|
end
|
296
265
|
|
297
|
-
def
|
298
|
-
|
299
|
-
|
300
|
-
else
|
301
|
-
elements[3].clauses
|
302
|
-
end
|
303
|
-
end
|
266
|
+
def to_xml(b, idprefix, i)
|
267
|
+
id = idprefix + num.gsub(/[()]/, '')
|
268
|
+
idprefix = id + "."
|
304
269
|
|
305
|
-
|
306
|
-
|
270
|
+
b.subsection(id: id) { |b|
|
271
|
+
b.num(num)
|
272
|
+
b.content { |b|
|
273
|
+
children.elements.each_with_index { |e, i| e.to_xml(b, idprefix, i) }
|
274
|
+
}
|
275
|
+
}
|
307
276
|
end
|
308
277
|
end
|
309
278
|
|
310
279
|
class NakedStatement < Treetop::Runtime::SyntaxNode
|
311
|
-
def to_xml(b, idprefix)
|
280
|
+
def to_xml(b, idprefix, i=0)
|
312
281
|
b.p { |b| clauses.to_xml(b, idprefix) } if clauses
|
313
282
|
end
|
314
283
|
|
@@ -371,7 +340,7 @@ module Slaw
|
|
371
340
|
end
|
372
341
|
|
373
342
|
class Table < Treetop::Runtime::SyntaxNode
|
374
|
-
def to_xml(b, idprefix)
|
343
|
+
def to_xml(b, idprefix, i=0)
|
375
344
|
# parse the table using wikicloth
|
376
345
|
html = WikiCloth::Parser.new({data: self.text_value}).to_html
|
377
346
|
|
@@ -379,7 +348,7 @@ module Slaw
|
|
379
348
|
# an id to the table
|
380
349
|
html = Nokogiri::HTML(html)
|
381
350
|
table = html.css("table").first
|
382
|
-
table['id'] = "#{idprefix}
|
351
|
+
table['id'] = "#{idprefix}table#{i}"
|
383
352
|
|
384
353
|
# wrap td and th content in p tags
|
385
354
|
table.css("td, th").each do |cell|
|
@@ -470,11 +439,13 @@ module Slaw
|
|
470
439
|
}
|
471
440
|
|
472
441
|
b.mainBody { |b|
|
442
|
+
idprefix = "#{id}."
|
443
|
+
|
473
444
|
# there is no good AKN hierarchy container for schedules, so we
|
474
445
|
# just use article because we don't use it anywhere else.
|
475
446
|
b.article(id: id) { |b|
|
476
447
|
b.heading(heading) if heading
|
477
|
-
body.children.elements.
|
448
|
+
body.children.elements.each_with_index { |e| e.to_xml(b, idprefix, i) } if body.is_a? Body
|
478
449
|
}
|
479
450
|
}
|
480
451
|
}
|
data/spec/parse/builder_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe Slaw::Parse::Builder do
|
|
7
7
|
let(:parser) { double("parser") }
|
8
8
|
subject { Slaw::Parse::Builder.new(parser: parser) }
|
9
9
|
|
10
|
-
describe '#
|
10
|
+
describe '#adjust_blocklists' do
|
11
11
|
it 'should nest simple blocks' do
|
12
12
|
doc = xml2doc(subsection(<<XML
|
13
13
|
<blockList id="section-10.1.lst0">
|
@@ -39,7 +39,7 @@ describe Slaw::Parse::Builder do
|
|
39
39
|
XML
|
40
40
|
))
|
41
41
|
|
42
|
-
subject.
|
42
|
+
subject.adjust_blocklists(doc)
|
43
43
|
doc.to_s.should == subsection(<<XML
|
44
44
|
<blockList id="section-10.1.lst0">
|
45
45
|
<item id="section-10.1.lst0.a">
|
@@ -100,7 +100,7 @@ XML
|
|
100
100
|
XML
|
101
101
|
))
|
102
102
|
|
103
|
-
subject.
|
103
|
+
subject.adjust_blocklists(doc)
|
104
104
|
doc.to_s.should == subsection(<<XML
|
105
105
|
<blockList id="section-10.1.lst0">
|
106
106
|
<item id="section-10.1.lst0.a">
|
@@ -147,7 +147,7 @@ XML
|
|
147
147
|
XML
|
148
148
|
))
|
149
149
|
|
150
|
-
subject.
|
150
|
+
subject.adjust_blocklists(doc)
|
151
151
|
doc.to_s.should == subsection(<<XML
|
152
152
|
<blockList id="section-10.1.lst0">
|
153
153
|
<item id="section-10.1.lst0.h">
|
@@ -196,7 +196,7 @@ XML
|
|
196
196
|
XML
|
197
197
|
))
|
198
198
|
|
199
|
-
subject.
|
199
|
+
subject.adjust_blocklists(doc)
|
200
200
|
doc.to_s.should == subsection(<<XML
|
201
201
|
<blockList id="section-10.1.lst0">
|
202
202
|
<item id="section-10.1.lst0.t">
|
@@ -262,7 +262,7 @@ XML
|
|
262
262
|
XML
|
263
263
|
))
|
264
264
|
|
265
|
-
subject.
|
265
|
+
subject.adjust_blocklists(doc)
|
266
266
|
doc.to_s.should == subsection(<<XML
|
267
267
|
<blockList id="section-28.3.list2">
|
268
268
|
<item id="section-28.3.list2.g">
|
@@ -344,7 +344,7 @@ XML
|
|
344
344
|
XML
|
345
345
|
))
|
346
346
|
|
347
|
-
subject.
|
347
|
+
subject.adjust_blocklists(doc)
|
348
348
|
doc.to_s.should == subsection(<<XML
|
349
349
|
<blockList id="section-28.3.list2">
|
350
350
|
<item id="section-28.3.list2.g">
|
@@ -394,6 +394,106 @@ XML
|
|
394
394
|
|
395
395
|
# -------------------------------------------------------------------------
|
396
396
|
|
397
|
+
it 'should treat (aa) after (z) as siblings' do
|
398
|
+
doc = xml2doc(subsection(<<XML
|
399
|
+
<blockList id="list0">
|
400
|
+
<item id="list0.y">
|
401
|
+
<num>(y)</num>
|
402
|
+
<p>foo</p>
|
403
|
+
</item>
|
404
|
+
<item id="list0.z">
|
405
|
+
<num>(z)</num>
|
406
|
+
<p>item-z</p>
|
407
|
+
</item>
|
408
|
+
<item id="list0.aa">
|
409
|
+
<num>(aa)</num>
|
410
|
+
<p>item-aa</p>
|
411
|
+
</item>
|
412
|
+
<item id="list0.bb">
|
413
|
+
<num>(bb)</num>
|
414
|
+
<p>item-bb</p>
|
415
|
+
</item>
|
416
|
+
</blockList>
|
417
|
+
XML
|
418
|
+
))
|
419
|
+
|
420
|
+
subject.adjust_blocklists(doc)
|
421
|
+
doc.to_s.should == subsection(<<XML
|
422
|
+
<blockList id="list0">
|
423
|
+
<item id="list0.y">
|
424
|
+
<num>(y)</num>
|
425
|
+
<p>foo</p>
|
426
|
+
</item>
|
427
|
+
<item id="list0.z">
|
428
|
+
<num>(z)</num>
|
429
|
+
<p>item-z</p>
|
430
|
+
</item>
|
431
|
+
<item id="list0.aa">
|
432
|
+
<num>(aa)</num>
|
433
|
+
<p>item-aa</p>
|
434
|
+
</item>
|
435
|
+
<item id="list0.bb">
|
436
|
+
<num>(bb)</num>
|
437
|
+
<p>item-bb</p>
|
438
|
+
</item>
|
439
|
+
</blockList>
|
440
|
+
XML
|
441
|
+
)
|
442
|
+
end
|
443
|
+
|
444
|
+
# -------------------------------------------------------------------------
|
445
|
+
|
446
|
+
it 'should treat (AA) after (z) a sublist' do
|
447
|
+
doc = xml2doc(subsection(<<XML
|
448
|
+
<blockList id="list0">
|
449
|
+
<item id="list0.y">
|
450
|
+
<num>(y)</num>
|
451
|
+
<p>foo</p>
|
452
|
+
</item>
|
453
|
+
<item id="list0.z">
|
454
|
+
<num>(z)</num>
|
455
|
+
<p>item-z</p>
|
456
|
+
</item>
|
457
|
+
<item id="list0.AA">
|
458
|
+
<num>(AA)</num>
|
459
|
+
<p>item-AA</p>
|
460
|
+
</item>
|
461
|
+
<item id="list0.BB">
|
462
|
+
<num>(BB)</num>
|
463
|
+
<p>item-BB</p>
|
464
|
+
</item>
|
465
|
+
</blockList>
|
466
|
+
XML
|
467
|
+
))
|
468
|
+
|
469
|
+
subject.adjust_blocklists(doc)
|
470
|
+
doc.to_s.should == subsection(<<XML
|
471
|
+
<blockList id="list0">
|
472
|
+
<item id="list0.y">
|
473
|
+
<num>(y)</num>
|
474
|
+
<p>foo</p>
|
475
|
+
</item>
|
476
|
+
<item id="list0.z">
|
477
|
+
<num>(z)</num>
|
478
|
+
<blockList id="list0.z.list0">
|
479
|
+
<listIntroduction>item-z</listIntroduction>
|
480
|
+
<item id="list0.z.list0.AA">
|
481
|
+
<num>(AA)</num>
|
482
|
+
<p>item-AA</p>
|
483
|
+
</item>
|
484
|
+
<item id="list0.z.list0.BB">
|
485
|
+
<num>(BB)</num>
|
486
|
+
<p>item-BB</p>
|
487
|
+
</item>
|
488
|
+
</blockList>
|
489
|
+
</item>
|
490
|
+
</blockList>
|
491
|
+
XML
|
492
|
+
)
|
493
|
+
end
|
494
|
+
|
495
|
+
# -------------------------------------------------------------------------
|
496
|
+
|
397
497
|
it 'should handle deeply nested lists' do
|
398
498
|
doc = xml2doc(subsection(<<XML
|
399
499
|
<blockList id="list0">
|
@@ -441,7 +541,7 @@ XML
|
|
441
541
|
XML
|
442
542
|
))
|
443
543
|
|
444
|
-
subject.
|
544
|
+
subject.adjust_blocklists(doc)
|
445
545
|
doc.to_s.should == subsection(<<XML
|
446
546
|
<blockList id="list0">
|
447
547
|
<item id="list0.a">
|
@@ -520,7 +620,7 @@ XML
|
|
520
620
|
XML
|
521
621
|
))
|
522
622
|
|
523
|
-
subject.
|
623
|
+
subject.adjust_blocklists(doc)
|
524
624
|
doc.to_s.should == subsection(<<XML
|
525
625
|
<blockList id="section-10.1.lst0">
|
526
626
|
<item id="section-10.1.lst0.h">
|
@@ -567,7 +667,7 @@ XML
|
|
567
667
|
XML
|
568
668
|
))
|
569
669
|
|
570
|
-
subject.
|
670
|
+
subject.adjust_blocklists(doc)
|
571
671
|
doc.to_s.should == subsection(<<XML
|
572
672
|
<blockList id="section-9.subsection-2.list2">
|
573
673
|
<item id="section-9.subsection-2.list2.9.2.1">
|
@@ -585,6 +685,31 @@ XML
|
|
585
685
|
</blockList>
|
586
686
|
</item>
|
587
687
|
</blockList>
|
688
|
+
XML
|
689
|
+
)
|
690
|
+
end
|
691
|
+
|
692
|
+
it 'should handle p tags just before' do
|
693
|
+
doc = xml2doc(subsection(<<XML
|
694
|
+
<p>intro</p>
|
695
|
+
<blockList id="section-10.1.lst0">
|
696
|
+
<item id="section-10.1.lst0.a">
|
697
|
+
<num>(a)</num>
|
698
|
+
<p>foo</p>
|
699
|
+
</item>
|
700
|
+
</blockList>
|
701
|
+
XML
|
702
|
+
))
|
703
|
+
|
704
|
+
subject.adjust_blocklists(doc)
|
705
|
+
doc.to_s.should == subsection(<<XML
|
706
|
+
<blockList id="section-10.1.lst0">
|
707
|
+
<listIntroduction>intro</listIntroduction>
|
708
|
+
<item id="section-10.1.lst0.a">
|
709
|
+
<num>(a)</num>
|
710
|
+
<p>foo</p>
|
711
|
+
</item>
|
712
|
+
</blockList>
|
588
713
|
XML
|
589
714
|
)
|
590
715
|
end
|