slaw 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/README.md +5 -0
- data/lib/slaw/grammars/tables.treetop +33 -7
- data/lib/slaw/grammars/tables_nodes.rb +1 -1
- data/lib/slaw/grammars/za/act_nodes.rb +1 -1
- data/lib/slaw/grammars/za/act_text.xsl +3 -1
- data/lib/slaw/parse/blocklists.rb +7 -5
- data/lib/slaw/version.rb +1 -1
- data/spec/generator_spec.rb +1 -1
- data/spec/parse/builder_spec.rb +10 -10
- data/spec/spec_helper.rb +5 -0
- data/spec/za/act_block_spec.rb +13 -13
- data/spec/za/act_inline_spec.rb +2 -2
- data/spec/za/act_table_spec.rb +44 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71eaedcb879cb296c11690f4d885135caf60282724350fac3d8e500a61605c32
|
4
|
+
data.tar.gz: 26bc65de89b73bf06b5084c7f233c70c18ff940296e1721e5247d51e39d0b251
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83ac8241f21659e0f678ef7222015491ba18e86228d83687b1cbd8e40f57c93339b05f74884b2c727522fc7093a408497135db7dd72e27fd5b59c93146b78620
|
7
|
+
data.tar.gz: 59db7ddb266e6d029a8370d623b6482b1c67fb479a0a2b6fcda2cf7d3612a7314946a82165c8fe8a5b397e59b09c9704b68bbc02f9ed0cc6747ad3db4c363d0a
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--require spec_helper
|
data/README.md
CHANGED
@@ -81,6 +81,11 @@ You can create your own grammar by creating a gem that provides these files and
|
|
81
81
|
|
82
82
|
## Changelog
|
83
83
|
|
84
|
+
### 3.3.0 (1 May 2019)
|
85
|
+
|
86
|
+
* Only renest annotated blocklists
|
87
|
+
* Table grammar uses additional rules and permits whitespace
|
88
|
+
|
84
89
|
### 3.2.0 (22 April 2019)
|
85
90
|
|
86
91
|
* Permit inline content in chapter, part and section headings
|
data/lib/slaw/grammars/tables.treetop
CHANGED
@@ -14,38 +14,64 @@ module Slaw
|
|
14
14
|
# lines, which we do support.
|
15
15
|
|
16
16
|
rule table
|
17
|
-
|
17
|
+
table_start
|
18
18
|
table_body
|
19
|
-
|
19
|
+
table_end
|
20
20
|
<Table>
|
21
21
|
end
|
22
22
|
|
23
|
+
rule table_start
|
24
|
+
space? '{|' eol
|
25
|
+
end
|
26
|
+
|
27
|
+
rule table_end
|
28
|
+
space? '|}' eol
|
29
|
+
end
|
30
|
+
|
23
31
|
rule table_body
|
24
32
|
(table_row / table_cell)*
|
25
33
|
end
|
26
34
|
|
27
35
|
rule table_row
|
28
|
-
'|-' space? eol
|
36
|
+
space? '|-' space? eol
|
29
37
|
end
|
30
38
|
|
31
39
|
rule table_cell
|
32
40
|
# don't match end-of-table
|
33
|
-
!
|
34
|
-
|
35
|
-
#
|
36
|
-
|
41
|
+
!table_end
|
42
|
+
|
43
|
+
# td (|) or th (!) cell marker with attributes
|
44
|
+
table_cell_start attribs:table_attribs? space?
|
45
|
+
|
46
|
+
# cell's first content line, then multiple lines
|
47
|
+
content:(line:table_line (!table_cell_start space? line:table_line)*)
|
37
48
|
<TableCell>
|
38
49
|
end
|
39
50
|
|
51
|
+
# td (|) or th (!) cell marker
|
52
|
+
rule table_cell_start
|
53
|
+
space? [!|]
|
54
|
+
|
55
|
+
{
|
56
|
+
def th?
|
57
|
+
elements[1].text_value == '!'
|
58
|
+
end
|
59
|
+
}
|
60
|
+
end
|
61
|
+
|
40
62
|
rule table_line
|
41
63
|
inline_items:inline_items? eol
|
42
64
|
<TableLine>
|
43
65
|
end
|
44
66
|
|
67
|
+
# foo=one bar=two |
|
45
68
|
rule table_attribs
|
46
69
|
space? attribs:(table_attrib+) '|'
|
47
70
|
end
|
48
71
|
|
72
|
+
# foo=bar
|
73
|
+
# foo="bar"
|
74
|
+
# foo='bar'
|
49
75
|
rule table_attrib
|
50
76
|
name:([a-z_-]+) '=' value:(
|
51
77
|
('"' (!'"' .)* '"') /
|
data/lib/slaw/grammars/za/act_text.xsl
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
<xsl:template name="escape">
|
11
11
|
<xsl:param name="value"/>
|
12
12
|
|
13
|
-
<xsl:variable name="prefix" select="translate(substring($value, 1,
|
13
|
+
<xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
|
14
14
|
<!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
|
15
15
|
<xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
|
16
16
|
|
@@ -21,6 +21,8 @@
|
|
21
21
|
starts-with($prefix, 'CHAPTER ') or
|
22
22
|
starts-with($prefix, 'PART ') or
|
23
23
|
starts-with($prefix, 'SCHEDULE ') or
|
24
|
+
starts-with($prefix, 'LONGTITLE ') or
|
25
|
+
starts-with($prefix, 'CROSSHEADING ') or
|
24
26
|
starts-with($prefix, '{|') or
|
25
27
|
starts-with($numprefix, '(')">
|
26
28
|
<xsl:text>\</xsl:text>
|
data/lib/slaw/parse/blocklists.rb
CHANGED
@@ -3,10 +3,11 @@ module Slaw
|
|
3
3
|
module Blocklists
|
4
4
|
include Slaw::Namespace
|
5
5
|
|
6
|
-
# Correctly re-nest nested block lists
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
6
|
+
# Correctly re-nest nested block lists that are tagged with the "renest" attribute.
|
7
|
+
#
|
8
|
+
# We do this by identifying the numbering format of each item in the list
|
9
|
+
# and comparing it with the surrounding elements. When the numbering
|
10
|
+
# format changes, we start a new nested list.
|
10
11
|
#
|
11
12
|
# We make sure to handle special cases such as `(i)` coming between
|
12
13
|
# `(h)` and `(j)` versus being at the start of a `(i), (ii), (iii)`
|
@@ -34,7 +35,8 @@ module Slaw
|
|
34
35
|
#
|
35
36
|
# @param doc [Nokogiri::XML::Document] the document
|
36
37
|
def self.nest_blocklists(doc)
|
37
|
-
doc.xpath('//a:blockList', a: NS).each do |blocklist|
|
38
|
+
doc.xpath('//a:blockList[@renest]', a: NS).each do |blocklist|
|
39
|
+
blocklist.remove_attribute('renest')
|
38
40
|
items = blocklist.xpath('a:item', a: NS)
|
39
41
|
nest_blocklist_items(items.to_a, guess_number_format(items.first), nil, nil) unless items.empty?
|
40
42
|
end
|
data/lib/slaw/version.rb
CHANGED
data/spec/generator_spec.rb
CHANGED
@@ -119,7 +119,7 @@ PREFACE not escaped
|
|
119
119
|
<heading>Section</heading>
|
120
120
|
<paragraph id="section-9.paragraph-0">
|
121
121
|
<content>
|
122
|
-
<blockList id="section-9.paragraph-0.list1">
|
122
|
+
<blockList id="section-9.paragraph-0.list1" renest="true">
|
123
123
|
<listIntroduction>(2) A special meeting <remark>[ foo ]</remark>:</listIntroduction>
|
124
124
|
<item id="section-9.paragraph-0.list1.a">
|
125
125
|
<num>(a)</num>
|
data/spec/parse/builder_spec.rb
CHANGED
@@ -10,7 +10,7 @@ describe Slaw::Parse::Builder do
|
|
10
10
|
describe '#adjust_blocklists' do
|
11
11
|
it 'should nest simple blocks' do
|
12
12
|
doc = xml2doc(subsection(<<XML
|
13
|
-
<blockList id="section-10.1.lst0">
|
13
|
+
<blockList id="section-10.1.lst0" renest="true">
|
14
14
|
<item id="section-10.1.lst0.a">
|
15
15
|
<num>(a)</num>
|
16
16
|
<p>foo</p>
|
@@ -79,7 +79,7 @@ XML
|
|
79
79
|
|
80
80
|
it 'should jump back up a level' do
|
81
81
|
doc = xml2doc(subsection(<<XML
|
82
|
-
<blockList id="section-10.1.lst0">
|
82
|
+
<blockList id="section-10.1.lst0" renest="true">
|
83
83
|
<item id="section-10.1.lst0.a">
|
84
84
|
<num>(a)</num>
|
85
85
|
<p>foo</p>
|
@@ -130,7 +130,7 @@ XML
|
|
130
130
|
|
131
131
|
it 'should handle (i) correctly' do
|
132
132
|
doc = xml2doc(subsection(<<XML
|
133
|
-
<blockList id="section-10.1.lst0">
|
133
|
+
<blockList id="section-10.1.lst0" renest="true">
|
134
134
|
<item id="section-10.1.lst0.h">
|
135
135
|
<num>(h)</num>
|
136
136
|
<p>foo</p>
|
@@ -171,7 +171,7 @@ XML
|
|
171
171
|
|
172
172
|
it 'should handle (u) (v) and (x) correctly' do
|
173
173
|
doc = xml2doc(subsection(<<XML
|
174
|
-
<blockList id="section-10.1.lst0">
|
174
|
+
<blockList id="section-10.1.lst0" renest="true">
|
175
175
|
<item id="section-10.1.lst0.t">
|
176
176
|
<num>(t)</num>
|
177
177
|
<p>foo</p>
|
@@ -229,7 +229,7 @@ XML
|
|
229
229
|
|
230
230
|
it 'should handle (j) correctly' do
|
231
231
|
doc = xml2doc(subsection(<<XML
|
232
|
-
<blockList id="section-28.3.list2">
|
232
|
+
<blockList id="section-28.3.list2" renest="true">
|
233
233
|
<item id="section-28.3.list2.g">
|
234
234
|
<num>(g)</num>
|
235
235
|
<p>all <term refersTo="#term-memorial_work" id="trm381">memorial work</term> up to 150 mm in thickness must be securely attached to the base;</p>
|
@@ -303,7 +303,7 @@ XML
|
|
303
303
|
# -------------------------------------------------------------------------
|
304
304
|
it 'should handle (I) correctly' do
|
305
305
|
doc = xml2doc(subsection(<<XML
|
306
|
-
<blockList id="section-28.3.list2">
|
306
|
+
<blockList id="section-28.3.list2" renest="true">
|
307
307
|
<item id="section-28.3.list2.g">
|
308
308
|
<num>(g)</num>
|
309
309
|
<p>all memorial work up to 150 mm in thickness must be securely attached to the base;</p>
|
@@ -445,7 +445,7 @@ XML
|
|
445
445
|
|
446
446
|
it 'should treat (AA) after (z) a sublist' do
|
447
447
|
doc = xml2doc(subsection(<<XML
|
448
|
-
<blockList id="list0">
|
448
|
+
<blockList id="list0" renest="true">
|
449
449
|
<item id="list0.y">
|
450
450
|
<num>(y)</num>
|
451
451
|
<p>foo</p>
|
@@ -496,7 +496,7 @@ XML
|
|
496
496
|
|
497
497
|
it 'should handle deeply nested lists' do
|
498
498
|
doc = xml2doc(subsection(<<XML
|
499
|
-
<blockList id="list0">
|
499
|
+
<blockList id="list0" renest="true">
|
500
500
|
<item id="list0.a">
|
501
501
|
<num>(a)</num>
|
502
502
|
<p>foo</p>
|
@@ -599,7 +599,7 @@ XML
|
|
599
599
|
|
600
600
|
it 'should jump back up a level when finding (i) near (h)' do
|
601
601
|
doc = xml2doc(subsection(<<XML
|
602
|
-
<blockList id="section-10.1.lst0">
|
602
|
+
<blockList id="section-10.1.lst0" renest="true">
|
603
603
|
<item id="section-10.1.lst0.h">
|
604
604
|
<num>(h)</num>
|
605
605
|
<p>foo</p>
|
@@ -650,7 +650,7 @@ XML
|
|
650
650
|
|
651
651
|
it 'should handle dotted numbers correctly' do
|
652
652
|
doc = xml2doc(subsection(<<XML
|
653
|
-
<blockList id="section-9.subsection-2.list2">
|
653
|
+
<blockList id="section-9.subsection-2.list2" renest="true">
|
654
654
|
<item id="section-9.subsection-2.list2.9.2.1">
|
655
655
|
<num>9.2.1</num>
|
656
656
|
<p>is incapable of trading because of an illness, provided that:</p>
|
data/spec/spec_helper.rb
CHANGED
data/spec/za/act_block_spec.rb
CHANGED
@@ -85,7 +85,7 @@ EOS
|
|
85
85
|
<paragraph id="paragraph0">
|
86
86
|
<content>
|
87
87
|
<p>Some content before the section</p>
|
88
|
-
<blockList id="paragraph0.list1">
|
88
|
+
<blockList id="paragraph0.list1" renest="true">
|
89
89
|
<item id="paragraph0.list1.a">
|
90
90
|
<num>(a)</num>
|
91
91
|
<p>foo</p>
|
@@ -654,7 +654,7 @@ EOS
|
|
654
654
|
<num>(2)</num>
|
655
655
|
<content>
|
656
656
|
<p>title</p>
|
657
|
-
<blockList id="2.list1">
|
657
|
+
<blockList id="2.list1" renest="true">
|
658
658
|
<item id="2.list1.a">
|
659
659
|
<num>(a)</num>
|
660
660
|
<p>one</p>
|
@@ -687,7 +687,7 @@ EOS
|
|
687
687
|
to_xml(node, "", 1).should == '<subsection id="1">
|
688
688
|
<num>(1)</num>
|
689
689
|
<content>
|
690
|
-
<blockList id="1.list0">
|
690
|
+
<blockList id="1.list0" renest="true">
|
691
691
|
<item id="1.list0.a">
|
692
692
|
<num>(a)</num>
|
693
693
|
<p>one</p>
|
@@ -719,7 +719,7 @@ EOS
|
|
719
719
|
to_xml(node, "", 1).should == '<subsection id="1">
|
720
720
|
<num>(1)</num>
|
721
721
|
<content>
|
722
|
-
<blockList id="1.list0">
|
722
|
+
<blockList id="1.list0" renest="true">
|
723
723
|
<item id="1.list0.a">
|
724
724
|
<num>(a)</num>
|
725
725
|
<p>one</p>
|
@@ -778,7 +778,7 @@ EOS
|
|
778
778
|
<num>(1)</num>
|
779
779
|
<content>
|
780
780
|
<p>here\'s my really cool list,</p>
|
781
|
-
<blockList id="1.list1">
|
781
|
+
<blockList id="1.list1" renest="true">
|
782
782
|
<item id="1.list1.a">
|
783
783
|
<num>(a)</num>
|
784
784
|
<p>one</p>
|
@@ -812,7 +812,7 @@ EOS
|
|
812
812
|
<num>(1)</num>
|
813
813
|
<content>
|
814
814
|
<p>here\'s my really cool list,</p>
|
815
|
-
<blockList id="1.list1">
|
815
|
+
<blockList id="1.list1" renest="true">
|
816
816
|
<item id="1.list1.a">
|
817
817
|
<num>(a)</num>
|
818
818
|
<p/>
|
@@ -854,7 +854,7 @@ EOS
|
|
854
854
|
<num>9.9</num>
|
855
855
|
<content>
|
856
856
|
<p>foo</p>
|
857
|
-
<blockList id="9.9.list1">
|
857
|
+
<blockList id="9.9.list1" renest="true">
|
858
858
|
<item id="9.9.list1.9.9.1">
|
859
859
|
<num>9.9.1</num>
|
860
860
|
<p>item1</p>
|
@@ -888,7 +888,7 @@ EOS
|
|
888
888
|
<num>(1)</num>
|
889
889
|
<content>
|
890
890
|
<p>a list</p>
|
891
|
-
<blockList id="1.list1">
|
891
|
+
<blockList id="1.list1" renest="true">
|
892
892
|
<item id="1.list1.a">
|
893
893
|
<num>(a)</num>
|
894
894
|
<p>item 1</p>
|
@@ -899,7 +899,7 @@ EOS
|
|
899
899
|
</item>
|
900
900
|
</blockList>
|
901
901
|
<p>some text</p>
|
902
|
-
<blockList id="1.list3">
|
902
|
+
<blockList id="1.list3" renest="true">
|
903
903
|
<item id="1.list3.c">
|
904
904
|
<num>(c)</num>
|
905
905
|
<p>item 3</p>
|
@@ -1346,7 +1346,7 @@ EOS
|
|
1346
1346
|
<heading>Section</heading>
|
1347
1347
|
<paragraph id="section-1.paragraph0">
|
1348
1348
|
<content>
|
1349
|
-
<blockList id="section-1.paragraph0.list0">
|
1349
|
+
<blockList id="section-1.paragraph0.list0" renest="true">
|
1350
1350
|
<item id="section-1.paragraph0.list0.a">
|
1351
1351
|
<num>(a)</num>
|
1352
1352
|
<p>first</p>
|
@@ -1424,7 +1424,7 @@ EOS
|
|
1424
1424
|
<paragraph id="section-1.paragraph0">
|
1425
1425
|
<content>
|
1426
1426
|
<p>naked statement (c) blah</p>
|
1427
|
-
<blockList id="section-1.paragraph0.list1">
|
1427
|
+
<blockList id="section-1.paragraph0.list1" renest="true">
|
1428
1428
|
<item id="section-1.paragraph0.list1.a">
|
1429
1429
|
<num>(a)</num>
|
1430
1430
|
<p>foo</p>
|
@@ -1466,7 +1466,7 @@ EOS
|
|
1466
1466
|
<num>(2)</num>
|
1467
1467
|
<content>
|
1468
1468
|
<p>Schedule 1</p>
|
1469
|
-
<blockList id="section-1.2.list1">
|
1469
|
+
<blockList id="section-1.2.list1" renest="true">
|
1470
1470
|
<item id="section-1.2.list1.a">
|
1471
1471
|
<num>(a)</num>
|
1472
1472
|
<p>Part 1</p>
|
@@ -1538,7 +1538,7 @@ EOS
|
|
1538
1538
|
<num>3.1</num>
|
1539
1539
|
<content>
|
1540
1540
|
<p>Informal trading may include, amongst others:-</p>
|
1541
|
-
<blockList id="section-3.3.1.list1">
|
1541
|
+
<blockList id="section-3.3.1.list1" renest="true">
|
1542
1542
|
<item id="section-3.3.1.list1.3.1.1">
|
1543
1543
|
<num>3.1.1</num>
|
1544
1544
|
<p>street trading;</p>
|
data/spec/za/act_inline_spec.rb
CHANGED
@@ -125,7 +125,7 @@ EOS
|
|
125
125
|
<num>(1)</num>
|
126
126
|
<content>
|
127
127
|
<p>something</p>
|
128
|
-
<blockList id="section-1.1.list1">
|
128
|
+
<blockList id="section-1.1.list1" renest="true">
|
129
129
|
<item id="section-1.1.list1.a">
|
130
130
|
<num>(a)</num>
|
131
131
|
<p>with a remark <remark status="editorial">[Section 1 amended by Act 23 of 2004]</remark></p>
|
@@ -285,7 +285,7 @@ EOS
|
|
285
285
|
EOS
|
286
286
|
to_xml(node, "").should == '<paragraph id="paragraph0">
|
287
287
|
<content>
|
288
|
-
<blockList id="paragraph0.list0">
|
288
|
+
<blockList id="paragraph0.list0" renest="true">
|
289
289
|
<item id="paragraph0.list0.2.18.1">
|
290
290
|
<num>2.18.1</num>
|
291
291
|
<p>a traffic officer appointed in terms of section 3 of the Road Traffic <ref href="/za/act/1989/29">Act, No. 29 of 1989</ref> or section 3A of the National Road Traffic <ref href="/za/act/1996/93">Act No. 93 of 1996</ref> as the case may be;</p>
|
data/spec/za/act_table_spec.rb
CHANGED
@@ -164,6 +164,50 @@ EOS
|
|
164
164
|
</table>'
|
165
165
|
end
|
166
166
|
|
167
|
+
it 'should allow whitespace at start of table rows' do
|
168
|
+
node = parse :table, <<EOS
|
169
|
+
{|
|
170
|
+
! foo
|
171
|
+
three
|
172
|
+
|-
|
173
|
+
| four
|
174
|
+
|}
|
175
|
+
EOS
|
176
|
+
|
177
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
178
|
+
<tr>
|
179
|
+
<th>
|
180
|
+
<p>foo<eol/>three</p>
|
181
|
+
</th>
|
182
|
+
</tr>
|
183
|
+
<tr>
|
184
|
+
<td>
|
185
|
+
<p>four</p>
|
186
|
+
</td>
|
187
|
+
</tr>
|
188
|
+
</table>'
|
189
|
+
end
|
190
|
+
|
191
|
+
it 'should tolerate lines that aren\'t really ending lines' do
|
192
|
+
node = parse :table, <<EOS
|
193
|
+
{|
|
194
|
+
| cell
|
195
|
+
|} another cell
|
196
|
+
|}
|
197
|
+
EOS
|
198
|
+
|
199
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
200
|
+
<tr>
|
201
|
+
<td>
|
202
|
+
<p>cell</p>
|
203
|
+
</td>
|
204
|
+
<td>
|
205
|
+
<p>} another cell</p>
|
206
|
+
</td>
|
207
|
+
</tr>
|
208
|
+
</table>'
|
209
|
+
end
|
210
|
+
|
167
211
|
it 'should parse a table in a section' do
|
168
212
|
node = parse :section, <<EOS
|
169
213
|
10. A section title
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.0
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-04-22 00:00:00.000000000 Z
|
11
|
+
date: 2019-05-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -118,6 +118,7 @@ extensions: []
|
|
118
118
|
extra_rdoc_files: []
|
119
119
|
files:
|
120
120
|
- ".gitignore"
|
121
|
+
- ".rspec"
|
121
122
|
- ".travis.yml"
|
122
123
|
- Gemfile
|
123
124
|
- LICENSE.txt
|