slaw 3.2.0 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/README.md +5 -0
- data/lib/slaw/grammars/tables.treetop +33 -7
- data/lib/slaw/grammars/tables_nodes.rb +1 -1
- data/lib/slaw/grammars/za/act_nodes.rb +1 -1
- data/lib/slaw/grammars/za/act_text.xsl +3 -1
- data/lib/slaw/parse/blocklists.rb +7 -5
- data/lib/slaw/version.rb +1 -1
- data/spec/generator_spec.rb +1 -1
- data/spec/parse/builder_spec.rb +10 -10
- data/spec/spec_helper.rb +5 -0
- data/spec/za/act_block_spec.rb +13 -13
- data/spec/za/act_inline_spec.rb +2 -2
- data/spec/za/act_table_spec.rb +44 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71eaedcb879cb296c11690f4d885135caf60282724350fac3d8e500a61605c32
|
4
|
+
data.tar.gz: 26bc65de89b73bf06b5084c7f233c70c18ff940296e1721e5247d51e39d0b251
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 83ac8241f21659e0f678ef7222015491ba18e86228d83687b1cbd8e40f57c93339b05f74884b2c727522fc7093a408497135db7dd72e27fd5b59c93146b78620
|
7
|
+
data.tar.gz: 59db7ddb266e6d029a8370d623b6482b1c67fb479a0a2b6fcda2cf7d3612a7314946a82165c8fe8a5b397e59b09c9704b68bbc02f9ed0cc6747ad3db4c363d0a
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--require spec_helper
|
data/README.md
CHANGED
@@ -81,6 +81,11 @@ You can create your own grammar by creating a gem that provides these files and
|
|
81
81
|
|
82
82
|
## Changelog
|
83
83
|
|
84
|
+
### 3.3.0 (1 May 2019)
|
85
|
+
|
86
|
+
* Only renest annotated blocklists
|
87
|
+
* Table grammar uses additional rules and permits whitespace
|
88
|
+
|
84
89
|
### 3.2.0 (22 April 2019)
|
85
90
|
|
86
91
|
* Permit inline content in chapter, part and section headings
|
@@ -14,38 +14,64 @@ module Slaw
|
|
14
14
|
# lines, which we do support.
|
15
15
|
|
16
16
|
rule table
|
17
|
-
|
17
|
+
table_start
|
18
18
|
table_body
|
19
|
-
|
19
|
+
table_end
|
20
20
|
<Table>
|
21
21
|
end
|
22
22
|
|
23
|
+
rule table_start
|
24
|
+
space? '{|' eol
|
25
|
+
end
|
26
|
+
|
27
|
+
rule table_end
|
28
|
+
space? '|}' eol
|
29
|
+
end
|
30
|
+
|
23
31
|
rule table_body
|
24
32
|
(table_row / table_cell)*
|
25
33
|
end
|
26
34
|
|
27
35
|
rule table_row
|
28
|
-
'|-' space? eol
|
36
|
+
space? '|-' space? eol
|
29
37
|
end
|
30
38
|
|
31
39
|
rule table_cell
|
32
40
|
# don't match end-of-table
|
33
|
-
!
|
34
|
-
|
35
|
-
#
|
36
|
-
|
41
|
+
!table_end
|
42
|
+
|
43
|
+
# td (|) or th (!) cell marker with attributes
|
44
|
+
table_cell_start attribs:table_attribs? space?
|
45
|
+
|
46
|
+
# cell's first content line, then multiple lines
|
47
|
+
content:(line:table_line (!table_cell_start space? line:table_line)*)
|
37
48
|
<TableCell>
|
38
49
|
end
|
39
50
|
|
51
|
+
# td (|) or th (!) cell marker
|
52
|
+
rule table_cell_start
|
53
|
+
space? [!|]
|
54
|
+
|
55
|
+
{
|
56
|
+
def th?
|
57
|
+
elements[1].text_value == '!'
|
58
|
+
end
|
59
|
+
}
|
60
|
+
end
|
61
|
+
|
40
62
|
rule table_line
|
41
63
|
inline_items:inline_items? eol
|
42
64
|
<TableLine>
|
43
65
|
end
|
44
66
|
|
67
|
+
# foo=one bar=two |
|
45
68
|
rule table_attribs
|
46
69
|
space? attribs:(table_attrib+) '|'
|
47
70
|
end
|
48
71
|
|
72
|
+
# foo=bar
|
73
|
+
# foo="bar"
|
74
|
+
# foo='bar'
|
49
75
|
rule table_attrib
|
50
76
|
name:([a-z_-]+) '=' value:(
|
51
77
|
('"' (!'"' .)* '"') /
|
@@ -10,7 +10,7 @@
|
|
10
10
|
<xsl:template name="escape">
|
11
11
|
<xsl:param name="value"/>
|
12
12
|
|
13
|
-
<xsl:variable name="prefix" select="translate(substring($value, 1,
|
13
|
+
<xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
|
14
14
|
<!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
|
15
15
|
<xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
|
16
16
|
|
@@ -21,6 +21,8 @@
|
|
21
21
|
starts-with($prefix, 'CHAPTER ') or
|
22
22
|
starts-with($prefix, 'PART ') or
|
23
23
|
starts-with($prefix, 'SCHEDULE ') or
|
24
|
+
starts-with($prefix, 'LONGTITLE ') or
|
25
|
+
starts-with($prefix, 'CROSSHEADING ') or
|
24
26
|
starts-with($prefix, '{|') or
|
25
27
|
starts-with($numprefix, '(')">
|
26
28
|
<xsl:text>\</xsl:text>
|
@@ -3,10 +3,11 @@ module Slaw
|
|
3
3
|
module Blocklists
|
4
4
|
include Slaw::Namespace
|
5
5
|
|
6
|
-
# Correctly re-nest nested block lists
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
6
|
+
# Correctly re-nest nested block lists that are tagged with the "renest" attribute.
|
7
|
+
#
|
8
|
+
# We do this by identifying the numbering format of each item in the list
|
9
|
+
# and comparing it with the surrounding elements. When the numbering
|
10
|
+
# format changes, we start a new nested list.
|
10
11
|
#
|
11
12
|
# We make sure to handle special cases such as `(i)` coming between
|
12
13
|
# `(h)` and `(j)` versus being at the start of a `(i), (ii), (iii)`
|
@@ -34,7 +35,8 @@ module Slaw
|
|
34
35
|
#
|
35
36
|
# @param doc [Nokogiri::XML::Document] the document
|
36
37
|
def self.nest_blocklists(doc)
|
37
|
-
doc.xpath('//a:blockList', a: NS).each do |blocklist|
|
38
|
+
doc.xpath('//a:blockList[@renest]', a: NS).each do |blocklist|
|
39
|
+
blocklist.remove_attribute('renest')
|
38
40
|
items = blocklist.xpath('a:item', a: NS)
|
39
41
|
nest_blocklist_items(items.to_a, guess_number_format(items.first), nil, nil) unless items.empty?
|
40
42
|
end
|
data/lib/slaw/version.rb
CHANGED
data/spec/generator_spec.rb
CHANGED
@@ -119,7 +119,7 @@ PREFACE not escaped
|
|
119
119
|
<heading>Section</heading>
|
120
120
|
<paragraph id="section-9.paragraph-0">
|
121
121
|
<content>
|
122
|
-
<blockList id="section-9.paragraph-0.list1">
|
122
|
+
<blockList id="section-9.paragraph-0.list1" renest="true">
|
123
123
|
<listIntroduction>(2) A special meeting <remark>[ foo ]</remark>:</listIntroduction>
|
124
124
|
<item id="section-9.paragraph-0.list1.a">
|
125
125
|
<num>(a)</num>
|
data/spec/parse/builder_spec.rb
CHANGED
@@ -10,7 +10,7 @@ describe Slaw::Parse::Builder do
|
|
10
10
|
describe '#adjust_blocklists' do
|
11
11
|
it 'should nest simple blocks' do
|
12
12
|
doc = xml2doc(subsection(<<XML
|
13
|
-
<blockList id="section-10.1.lst0">
|
13
|
+
<blockList id="section-10.1.lst0" renest="true">
|
14
14
|
<item id="section-10.1.lst0.a">
|
15
15
|
<num>(a)</num>
|
16
16
|
<p>foo</p>
|
@@ -79,7 +79,7 @@ XML
|
|
79
79
|
|
80
80
|
it 'should jump back up a level' do
|
81
81
|
doc = xml2doc(subsection(<<XML
|
82
|
-
<blockList id="section-10.1.lst0">
|
82
|
+
<blockList id="section-10.1.lst0" renest="true">
|
83
83
|
<item id="section-10.1.lst0.a">
|
84
84
|
<num>(a)</num>
|
85
85
|
<p>foo</p>
|
@@ -130,7 +130,7 @@ XML
|
|
130
130
|
|
131
131
|
it 'should handle (i) correctly' do
|
132
132
|
doc = xml2doc(subsection(<<XML
|
133
|
-
<blockList id="section-10.1.lst0">
|
133
|
+
<blockList id="section-10.1.lst0" renest="true">
|
134
134
|
<item id="section-10.1.lst0.h">
|
135
135
|
<num>(h)</num>
|
136
136
|
<p>foo</p>
|
@@ -171,7 +171,7 @@ XML
|
|
171
171
|
|
172
172
|
it 'should handle (u) (v) and (x) correctly' do
|
173
173
|
doc = xml2doc(subsection(<<XML
|
174
|
-
<blockList id="section-10.1.lst0">
|
174
|
+
<blockList id="section-10.1.lst0" renest="true">
|
175
175
|
<item id="section-10.1.lst0.t">
|
176
176
|
<num>(t)</num>
|
177
177
|
<p>foo</p>
|
@@ -229,7 +229,7 @@ XML
|
|
229
229
|
|
230
230
|
it 'should handle (j) correctly' do
|
231
231
|
doc = xml2doc(subsection(<<XML
|
232
|
-
<blockList id="section-28.3.list2">
|
232
|
+
<blockList id="section-28.3.list2" renest="true">
|
233
233
|
<item id="section-28.3.list2.g">
|
234
234
|
<num>(g)</num>
|
235
235
|
<p>all <term refersTo="#term-memorial_work" id="trm381">memorial work</term> up to 150 mm in thickness must be securely attached to the base;</p>
|
@@ -303,7 +303,7 @@ XML
|
|
303
303
|
# -------------------------------------------------------------------------
|
304
304
|
it 'should handle (I) correctly' do
|
305
305
|
doc = xml2doc(subsection(<<XML
|
306
|
-
<blockList id="section-28.3.list2">
|
306
|
+
<blockList id="section-28.3.list2" renest="true">
|
307
307
|
<item id="section-28.3.list2.g">
|
308
308
|
<num>(g)</num>
|
309
309
|
<p>all memorial work up to 150 mm in thickness must be securely attached to the base;</p>
|
@@ -445,7 +445,7 @@ XML
|
|
445
445
|
|
446
446
|
it 'should treat (AA) after (z) a sublist' do
|
447
447
|
doc = xml2doc(subsection(<<XML
|
448
|
-
<blockList id="list0">
|
448
|
+
<blockList id="list0" renest="true">
|
449
449
|
<item id="list0.y">
|
450
450
|
<num>(y)</num>
|
451
451
|
<p>foo</p>
|
@@ -496,7 +496,7 @@ XML
|
|
496
496
|
|
497
497
|
it 'should handle deeply nested lists' do
|
498
498
|
doc = xml2doc(subsection(<<XML
|
499
|
-
<blockList id="list0">
|
499
|
+
<blockList id="list0" renest="true">
|
500
500
|
<item id="list0.a">
|
501
501
|
<num>(a)</num>
|
502
502
|
<p>foo</p>
|
@@ -599,7 +599,7 @@ XML
|
|
599
599
|
|
600
600
|
it 'should jump back up a level when finding (i) near (h)' do
|
601
601
|
doc = xml2doc(subsection(<<XML
|
602
|
-
<blockList id="section-10.1.lst0">
|
602
|
+
<blockList id="section-10.1.lst0" renest="true">
|
603
603
|
<item id="section-10.1.lst0.h">
|
604
604
|
<num>(h)</num>
|
605
605
|
<p>foo</p>
|
@@ -650,7 +650,7 @@ XML
|
|
650
650
|
|
651
651
|
it 'should handle dotted numbers correctly' do
|
652
652
|
doc = xml2doc(subsection(<<XML
|
653
|
-
<blockList id="section-9.subsection-2.list2">
|
653
|
+
<blockList id="section-9.subsection-2.list2" renest="true">
|
654
654
|
<item id="section-9.subsection-2.list2.9.2.1">
|
655
655
|
<num>9.2.1</num>
|
656
656
|
<p>is incapable of trading because of an illness, provided that:</p>
|
data/spec/spec_helper.rb
CHANGED
data/spec/za/act_block_spec.rb
CHANGED
@@ -85,7 +85,7 @@ EOS
|
|
85
85
|
<paragraph id="paragraph0">
|
86
86
|
<content>
|
87
87
|
<p>Some content before the section</p>
|
88
|
-
<blockList id="paragraph0.list1">
|
88
|
+
<blockList id="paragraph0.list1" renest="true">
|
89
89
|
<item id="paragraph0.list1.a">
|
90
90
|
<num>(a)</num>
|
91
91
|
<p>foo</p>
|
@@ -654,7 +654,7 @@ EOS
|
|
654
654
|
<num>(2)</num>
|
655
655
|
<content>
|
656
656
|
<p>title</p>
|
657
|
-
<blockList id="2.list1">
|
657
|
+
<blockList id="2.list1" renest="true">
|
658
658
|
<item id="2.list1.a">
|
659
659
|
<num>(a)</num>
|
660
660
|
<p>one</p>
|
@@ -687,7 +687,7 @@ EOS
|
|
687
687
|
to_xml(node, "", 1).should == '<subsection id="1">
|
688
688
|
<num>(1)</num>
|
689
689
|
<content>
|
690
|
-
<blockList id="1.list0">
|
690
|
+
<blockList id="1.list0" renest="true">
|
691
691
|
<item id="1.list0.a">
|
692
692
|
<num>(a)</num>
|
693
693
|
<p>one</p>
|
@@ -719,7 +719,7 @@ EOS
|
|
719
719
|
to_xml(node, "", 1).should == '<subsection id="1">
|
720
720
|
<num>(1)</num>
|
721
721
|
<content>
|
722
|
-
<blockList id="1.list0">
|
722
|
+
<blockList id="1.list0" renest="true">
|
723
723
|
<item id="1.list0.a">
|
724
724
|
<num>(a)</num>
|
725
725
|
<p>one</p>
|
@@ -778,7 +778,7 @@ EOS
|
|
778
778
|
<num>(1)</num>
|
779
779
|
<content>
|
780
780
|
<p>here\'s my really cool list,</p>
|
781
|
-
<blockList id="1.list1">
|
781
|
+
<blockList id="1.list1" renest="true">
|
782
782
|
<item id="1.list1.a">
|
783
783
|
<num>(a)</num>
|
784
784
|
<p>one</p>
|
@@ -812,7 +812,7 @@ EOS
|
|
812
812
|
<num>(1)</num>
|
813
813
|
<content>
|
814
814
|
<p>here\'s my really cool list,</p>
|
815
|
-
<blockList id="1.list1">
|
815
|
+
<blockList id="1.list1" renest="true">
|
816
816
|
<item id="1.list1.a">
|
817
817
|
<num>(a)</num>
|
818
818
|
<p/>
|
@@ -854,7 +854,7 @@ EOS
|
|
854
854
|
<num>9.9</num>
|
855
855
|
<content>
|
856
856
|
<p>foo</p>
|
857
|
-
<blockList id="9.9.list1">
|
857
|
+
<blockList id="9.9.list1" renest="true">
|
858
858
|
<item id="9.9.list1.9.9.1">
|
859
859
|
<num>9.9.1</num>
|
860
860
|
<p>item1</p>
|
@@ -888,7 +888,7 @@ EOS
|
|
888
888
|
<num>(1)</num>
|
889
889
|
<content>
|
890
890
|
<p>a list</p>
|
891
|
-
<blockList id="1.list1">
|
891
|
+
<blockList id="1.list1" renest="true">
|
892
892
|
<item id="1.list1.a">
|
893
893
|
<num>(a)</num>
|
894
894
|
<p>item 1</p>
|
@@ -899,7 +899,7 @@ EOS
|
|
899
899
|
</item>
|
900
900
|
</blockList>
|
901
901
|
<p>some text</p>
|
902
|
-
<blockList id="1.list3">
|
902
|
+
<blockList id="1.list3" renest="true">
|
903
903
|
<item id="1.list3.c">
|
904
904
|
<num>(c)</num>
|
905
905
|
<p>item 3</p>
|
@@ -1346,7 +1346,7 @@ EOS
|
|
1346
1346
|
<heading>Section</heading>
|
1347
1347
|
<paragraph id="section-1.paragraph0">
|
1348
1348
|
<content>
|
1349
|
-
<blockList id="section-1.paragraph0.list0">
|
1349
|
+
<blockList id="section-1.paragraph0.list0" renest="true">
|
1350
1350
|
<item id="section-1.paragraph0.list0.a">
|
1351
1351
|
<num>(a)</num>
|
1352
1352
|
<p>first</p>
|
@@ -1424,7 +1424,7 @@ EOS
|
|
1424
1424
|
<paragraph id="section-1.paragraph0">
|
1425
1425
|
<content>
|
1426
1426
|
<p>naked statement (c) blah</p>
|
1427
|
-
<blockList id="section-1.paragraph0.list1">
|
1427
|
+
<blockList id="section-1.paragraph0.list1" renest="true">
|
1428
1428
|
<item id="section-1.paragraph0.list1.a">
|
1429
1429
|
<num>(a)</num>
|
1430
1430
|
<p>foo</p>
|
@@ -1466,7 +1466,7 @@ EOS
|
|
1466
1466
|
<num>(2)</num>
|
1467
1467
|
<content>
|
1468
1468
|
<p>Schedule 1</p>
|
1469
|
-
<blockList id="section-1.2.list1">
|
1469
|
+
<blockList id="section-1.2.list1" renest="true">
|
1470
1470
|
<item id="section-1.2.list1.a">
|
1471
1471
|
<num>(a)</num>
|
1472
1472
|
<p>Part 1</p>
|
@@ -1538,7 +1538,7 @@ EOS
|
|
1538
1538
|
<num>3.1</num>
|
1539
1539
|
<content>
|
1540
1540
|
<p>Informal trading may include, amongst others:-</p>
|
1541
|
-
<blockList id="section-3.3.1.list1">
|
1541
|
+
<blockList id="section-3.3.1.list1" renest="true">
|
1542
1542
|
<item id="section-3.3.1.list1.3.1.1">
|
1543
1543
|
<num>3.1.1</num>
|
1544
1544
|
<p>street trading;</p>
|
data/spec/za/act_inline_spec.rb
CHANGED
@@ -125,7 +125,7 @@ EOS
|
|
125
125
|
<num>(1)</num>
|
126
126
|
<content>
|
127
127
|
<p>something</p>
|
128
|
-
<blockList id="section-1.1.list1">
|
128
|
+
<blockList id="section-1.1.list1" renest="true">
|
129
129
|
<item id="section-1.1.list1.a">
|
130
130
|
<num>(a)</num>
|
131
131
|
<p>with a remark <remark status="editorial">[Section 1 amended by Act 23 of 2004]</remark></p>
|
@@ -285,7 +285,7 @@ EOS
|
|
285
285
|
EOS
|
286
286
|
to_xml(node, "").should == '<paragraph id="paragraph0">
|
287
287
|
<content>
|
288
|
-
<blockList id="paragraph0.list0">
|
288
|
+
<blockList id="paragraph0.list0" renest="true">
|
289
289
|
<item id="paragraph0.list0.2.18.1">
|
290
290
|
<num>2.18.1</num>
|
291
291
|
<p>a traffic officer appointed in terms of section 3 of the Road Traffic <ref href="/za/act/1989/29">Act, No. 29 of 1989</ref> or section 3A of the National Road Traffic <ref href="/za/act/1996/93">Act No. 93 of 1996</ref> as the case may be;</p>
|
data/spec/za/act_table_spec.rb
CHANGED
@@ -164,6 +164,50 @@ EOS
|
|
164
164
|
</table>'
|
165
165
|
end
|
166
166
|
|
167
|
+
it 'should allow whitespace at start of table rows' do
|
168
|
+
node = parse :table, <<EOS
|
169
|
+
{|
|
170
|
+
! foo
|
171
|
+
three
|
172
|
+
|-
|
173
|
+
| four
|
174
|
+
|}
|
175
|
+
EOS
|
176
|
+
|
177
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
178
|
+
<tr>
|
179
|
+
<th>
|
180
|
+
<p>foo<eol/>three</p>
|
181
|
+
</th>
|
182
|
+
</tr>
|
183
|
+
<tr>
|
184
|
+
<td>
|
185
|
+
<p>four</p>
|
186
|
+
</td>
|
187
|
+
</tr>
|
188
|
+
</table>'
|
189
|
+
end
|
190
|
+
|
191
|
+
it 'should tolerate lines that aren\'t really ending lines' do
|
192
|
+
node = parse :table, <<EOS
|
193
|
+
{|
|
194
|
+
| cell
|
195
|
+
|} another cell
|
196
|
+
|}
|
197
|
+
EOS
|
198
|
+
|
199
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
200
|
+
<tr>
|
201
|
+
<td>
|
202
|
+
<p>cell</p>
|
203
|
+
</td>
|
204
|
+
<td>
|
205
|
+
<p>} another cell</p>
|
206
|
+
</td>
|
207
|
+
</tr>
|
208
|
+
</table>'
|
209
|
+
end
|
210
|
+
|
167
211
|
it 'should parse a table in a section' do
|
168
212
|
node = parse :section, <<EOS
|
169
213
|
10. A section title
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-05-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -118,6 +118,7 @@ extensions: []
|
|
118
118
|
extra_rdoc_files: []
|
119
119
|
files:
|
120
120
|
- ".gitignore"
|
121
|
+
- ".rspec"
|
121
122
|
- ".travis.yml"
|
122
123
|
- Gemfile
|
123
124
|
- LICENSE.txt
|