slaw 10.4.1 → 10.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/slaw/grammars/inlines.treetop +1 -1
- data/lib/slaw/grammars/inlines_nodes.rb +6 -2
- data/lib/slaw/grammars/za/act_text.xsl +141 -31
- data/lib/slaw/version.rb +1 -1
- data/spec/fixtures/roundtrip-escapes.txt +20 -0
- data/spec/generator_spec.rb +60 -15
- data/spec/za/act_block_spec.rb +4 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba413f53b9d24192d6ce5a168eed83ca55a317dcb3768009cfdf3bf902a23327
|
4
|
+
data.tar.gz: f664b13ce90bb21b65c0fffb2f780dc5937429c31ac2a9d6270259e0669f2f61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f0929aa1fcce2c86f6340fec7731f5e865e0b58b793b4f7d64ba48ec2463299498b4b96fea3487a11ab57a2ae932273e27c593395cd2ca263347006fa36a88a
|
7
|
+
data.tar.gz: 72aceb516d091bd396deeb828873058a3cda366524c3e1819590ae77f021ab9a806baf0108f1239dceaaf0e7a64713e1c954ec385b5ed03563f1d702cd4b0984
|
data/README.md
CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
|
|
86
86
|
|
87
87
|
## Changelog
|
88
88
|
|
89
|
+
### 10.5.0 (20 April 2021)
|
90
|
+
|
91
|
+
* Handle escaping inlines when unparsing.
|
92
|
+
|
89
93
|
### 10.4.1 (14 April 2021)
|
90
94
|
|
91
95
|
* Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, eg `[https:\/\/example.com](https://example.com)`
|
data/lib/slaw/grammars/inlines_nodes.rb
CHANGED
@@ -37,8 +37,12 @@ module Slaw
|
|
37
37
|
|
38
38
|
class InlineItem < Treetop::Runtime::SyntaxNode
|
39
39
|
def to_xml(b, idprefix)
|
40
|
-
|
41
|
-
|
40
|
+
if text_value.start_with? '\\'
|
41
|
+
# handle escaped characters: \a -> a
|
42
|
+
b.text(text_value[1..])
|
43
|
+
else
|
44
|
+
b.text(text_value)
|
45
|
+
end
|
42
46
|
end
|
43
47
|
end
|
44
48
|
|
data/lib/slaw/grammars/za/act_text.xsl
CHANGED
@@ -9,31 +9,141 @@
|
|
9
9
|
<xsl:strip-space elements="*"/>
|
10
10
|
<xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
|
11
11
|
|
12
|
+
<!-- replaces "value" in "text" with "replacement" -->
|
13
|
+
<xsl:template name="string-replace-all">
|
14
|
+
<xsl:param name="text" />
|
15
|
+
<xsl:param name="value" />
|
16
|
+
<xsl:param name="replacement" />
|
17
|
+
|
18
|
+
<xsl:choose>
|
19
|
+
<xsl:when test="$text = '' or $value = '' or not($value)">
|
20
|
+
<xsl:value-of select="$text" />
|
21
|
+
</xsl:when>
|
22
|
+
<xsl:when test="contains($text, $value)">
|
23
|
+
<xsl:value-of select="substring-before($text, $value)"/>
|
24
|
+
<xsl:value-of select="$replacement" />
|
25
|
+
<xsl:call-template name="string-replace-all">
|
26
|
+
<xsl:with-param name="text" select="substring-after($text, $value)" />
|
27
|
+
<xsl:with-param name="value" select="$value" />
|
28
|
+
<xsl:with-param name="replacement" select="$replacement" />
|
29
|
+
</xsl:call-template>
|
30
|
+
</xsl:when>
|
31
|
+
<xsl:otherwise>
|
32
|
+
<xsl:value-of select="$text" />
|
33
|
+
</xsl:otherwise>
|
34
|
+
</xsl:choose>
|
35
|
+
</xsl:template>
|
36
|
+
|
37
|
+
<!-- Escape inline markers with a backslash -->
|
38
|
+
<xsl:template name="escape-inlines">
|
39
|
+
<xsl:param name="text" />
|
40
|
+
|
41
|
+
<!-- This works from the inside out, first escaping backslash chars themselves, then escaping
|
42
|
+
the different types of inline markers -->
|
43
|
+
<xsl:call-template name="string-replace-all">
|
44
|
+
<xsl:with-param name="text">
|
45
|
+
<xsl:call-template name="string-replace-all">
|
46
|
+
<xsl:with-param name="text">
|
47
|
+
<xsl:call-template name="string-replace-all">
|
48
|
+
<xsl:with-param name="text">
|
49
|
+
<xsl:call-template name="string-replace-all">
|
50
|
+
<xsl:with-param name="text">
|
51
|
+
<xsl:call-template name="string-replace-all">
|
52
|
+
<xsl:with-param name="text">
|
53
|
+
<xsl:call-template name="string-replace-all">
|
54
|
+
<xsl:with-param name="text">
|
55
|
+
<xsl:call-template name="string-replace-all">
|
56
|
+
<xsl:with-param name="text">
|
57
|
+
<xsl:call-template name="string-replace-all">
|
58
|
+
<xsl:with-param name="text">
|
59
|
+
<xsl:call-template name="string-replace-all">
|
60
|
+
<xsl:with-param name="text">
|
61
|
+
<xsl:call-template name="string-replace-all">
|
62
|
+
<xsl:with-param name="text" select="$text" />
|
63
|
+
<xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
|
64
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
|
65
|
+
</xsl:call-template>
|
66
|
+
</xsl:with-param>
|
67
|
+
<xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
|
68
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
|
69
|
+
</xsl:call-template>
|
70
|
+
</xsl:with-param>
|
71
|
+
<xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
|
72
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
|
73
|
+
</xsl:call-template>
|
74
|
+
</xsl:with-param>
|
75
|
+
<xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
|
76
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
|
77
|
+
</xsl:call-template>
|
78
|
+
</xsl:with-param>
|
79
|
+
<xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
|
80
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
|
81
|
+
</xsl:call-template>
|
82
|
+
</xsl:with-param>
|
83
|
+
<xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
|
84
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
|
85
|
+
</xsl:call-template>
|
86
|
+
</xsl:with-param>
|
87
|
+
<xsl:with-param name="value"><xsl:value-of select="'!['" /></xsl:with-param>
|
88
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\!['" /></xsl:with-param>
|
89
|
+
</xsl:call-template>
|
90
|
+
</xsl:with-param>
|
91
|
+
<xsl:with-param name="value"><xsl:value-of select="']('" /></xsl:with-param>
|
92
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\]('" /></xsl:with-param>
|
93
|
+
</xsl:call-template>
|
94
|
+
</xsl:with-param>
|
95
|
+
<xsl:with-param name="value"><xsl:value-of select="'[['" /></xsl:with-param>
|
96
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\[\['" /></xsl:with-param>
|
97
|
+
</xsl:call-template>
|
98
|
+
</xsl:with-param>
|
99
|
+
<xsl:with-param name="value"><xsl:value-of select="']]'" /></xsl:with-param>
|
100
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\]\]'" /></xsl:with-param>
|
101
|
+
</xsl:call-template>
|
102
|
+
</xsl:template>
|
103
|
+
|
12
104
|
<!-- adds a backslash to the start of the value param, if necessary -->
|
13
|
-
<xsl:template name="escape">
|
105
|
+
<xsl:template name="escape-prefixes">
|
14
106
|
<xsl:param name="value"/>
|
15
107
|
|
16
108
|
<xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
|
17
109
|
<!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
|
18
110
|
<xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
|
19
111
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
112
|
+
<xsl:variable name="slash">
|
113
|
+
<!-- p tags must escape initial content that looks like a block element marker -->
|
114
|
+
<xsl:if test="$prefix = 'BODY' or
|
115
|
+
$prefix = 'PREAMBLE' or
|
116
|
+
$prefix = 'PREFACE' or
|
117
|
+
starts-with($prefix, 'CHAPTER ') or
|
118
|
+
starts-with($prefix, 'PART ') or
|
119
|
+
starts-with($prefix, 'SUBPART ') or
|
120
|
+
starts-with($prefix, 'SCHEDULE ') or
|
121
|
+
starts-with($prefix, 'HEADING ') or
|
122
|
+
starts-with($prefix, 'SUBHEADING ') or
|
123
|
+
starts-with($prefix, 'LONGTITLE ') or
|
124
|
+
starts-with($prefix, 'CROSSHEADING ') or
|
125
|
+
starts-with($prefix, '{|') or
|
126
|
+
starts-with($numprefix, '(')">
|
127
|
+
<xsl:value-of select="'\'" />
|
128
|
+
</xsl:if>
|
129
|
+
</xsl:variable>
|
130
|
+
|
131
|
+
<xsl:value-of select="concat($slash, $value)" />
|
132
|
+
</xsl:template>
|
133
|
+
|
134
|
+
<!-- adds a backslash to the start of the text param, if necessary -->
|
135
|
+
<xsl:template name="escape">
|
136
|
+
<xsl:param name="value"/>
|
137
|
+
|
138
|
+
<xsl:variable name="escaped">
|
139
|
+
<xsl:call-template name="escape-inlines">
|
140
|
+
<xsl:with-param name="text" select="$value" />
|
141
|
+
</xsl:call-template>
|
142
|
+
</xsl:variable>
|
143
|
+
|
144
|
+
<xsl:call-template name="escape-prefixes">
|
145
|
+
<xsl:with-param name="value" select="$escaped" />
|
146
|
+
</xsl:call-template>
|
37
147
|
</xsl:template>
|
38
148
|
|
39
149
|
<xsl:template match="a:act">
|
@@ -157,12 +267,19 @@
|
|
157
267
|
</xsl:template>
|
158
268
|
|
159
269
|
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
160
|
-
<xsl:template match="a:p[not(ancestor::a:table)]/text()[
|
270
|
+
<xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
|
161
271
|
<xsl:call-template name="escape">
|
162
272
|
<xsl:with-param name="value" select="." />
|
163
273
|
</xsl:call-template>
|
164
274
|
</xsl:template>
|
165
275
|
|
276
|
+
<!-- escape inlines in text nodes -->
|
277
|
+
<xsl:template match="text()">
|
278
|
+
<xsl:call-template name="escape-inlines">
|
279
|
+
<xsl:with-param name="text" select="." />
|
280
|
+
</xsl:call-template>
|
281
|
+
</xsl:template>
|
282
|
+
|
166
283
|
|
167
284
|
<!-- attachments/schedules -->
|
168
285
|
<xsl:template match="a:attachment">
|
@@ -192,31 +309,24 @@
|
|
192
309
|
<xsl:value-of select="." />
|
193
310
|
<xsl:text>" </xsl:text>
|
194
311
|
</xsl:for-each>
|
195
|
-
<xsl:text>
|
196
|
-
|-</xsl:text>
|
312
|
+
<xsl:text> |-</xsl:text>
|
197
313
|
|
198
314
|
<xsl:apply-templates />
|
199
|
-
<xsl:text>
|
200
|
-
|}
|
201
|
-
|
202
|
-
</xsl:text>
|
315
|
+
<xsl:text> |} </xsl:text>
|
203
316
|
</xsl:template>
|
204
317
|
|
205
318
|
<xsl:template match="a:tr">
|
206
319
|
<xsl:apply-templates />
|
207
|
-
<xsl:text>
|
208
|
-
|-</xsl:text>
|
320
|
+
<xsl:text> |-</xsl:text>
|
209
321
|
</xsl:template>
|
210
322
|
|
211
323
|
<xsl:template match="a:th|a:td">
|
212
324
|
<xsl:choose>
|
213
325
|
<xsl:when test="local-name(.) = 'th'">
|
214
|
-
<xsl:text>
|
215
|
-
! </xsl:text>
|
326
|
+
<xsl:text> ! </xsl:text>
|
216
327
|
</xsl:when>
|
217
328
|
<xsl:when test="local-name(.) = 'td'">
|
218
|
-
<xsl:text>
|
219
|
-
| </xsl:text>
|
329
|
+
<xsl:text> | </xsl:text>
|
220
330
|
</xsl:when>
|
221
331
|
</xsl:choose>
|
222
332
|
|
data/lib/slaw/version.rb
CHANGED
data/spec/fixtures/roundtrip-escapes.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
BODY
|
2
|
+
|
3
|
+
1. Section that tests escapes
|
4
|
+
|
5
|
+
text \\ with a single slash
|
6
|
+
|
7
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
8
|
+
|
9
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
10
|
+
|
11
|
+
refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
|
12
|
+
|
13
|
+
nested ** stars \*\* in bold \*\***
|
14
|
+
|
15
|
+
nested // slashes \/\/ in italics \/\///
|
16
|
+
|
17
|
+
nested ** stars in // italics \*\* // and bold **
|
18
|
+
|
19
|
+
super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
|
20
|
+
|
data/spec/generator_spec.rb
CHANGED
@@ -78,45 +78,81 @@ XML
|
|
78
78
|
|
79
79
|
1. Section
|
80
80
|
|
81
|
-
|
81
|
+
\\Chapter 2 ignored
|
82
82
|
|
83
83
|
Chapters
|
84
84
|
|
85
|
-
|
85
|
+
\\Part 2 ignored
|
86
86
|
|
87
87
|
participation
|
88
88
|
|
89
|
-
|
89
|
+
\\Schedule 2 ignored
|
90
90
|
|
91
91
|
Schedules
|
92
92
|
|
93
|
-
|
93
|
+
\\HEADING x
|
94
94
|
|
95
|
-
|
95
|
+
\\SUBHEADING x
|
96
96
|
|
97
97
|
BODY not escaped
|
98
98
|
|
99
|
-
|
99
|
+
\\BODY
|
100
100
|
|
101
101
|
PREAMBLE not escaped
|
102
102
|
|
103
|
-
|
103
|
+
\\PREAMBLE
|
104
104
|
|
105
105
|
PREFACE not escaped
|
106
106
|
|
107
|
-
|
107
|
+
\\PREFACE
|
108
108
|
|
109
|
-
|
109
|
+
\\2. ignored
|
110
110
|
|
111
|
-
|
111
|
+
\\2.1 ignored
|
112
112
|
|
113
|
-
|
113
|
+
\\(2) ignored
|
114
114
|
|
115
|
-
|
115
|
+
\\(a) ignored
|
116
116
|
|
117
|
-
|
117
|
+
\\(2a) ignored
|
118
118
|
|
119
|
-
|
119
|
+
\\{| ignored
|
120
|
+
|
121
|
+
'
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'should escape inlines when unparsing' do
|
125
|
+
doc = xml2doc(section(<<'XML'
|
126
|
+
<num>1.</num>
|
127
|
+
<heading>Section</heading>
|
128
|
+
<paragraph id="section-1.paragraph-0">
|
129
|
+
<content>
|
130
|
+
<p>text \ with a single slash</p>
|
131
|
+
<p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
|
132
|
+
<p>inlines that ** should // be [[ escaped ![ and ]]</p>
|
133
|
+
<p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
|
134
|
+
<p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
|
135
|
+
</content>
|
136
|
+
</paragraph>
|
137
|
+
XML
|
138
|
+
))
|
139
|
+
|
140
|
+
text = subject.text_from_act(doc)
|
141
|
+
# NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT a double backslash
|
142
|
+
# which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
|
143
|
+
text.should == 'BODY
|
144
|
+
|
145
|
+
1. Section
|
146
|
+
|
147
|
+
text \\\\ with a single slash
|
148
|
+
|
149
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
150
|
+
|
151
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
152
|
+
|
153
|
+
refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
|
154
|
+
|
155
|
+
super ^^with \^\^^^ and sub _^\_^ with \^_^_
|
120
156
|
|
121
157
|
'
|
122
158
|
end
|
@@ -148,7 +184,7 @@ XML
|
|
148
184
|
|
149
185
|
1. Section
|
150
186
|
|
151
|
-
|
187
|
+
\\(2) A special meeting [[ foo ]]:
|
152
188
|
|
153
189
|
(a) the chairperson so directs; or
|
154
190
|
|
@@ -269,4 +305,13 @@ Subject to approval in terms of this By-Law.
|
|
269
305
|
'
|
270
306
|
end
|
271
307
|
end
|
308
|
+
|
309
|
+
describe 'round trip' do
|
310
|
+
it 'should be idempotent for escapes' do
|
311
|
+
text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
|
312
|
+
act = subject.generate_from_text(text)
|
313
|
+
xml = act.to_xml(encoding: 'utf-8')
|
314
|
+
subject.text_from_act(act).should == text
|
315
|
+
end
|
316
|
+
end
|
272
317
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -117,16 +117,19 @@ EOS
|
|
117
117
|
it 'should handle escaped content' do
|
118
118
|
node = parse :body, <<EOS
|
119
119
|
\\1. ignored
|
120
|
+
foo \\\\bar
|
120
121
|
|
121
122
|
\\CROSSHEADING cross\\heading
|
122
123
|
|
123
124
|
1. Sec\\tion
|
124
125
|
\\Chapter 2 ignored
|
126
|
+
Some text with a \\\\real backslash
|
125
127
|
EOS
|
126
128
|
to_xml(node).should == '<body>
|
127
129
|
<hcontainer eId="hcontainer_1" name="hcontainer">
|
128
130
|
<content>
|
129
131
|
<p>1. ignored</p>
|
132
|
+
<p>foo \\bar</p>
|
130
133
|
<p>CROSSHEADING crossheading</p>
|
131
134
|
</content>
|
132
135
|
</hcontainer>
|
@@ -136,6 +139,7 @@ EOS
|
|
136
139
|
<hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
|
137
140
|
<content>
|
138
141
|
<p>Chapter 2 ignored</p>
|
142
|
+
<p>Some text with a \\real backslash</p>
|
139
143
|
</content>
|
140
144
|
</hcontainer>
|
141
145
|
</section>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.4.1
|
4
|
+
version: 10.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-04-
|
11
|
+
date: 2021-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -144,6 +144,7 @@ files:
|
|
144
144
|
- spec/counters_spec.rb
|
145
145
|
- spec/extract/extractor_spec.rb
|
146
146
|
- spec/fixtures/community-fire-safety.xml
|
147
|
+
- spec/fixtures/roundtrip-escapes.txt
|
147
148
|
- spec/generator_spec.rb
|
148
149
|
- spec/parse/blocklists_spec.rb
|
149
150
|
- spec/parse/builder_spec.rb
|
@@ -182,6 +183,7 @@ test_files:
|
|
182
183
|
- spec/counters_spec.rb
|
183
184
|
- spec/extract/extractor_spec.rb
|
184
185
|
- spec/fixtures/community-fire-safety.xml
|
186
|
+
- spec/fixtures/roundtrip-escapes.txt
|
185
187
|
- spec/generator_spec.rb
|
186
188
|
- spec/parse/blocklists_spec.rb
|
187
189
|
- spec/parse/builder_spec.rb
|