slaw 10.4.1 → 10.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/slaw/grammars/inlines.treetop +1 -1
- data/lib/slaw/grammars/inlines_nodes.rb +6 -2
- data/lib/slaw/grammars/za/act_text.xsl +141 -31
- data/lib/slaw/version.rb +1 -1
- data/spec/fixtures/roundtrip-escapes.txt +20 -0
- data/spec/generator_spec.rb +60 -15
- data/spec/za/act_block_spec.rb +4 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba413f53b9d24192d6ce5a168eed83ca55a317dcb3768009cfdf3bf902a23327
|
4
|
+
data.tar.gz: f664b13ce90bb21b65c0fffb2f780dc5937429c31ac2a9d6270259e0669f2f61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f0929aa1fcce2c86f6340fec7731f5e865e0b58b793b4f7d64ba48ec2463299498b4b96fea3487a11ab57a2ae932273e27c593395cd2ca263347006fa36a88a
|
7
|
+
data.tar.gz: 72aceb516d091bd396deeb828873058a3cda366524c3e1819590ae77f021ab9a806baf0108f1239dceaaf0e7a64713e1c954ec385b5ed03563f1d702cd4b0984
|
data/README.md
CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
|
|
86
86
|
|
87
87
|
## Changelog
|
88
88
|
|
89
|
+
### 10.5.0 (20 April 2021)
|
90
|
+
|
91
|
+
* Handle escaping inlines when unparsing.
|
92
|
+
|
89
93
|
### 10.4.1 (14 April 2021)
|
90
94
|
|
91
95
|
* Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, eg `[https:\/\/example.com](https://example.com)`
|
data/lib/slaw/grammars/inlines_nodes.rb
CHANGED
@@ -37,8 +37,12 @@ module Slaw
|
|
37
37
|
|
38
38
|
class InlineItem < Treetop::Runtime::SyntaxNode
|
39
39
|
def to_xml(b, idprefix)
|
40
|
-
|
41
|
-
|
40
|
+
if text_value.start_with? '\\'
|
41
|
+
# handle escaped characters: \a -> a
|
42
|
+
b.text(text_value[1..])
|
43
|
+
else
|
44
|
+
b.text(text_value)
|
45
|
+
end
|
42
46
|
end
|
43
47
|
end
|
44
48
|
|
data/lib/slaw/grammars/za/act_text.xsl
CHANGED
@@ -9,31 +9,141 @@
|
|
9
9
|
<xsl:strip-space elements="*"/>
|
10
10
|
<xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
|
11
11
|
|
12
|
+
<!-- replaces "value" in "text" with "replacement" -->
|
13
|
+
<xsl:template name="string-replace-all">
|
14
|
+
<xsl:param name="text" />
|
15
|
+
<xsl:param name="value" />
|
16
|
+
<xsl:param name="replacement" />
|
17
|
+
|
18
|
+
<xsl:choose>
|
19
|
+
<xsl:when test="$text = '' or $value = '' or not($value)">
|
20
|
+
<xsl:value-of select="$text" />
|
21
|
+
</xsl:when>
|
22
|
+
<xsl:when test="contains($text, $value)">
|
23
|
+
<xsl:value-of select="substring-before($text, $value)"/>
|
24
|
+
<xsl:value-of select="$replacement" />
|
25
|
+
<xsl:call-template name="string-replace-all">
|
26
|
+
<xsl:with-param name="text" select="substring-after($text, $value)" />
|
27
|
+
<xsl:with-param name="value" select="$value" />
|
28
|
+
<xsl:with-param name="replacement" select="$replacement" />
|
29
|
+
</xsl:call-template>
|
30
|
+
</xsl:when>
|
31
|
+
<xsl:otherwise>
|
32
|
+
<xsl:value-of select="$text" />
|
33
|
+
</xsl:otherwise>
|
34
|
+
</xsl:choose>
|
35
|
+
</xsl:template>
|
36
|
+
|
37
|
+
<!-- Escape inline markers with a backslash -->
|
38
|
+
<xsl:template name="escape-inlines">
|
39
|
+
<xsl:param name="text" />
|
40
|
+
|
41
|
+
<!-- This works from the inside out, first escaping backslash chars themselves, then escaping
|
42
|
+
the different types of inline markers -->
|
43
|
+
<xsl:call-template name="string-replace-all">
|
44
|
+
<xsl:with-param name="text">
|
45
|
+
<xsl:call-template name="string-replace-all">
|
46
|
+
<xsl:with-param name="text">
|
47
|
+
<xsl:call-template name="string-replace-all">
|
48
|
+
<xsl:with-param name="text">
|
49
|
+
<xsl:call-template name="string-replace-all">
|
50
|
+
<xsl:with-param name="text">
|
51
|
+
<xsl:call-template name="string-replace-all">
|
52
|
+
<xsl:with-param name="text">
|
53
|
+
<xsl:call-template name="string-replace-all">
|
54
|
+
<xsl:with-param name="text">
|
55
|
+
<xsl:call-template name="string-replace-all">
|
56
|
+
<xsl:with-param name="text">
|
57
|
+
<xsl:call-template name="string-replace-all">
|
58
|
+
<xsl:with-param name="text">
|
59
|
+
<xsl:call-template name="string-replace-all">
|
60
|
+
<xsl:with-param name="text">
|
61
|
+
<xsl:call-template name="string-replace-all">
|
62
|
+
<xsl:with-param name="text" select="$text" />
|
63
|
+
<xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
|
64
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
|
65
|
+
</xsl:call-template>
|
66
|
+
</xsl:with-param>
|
67
|
+
<xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
|
68
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
|
69
|
+
</xsl:call-template>
|
70
|
+
</xsl:with-param>
|
71
|
+
<xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
|
72
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
|
73
|
+
</xsl:call-template>
|
74
|
+
</xsl:with-param>
|
75
|
+
<xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
|
76
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
|
77
|
+
</xsl:call-template>
|
78
|
+
</xsl:with-param>
|
79
|
+
<xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
|
80
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
|
81
|
+
</xsl:call-template>
|
82
|
+
</xsl:with-param>
|
83
|
+
<xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
|
84
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
|
85
|
+
</xsl:call-template>
|
86
|
+
</xsl:with-param>
|
87
|
+
<xsl:with-param name="value"><xsl:value-of select="', 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
|
17
109
|
<!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
|
18
110
|
<xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
|
19
111
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
112
|
+
<xsl:variable name="slash">
|
113
|
+
<!-- p tags must escape initial content that looks like a block element marker -->
|
114
|
+
<xsl:if test="$prefix = 'BODY' or
|
115
|
+
$prefix = 'PREAMBLE' or
|
116
|
+
$prefix = 'PREFACE' or
|
117
|
+
starts-with($prefix, 'CHAPTER ') or
|
118
|
+
starts-with($prefix, 'PART ') or
|
119
|
+
starts-with($prefix, 'SUBPART ') or
|
120
|
+
starts-with($prefix, 'SCHEDULE ') or
|
121
|
+
starts-with($prefix, 'HEADING ') or
|
122
|
+
starts-with($prefix, 'SUBHEADING ') or
|
123
|
+
starts-with($prefix, 'LONGTITLE ') or
|
124
|
+
starts-with($prefix, 'CROSSHEADING ') or
|
125
|
+
starts-with($prefix, '{|') or
|
126
|
+
starts-with($numprefix, '(')">
|
127
|
+
<xsl:value-of select="'\'" />
|
128
|
+
</xsl:if>
|
129
|
+
</xsl:variable>
|
130
|
+
|
131
|
+
<xsl:value-of select="concat($slash, $value)" />
|
132
|
+
</xsl:template>
|
133
|
+
|
134
|
+
<!-- adds a backslash to the start of the text param, if necessary -->
|
135
|
+
<xsl:template name="escape">
|
136
|
+
<xsl:param name="value"/>
|
137
|
+
|
138
|
+
<xsl:variable name="escaped">
|
139
|
+
<xsl:call-template name="escape-inlines">
|
140
|
+
<xsl:with-param name="text" select="$value" />
|
141
|
+
</xsl:call-template>
|
142
|
+
</xsl:variable>
|
143
|
+
|
144
|
+
<xsl:call-template name="escape-prefixes">
|
145
|
+
<xsl:with-param name="value" select="$escaped" />
|
146
|
+
</xsl:call-template>
|
37
147
|
</xsl:template>
|
38
148
|
|
39
149
|
<xsl:template match="a:act">
|
@@ -157,12 +267,19 @@
|
|
157
267
|
</xsl:template>
|
158
268
|
|
159
269
|
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
160
|
-
<xsl:template match="a:p[not(ancestor::a:table)]/text()[
|
270
|
+
<xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
|
161
271
|
<xsl:call-template name="escape">
|
162
272
|
<xsl:with-param name="value" select="." />
|
163
273
|
</xsl:call-template>
|
164
274
|
</xsl:template>
|
165
275
|
|
276
|
+
<!-- escape inlines in text nodes -->
|
277
|
+
<xsl:template match="text()">
|
278
|
+
<xsl:call-template name="escape-inlines">
|
279
|
+
<xsl:with-param name="text" select="." />
|
280
|
+
</xsl:call-template>
|
281
|
+
</xsl:template>
|
282
|
+
|
166
283
|
|
167
284
|
<!-- attachments/schedules -->
|
168
285
|
<xsl:template match="a:attachment">
|
@@ -192,31 +309,24 @@
|
|
192
309
|
<xsl:value-of select="." />
|
193
310
|
<xsl:text>" </xsl:text>
|
194
311
|
</xsl:for-each>
|
195
|
-
<xsl:text>
|
196
|
-
|-</xsl:text>
|
312
|
+
<xsl:text> |-</xsl:text>
|
197
313
|
|
198
314
|
<xsl:apply-templates />
|
199
|
-
<xsl:text>
|
200
|
-
|}
|
201
|
-
|
202
|
-
</xsl:text>
|
315
|
+
<xsl:text> |} </xsl:text>
|
203
316
|
</xsl:template>
|
204
317
|
|
205
318
|
<xsl:template match="a:tr">
|
206
319
|
<xsl:apply-templates />
|
207
|
-
<xsl:text>
|
208
|
-
|-</xsl:text>
|
320
|
+
<xsl:text> |-</xsl:text>
|
209
321
|
</xsl:template>
|
210
322
|
|
211
323
|
<xsl:template match="a:th|a:td">
|
212
324
|
<xsl:choose>
|
213
325
|
<xsl:when test="local-name(.) = 'th'">
|
214
|
-
<xsl:text>
|
215
|
-
! </xsl:text>
|
326
|
+
<xsl:text> ! </xsl:text>
|
216
327
|
</xsl:when>
|
217
328
|
<xsl:when test="local-name(.) = 'td'">
|
218
|
-
<xsl:text>
|
219
|
-
| </xsl:text>
|
329
|
+
<xsl:text> | </xsl:text>
|
220
330
|
</xsl:when>
|
221
331
|
</xsl:choose>
|
222
332
|
|
data/lib/slaw/version.rb
CHANGED
data/spec/fixtures/roundtrip-escapes.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
BODY
|
2
|
+
|
3
|
+
1. Section that tests escapes
|
4
|
+
|
5
|
+
text \\ with a single slash
|
6
|
+
|
7
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
8
|
+
|
9
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
10
|
+
|
11
|
+
refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
|
12
|
+
|
13
|
+
nested ** stars \*\* in bold \*\***
|
14
|
+
|
15
|
+
nested // slashes \/\/ in italics \/\///
|
16
|
+
|
17
|
+
nested ** stars in // italics \*\* // and bold **
|
18
|
+
|
19
|
+
super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
|
20
|
+
|
data/spec/generator_spec.rb
CHANGED
@@ -78,45 +78,81 @@ XML
|
|
78
78
|
|
79
79
|
1. Section
|
80
80
|
|
81
|
-
|
81
|
+
\\Chapter 2 ignored
|
82
82
|
|
83
83
|
Chapters
|
84
84
|
|
85
|
-
|
85
|
+
\\Part 2 ignored
|
86
86
|
|
87
87
|
participation
|
88
88
|
|
89
|
-
|
89
|
+
\\Schedule 2 ignored
|
90
90
|
|
91
91
|
Schedules
|
92
92
|
|
93
|
-
|
93
|
+
\\HEADING x
|
94
94
|
|
95
|
-
|
95
|
+
\\SUBHEADING x
|
96
96
|
|
97
97
|
BODY not escaped
|
98
98
|
|
99
|
-
|
99
|
+
\\BODY
|
100
100
|
|
101
101
|
PREAMBLE not escaped
|
102
102
|
|
103
|
-
|
103
|
+
\\PREAMBLE
|
104
104
|
|
105
105
|
PREFACE not escaped
|
106
106
|
|
107
|
-
|
107
|
+
\\PREFACE
|
108
108
|
|
109
|
-
|
109
|
+
\\2. ignored
|
110
110
|
|
111
|
-
|
111
|
+
\\2.1 ignored
|
112
112
|
|
113
|
-
|
113
|
+
\\(2) ignored
|
114
114
|
|
115
|
-
|
115
|
+
\\(a) ignored
|
116
116
|
|
117
|
-
|
117
|
+
\\(2a) ignored
|
118
118
|
|
119
|
-
|
119
|
+
\\{| ignored
|
120
|
+
|
121
|
+
'
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'should escape inlines when unparsing' do
|
125
|
+
doc = xml2doc(section(<<'XML'
|
126
|
+
<num>1.</num>
|
127
|
+
<heading>Section</heading>
|
128
|
+
<paragraph id="section-1.paragraph-0">
|
129
|
+
<content>
|
130
|
+
<p>text \ with a single slash</p>
|
131
|
+
<p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
|
132
|
+
<p>inlines that ** should // be [[ escaped ![ and ]]</p>
|
133
|
+
<p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
|
134
|
+
<p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
|
135
|
+
</content>
|
136
|
+
</paragraph>
|
137
|
+
XML
|
138
|
+
))
|
139
|
+
|
140
|
+
text = subject.text_from_act(doc)
|
141
|
+
# NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT a double backslash
|
142
|
+
# which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
|
143
|
+
text.should == 'BODY
|
144
|
+
|
145
|
+
1. Section
|
146
|
+
|
147
|
+
text \\\\ with a single slash
|
148
|
+
|
149
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
150
|
+
|
151
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
152
|
+
|
153
|
+
refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
|
154
|
+
|
155
|
+
super ^^with \^\^^^ and sub _^\_^ with \^_^_
|
120
156
|
|
121
157
|
'
|
122
158
|
end
|
@@ -148,7 +184,7 @@ XML
|
|
148
184
|
|
149
185
|
1. Section
|
150
186
|
|
151
|
-
|
187
|
+
\\(2) A special meeting [[ foo ]]:
|
152
188
|
|
153
189
|
(a) the chairperson so directs; or
|
154
190
|
|
@@ -269,4 +305,13 @@ Subject to approval in terms of this By-Law.
|
|
269
305
|
'
|
270
306
|
end
|
271
307
|
end
|
308
|
+
|
309
|
+
describe 'round trip' do
|
310
|
+
it 'should be idempotent for escapes' do
|
311
|
+
text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
|
312
|
+
act = subject.generate_from_text(text)
|
313
|
+
xml = act.to_xml(encoding: 'utf-8')
|
314
|
+
subject.text_from_act(act).should == text
|
315
|
+
end
|
316
|
+
end
|
272
317
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -117,16 +117,19 @@ EOS
|
|
117
117
|
it 'should handle escaped content' do
|
118
118
|
node = parse :body, <<EOS
|
119
119
|
\\1. ignored
|
120
|
+
foo \\\\bar
|
120
121
|
|
121
122
|
\\CROSSHEADING cross\\heading
|
122
123
|
|
123
124
|
1. Sec\\tion
|
124
125
|
\\Chapter 2 ignored
|
126
|
+
Some text with a \\\\real backslash
|
125
127
|
EOS
|
126
128
|
to_xml(node).should == '<body>
|
127
129
|
<hcontainer eId="hcontainer_1" name="hcontainer">
|
128
130
|
<content>
|
129
131
|
<p>1. ignored</p>
|
132
|
+
<p>foo \\bar</p>
|
130
133
|
<p>CROSSHEADING crossheading</p>
|
131
134
|
</content>
|
132
135
|
</hcontainer>
|
@@ -136,6 +139,7 @@ EOS
|
|
136
139
|
<hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
|
137
140
|
<content>
|
138
141
|
<p>Chapter 2 ignored</p>
|
142
|
+
<p>Some text with a \\real backslash</p>
|
139
143
|
</content>
|
140
144
|
</hcontainer>
|
141
145
|
</section>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.4.1
|
4
|
+
version: 10.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-04-
|
11
|
+
date: 2021-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -144,6 +144,7 @@ files:
|
|
144
144
|
- spec/counters_spec.rb
|
145
145
|
- spec/extract/extractor_spec.rb
|
146
146
|
- spec/fixtures/community-fire-safety.xml
|
147
|
+
- spec/fixtures/roundtrip-escapes.txt
|
147
148
|
- spec/generator_spec.rb
|
148
149
|
- spec/parse/blocklists_spec.rb
|
149
150
|
- spec/parse/builder_spec.rb
|
@@ -182,6 +183,7 @@ test_files:
|
|
182
183
|
- spec/counters_spec.rb
|
183
184
|
- spec/extract/extractor_spec.rb
|
184
185
|
- spec/fixtures/community-fire-safety.xml
|
186
|
+
- spec/fixtures/roundtrip-escapes.txt
|
185
187
|
- spec/generator_spec.rb
|
186
188
|
- spec/parse/blocklists_spec.rb
|
187
189
|
- spec/parse/builder_spec.rb
|