slaw 10.4.1 → 10.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa5a1bf4a3098846a8f22ab9cff0c1a44f376407b9cc9c2f314ec69d9445c073
4
- data.tar.gz: 918bc6711f33db010f38c80606a52c7f9ff446778230dcbeafa27d34abc5eceb
3
+ metadata.gz: ba413f53b9d24192d6ce5a168eed83ca55a317dcb3768009cfdf3bf902a23327
4
+ data.tar.gz: f664b13ce90bb21b65c0fffb2f780dc5937429c31ac2a9d6270259e0669f2f61
5
5
  SHA512:
6
- metadata.gz: 1231195c46118f5098b4b31deca137981d03dacdcdb57175c30779cd4abc0339a95d480d3d08962e8d8dfae8d6c67880c1f219867583fe3df15b531391b19086
7
- data.tar.gz: 7bdbb9416a5c77eddab03eafef40b8aae830c2bcb71dc2cb4dc15f746be7404fd7122dbc242150e6ae90a9dca4f3a144f59013e3d36d5662de30f815aaf6ab00
6
+ metadata.gz: 0f0929aa1fcce2c86f6340fec7731f5e865e0b58b793b4f7d64ba48ec2463299498b4b96fea3487a11ab57a2ae932273e27c593395cd2ca263347006fa36a88a
7
+ data.tar.gz: 72aceb516d091bd396deeb828873058a3cda366524c3e1819590ae77f021ab9a806baf0108f1239dceaaf0e7a64713e1c954ec385b5ed03563f1d702cd4b0984
data/README.md CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
86
86
 
87
87
  ## Changelog
88
88
 
89
+ ### 10.5.0 (20 April 2021)
90
+
91
+ * Escape inline markers (such as `**`, `//` and `[[`) with a backslash when unparsing XML to text.
92
+
89
93
  ### 10.4.1 (14 April 2021)
90
94
 
91
95
  * Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, eg `[https:\/\/example.com](https://example.com)`
@@ -20,7 +20,7 @@ module Slaw
20
20
  end
21
21
 
22
22
  rule inline_item
23
- remark / image / ref / bold / italics / superscript / subscript / [^\n]
23
+ remark / image / ref / bold / italics / superscript / subscript / '\\'? [^\n]
24
24
  <InlineItem>
25
25
  end
26
26
 
@@ -37,8 +37,12 @@ module Slaw
37
37
 
38
38
  class InlineItem < Treetop::Runtime::SyntaxNode
39
39
  def to_xml(b, idprefix)
40
- # handle escaped characters foo\/bar -> foo/bar
41
- b.text(text_value.gsub(/\\(.)?/, '\1'))
40
+ if text_value.start_with? '\\'
41
+ # handle escaped characters: \a -> a
42
+ b.text(text_value[1..])
43
+ else
44
+ b.text(text_value)
45
+ end
42
46
  end
43
47
  end
44
48
 
@@ -9,31 +9,141 @@
9
9
  <xsl:strip-space elements="*"/>
10
10
  <xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
11
11
 
12
+ <!-- replaces "value" in "text" with "replacement" -->
13
+ <xsl:template name="string-replace-all">
14
+ <xsl:param name="text" />
15
+ <xsl:param name="value" />
16
+ <xsl:param name="replacement" />
17
+
18
+ <xsl:choose>
19
+ <xsl:when test="$text = '' or $value = '' or not($value)">
20
+ <xsl:value-of select="$text" />
21
+ </xsl:when>
22
+ <xsl:when test="contains($text, $value)">
23
+ <xsl:value-of select="substring-before($text, $value)"/>
24
+ <xsl:value-of select="$replacement" />
25
+ <xsl:call-template name="string-replace-all">
26
+ <xsl:with-param name="text" select="substring-after($text, $value)" />
27
+ <xsl:with-param name="value" select="$value" />
28
+ <xsl:with-param name="replacement" select="$replacement" />
29
+ </xsl:call-template>
30
+ </xsl:when>
31
+ <xsl:otherwise>
32
+ <xsl:value-of select="$text" />
33
+ </xsl:otherwise>
34
+ </xsl:choose>
35
+ </xsl:template>
36
+
37
+ <!-- Escape inline markers with a backslash -->
38
+ <xsl:template name="escape-inlines">
39
+ <xsl:param name="text" />
40
+
41
+ <!-- This works from the inside out, first escaping backslash chars themselves, then escaping
42
+ the different types of inline markers -->
43
+ <xsl:call-template name="string-replace-all">
44
+ <xsl:with-param name="text">
45
+ <xsl:call-template name="string-replace-all">
46
+ <xsl:with-param name="text">
47
+ <xsl:call-template name="string-replace-all">
48
+ <xsl:with-param name="text">
49
+ <xsl:call-template name="string-replace-all">
50
+ <xsl:with-param name="text">
51
+ <xsl:call-template name="string-replace-all">
52
+ <xsl:with-param name="text">
53
+ <xsl:call-template name="string-replace-all">
54
+ <xsl:with-param name="text">
55
+ <xsl:call-template name="string-replace-all">
56
+ <xsl:with-param name="text">
57
+ <xsl:call-template name="string-replace-all">
58
+ <xsl:with-param name="text">
59
+ <xsl:call-template name="string-replace-all">
60
+ <xsl:with-param name="text">
61
+ <xsl:call-template name="string-replace-all">
62
+ <xsl:with-param name="text" select="$text" />
63
+ <xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
64
+ <xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
65
+ </xsl:call-template>
66
+ </xsl:with-param>
67
+ <xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
68
+ <xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
69
+ </xsl:call-template>
70
+ </xsl:with-param>
71
+ <xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
72
+ <xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
73
+ </xsl:call-template>
74
+ </xsl:with-param>
75
+ <xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
76
+ <xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
77
+ </xsl:call-template>
78
+ </xsl:with-param>
79
+ <xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
80
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
81
+ </xsl:call-template>
82
+ </xsl:with-param>
83
+ <xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
84
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
85
+ </xsl:call-template>
86
+ </xsl:with-param>
87
+ <xsl:with-param name="value"><xsl:value-of select="'!['" /></xsl:with-param>
88
+ <xsl:with-param name="replacement"><xsl:value-of select="'\!['" /></xsl:with-param>
89
+ </xsl:call-template>
90
+ </xsl:with-param>
91
+ <xsl:with-param name="value"><xsl:value-of select="']('" /></xsl:with-param>
92
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]('" /></xsl:with-param>
93
+ </xsl:call-template>
94
+ </xsl:with-param>
95
+ <xsl:with-param name="value"><xsl:value-of select="'[['" /></xsl:with-param>
96
+ <xsl:with-param name="replacement"><xsl:value-of select="'\[\['" /></xsl:with-param>
97
+ </xsl:call-template>
98
+ </xsl:with-param>
99
+ <xsl:with-param name="value"><xsl:value-of select="']]'" /></xsl:with-param>
100
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]\]'" /></xsl:with-param>
101
+ </xsl:call-template>
102
+ </xsl:template>
103
+
12
104
  <!-- adds a backslash to the start of the value param, if necessary -->
13
- <xsl:template name="escape">
105
+ <xsl:template name="escape-prefixes">
14
106
  <xsl:param name="value"/>
15
107
 
16
108
  <xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
17
109
  <!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
18
110
  <xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
19
111
 
20
- <!-- p tags must escape initial content that looks like a block element marker -->
21
- <xsl:if test="$prefix = 'BODY' or
22
- $prefix = 'PREAMBLE' or
23
- $prefix = 'PREFACE' or
24
- starts-with($prefix, 'CHAPTER ') or
25
- starts-with($prefix, 'PART ') or
26
- starts-with($prefix, 'SUBPART ') or
27
- starts-with($prefix, 'SCHEDULE ') or
28
- starts-with($prefix, 'HEADING ') or
29
- starts-with($prefix, 'SUBHEADING ') or
30
- starts-with($prefix, 'LONGTITLE ') or
31
- starts-with($prefix, 'CROSSHEADING ') or
32
- starts-with($prefix, '{|') or
33
- starts-with($numprefix, '(')">
34
- <xsl:text>\</xsl:text>
35
- </xsl:if>
36
- <xsl:value-of select="$value"/>
112
+ <xsl:variable name="slash">
113
+ <!-- p tags must escape initial content that looks like a block element marker -->
114
+ <xsl:if test="$prefix = 'BODY' or
115
+ $prefix = 'PREAMBLE' or
116
+ $prefix = 'PREFACE' or
117
+ starts-with($prefix, 'CHAPTER ') or
118
+ starts-with($prefix, 'PART ') or
119
+ starts-with($prefix, 'SUBPART ') or
120
+ starts-with($prefix, 'SCHEDULE ') or
121
+ starts-with($prefix, 'HEADING ') or
122
+ starts-with($prefix, 'SUBHEADING ') or
123
+ starts-with($prefix, 'LONGTITLE ') or
124
+ starts-with($prefix, 'CROSSHEADING ') or
125
+ starts-with($prefix, '{|') or
126
+ starts-with($numprefix, '(')">
127
+ <xsl:value-of select="'\'" />
128
+ </xsl:if>
129
+ </xsl:variable>
130
+
131
+ <xsl:value-of select="concat($slash, $value)" />
132
+ </xsl:template>
133
+
134
+ <!-- escapes inline markers in the value param, then adds a backslash to the start of the result, if necessary -->
135
+ <xsl:template name="escape">
136
+ <xsl:param name="value"/>
137
+
138
+ <xsl:variable name="escaped">
139
+ <xsl:call-template name="escape-inlines">
140
+ <xsl:with-param name="text" select="$value" />
141
+ </xsl:call-template>
142
+ </xsl:variable>
143
+
144
+ <xsl:call-template name="escape-prefixes">
145
+ <xsl:with-param name="value" select="$escaped" />
146
+ </xsl:call-template>
37
147
  </xsl:template>
38
148
 
39
149
  <xsl:template match="a:act">
@@ -157,12 +267,19 @@
157
267
  </xsl:template>
158
268
 
159
269
  <!-- first text nodes of these elems must be escaped if they have special chars -->
160
- <xsl:template match="a:p[not(ancestor::a:table)]/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
270
+ <xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
161
271
  <xsl:call-template name="escape">
162
272
  <xsl:with-param name="value" select="." />
163
273
  </xsl:call-template>
164
274
  </xsl:template>
165
275
 
276
+ <!-- escape inlines in text nodes -->
277
+ <xsl:template match="text()">
278
+ <xsl:call-template name="escape-inlines">
279
+ <xsl:with-param name="text" select="." />
280
+ </xsl:call-template>
281
+ </xsl:template>
282
+
166
283
 
167
284
  <!-- attachments/schedules -->
168
285
  <xsl:template match="a:attachment">
@@ -192,31 +309,24 @@
192
309
  <xsl:value-of select="." />
193
310
  <xsl:text>" </xsl:text>
194
311
  </xsl:for-each>
195
- <xsl:text>
196
- |-</xsl:text>
312
+ <xsl:text>&#10;|-</xsl:text>
197
313
 
198
314
  <xsl:apply-templates />
199
- <xsl:text>
200
- |}
201
-
202
- </xsl:text>
315
+ <xsl:text>&#10;|}&#10;&#10;</xsl:text>
203
316
  </xsl:template>
204
317
 
205
318
  <xsl:template match="a:tr">
206
319
  <xsl:apply-templates />
207
- <xsl:text>
208
- |-</xsl:text>
320
+ <xsl:text>&#10;|-</xsl:text>
209
321
  </xsl:template>
210
322
 
211
323
  <xsl:template match="a:th|a:td">
212
324
  <xsl:choose>
213
325
  <xsl:when test="local-name(.) = 'th'">
214
- <xsl:text>
215
- ! </xsl:text>
326
+ <xsl:text>&#10;! </xsl:text>
216
327
  </xsl:when>
217
328
  <xsl:when test="local-name(.) = 'td'">
218
- <xsl:text>
219
- | </xsl:text>
329
+ <xsl:text>&#10;| </xsl:text>
220
330
  </xsl:when>
221
331
  </xsl:choose>
222
332
 
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "10.4.1"
2
+ VERSION = "10.5.0"
3
3
  end
@@ -0,0 +1,20 @@
1
+ BODY
2
+
3
+ 1. Section that tests escapes
4
+
5
+ text \\ with a single slash
6
+
7
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
8
+
9
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
10
+
11
+ refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
12
+
13
+ nested ** stars \*\* in bold \*\***
14
+
15
+ nested // slashes \/\/ in italics \/\///
16
+
17
+ nested ** stars in // italics \*\* // and bold **
18
+
19
+ super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
20
+
@@ -78,45 +78,81 @@ XML
78
78
 
79
79
  1. Section
80
80
 
81
- \Chapter 2 ignored
81
+ \\Chapter 2 ignored
82
82
 
83
83
  Chapters
84
84
 
85
- \Part 2 ignored
85
+ \\Part 2 ignored
86
86
 
87
87
  participation
88
88
 
89
- \Schedule 2 ignored
89
+ \\Schedule 2 ignored
90
90
 
91
91
  Schedules
92
92
 
93
- \HEADING x
93
+ \\HEADING x
94
94
 
95
- \SUBHEADING x
95
+ \\SUBHEADING x
96
96
 
97
97
  BODY not escaped
98
98
 
99
- \BODY
99
+ \\BODY
100
100
 
101
101
  PREAMBLE not escaped
102
102
 
103
- \PREAMBLE
103
+ \\PREAMBLE
104
104
 
105
105
  PREFACE not escaped
106
106
 
107
- \PREFACE
107
+ \\PREFACE
108
108
 
109
- \2. ignored
109
+ \\2. ignored
110
110
 
111
- \2.1 ignored
111
+ \\2.1 ignored
112
112
 
113
- \(2) ignored
113
+ \\(2) ignored
114
114
 
115
- \(a) ignored
115
+ \\(a) ignored
116
116
 
117
- \(2a) ignored
117
+ \\(2a) ignored
118
118
 
119
- \{| ignored
119
+ \\{| ignored
120
+
121
+ '
122
+ end
123
+
124
+ it 'should escape inlines when unparsing' do
125
+ doc = xml2doc(section(<<'XML'
126
+ <num>1.</num>
127
+ <heading>Section</heading>
128
+ <paragraph id="section-1.paragraph-0">
129
+ <content>
130
+ <p>text \ with a single slash</p>
131
+ <p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
132
+ <p>inlines that ** should // be [[ escaped ![ and ]]</p>
133
+ <p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
134
+ <p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
135
+ </content>
136
+ </paragraph>
137
+ XML
138
+ ))
139
+
140
+ text = subject.text_from_act(doc)
141
+ # NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT \' and a double backslash,
142
+ # which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
143
+ text.should == 'BODY
144
+
145
+ 1. Section
146
+
147
+ text \\\\ with a single slash
148
+
149
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
150
+
151
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
152
+
153
+ refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
154
+
155
+ super ^^with \^\^^^ and sub _^\_^ with \^_^_
120
156
 
121
157
  '
122
158
  end
@@ -148,7 +184,7 @@ XML
148
184
 
149
185
  1. Section
150
186
 
151
- \(2) A special meeting [[ foo ]]:
187
+ \\(2) A special meeting [[ foo ]]:
152
188
 
153
189
  (a) the chairperson so directs; or
154
190
 
@@ -269,4 +305,13 @@ Subject to approval in terms of this By-Law.
269
305
  '
270
306
  end
271
307
  end
308
+
309
+ describe 'round trip' do
310
+ it 'should be idempotent for escapes' do
311
+ text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
312
+ act = subject.generate_from_text(text)
313
+ xml = act.to_xml(encoding: 'utf-8')
314
+ subject.text_from_act(act).should == text
315
+ end
316
+ end
272
317
  end
@@ -117,16 +117,19 @@ EOS
117
117
  it 'should handle escaped content' do
118
118
  node = parse :body, <<EOS
119
119
  \\1. ignored
120
+ foo \\\\bar
120
121
 
121
122
  \\CROSSHEADING cross\\heading
122
123
 
123
124
  1. Sec\\tion
124
125
  \\Chapter 2 ignored
126
+ Some text with a \\\\real backslash
125
127
  EOS
126
128
  to_xml(node).should == '<body>
127
129
  <hcontainer eId="hcontainer_1" name="hcontainer">
128
130
  <content>
129
131
  <p>1. ignored</p>
132
+ <p>foo \\bar</p>
130
133
  <p>CROSSHEADING crossheading</p>
131
134
  </content>
132
135
  </hcontainer>
@@ -136,6 +139,7 @@ EOS
136
139
  <hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
137
140
  <content>
138
141
  <p>Chapter 2 ignored</p>
142
+ <p>Some text with a \\real backslash</p>
139
143
  </content>
140
144
  </hcontainer>
141
145
  </section>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.4.1
4
+ version: 10.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-14 00:00:00.000000000 Z
11
+ date: 2021-04-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -144,6 +144,7 @@ files:
144
144
  - spec/counters_spec.rb
145
145
  - spec/extract/extractor_spec.rb
146
146
  - spec/fixtures/community-fire-safety.xml
147
+ - spec/fixtures/roundtrip-escapes.txt
147
148
  - spec/generator_spec.rb
148
149
  - spec/parse/blocklists_spec.rb
149
150
  - spec/parse/builder_spec.rb
@@ -182,6 +183,7 @@ test_files:
182
183
  - spec/counters_spec.rb
183
184
  - spec/extract/extractor_spec.rb
184
185
  - spec/fixtures/community-fire-safety.xml
186
+ - spec/fixtures/roundtrip-escapes.txt
185
187
  - spec/generator_spec.rb
186
188
  - spec/parse/blocklists_spec.rb
187
189
  - spec/parse/builder_spec.rb