slaw 10.4.1 → 10.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa5a1bf4a3098846a8f22ab9cff0c1a44f376407b9cc9c2f314ec69d9445c073
4
- data.tar.gz: 918bc6711f33db010f38c80606a52c7f9ff446778230dcbeafa27d34abc5eceb
3
+ metadata.gz: ba413f53b9d24192d6ce5a168eed83ca55a317dcb3768009cfdf3bf902a23327
4
+ data.tar.gz: f664b13ce90bb21b65c0fffb2f780dc5937429c31ac2a9d6270259e0669f2f61
5
5
  SHA512:
6
- metadata.gz: 1231195c46118f5098b4b31deca137981d03dacdcdb57175c30779cd4abc0339a95d480d3d08962e8d8dfae8d6c67880c1f219867583fe3df15b531391b19086
7
- data.tar.gz: 7bdbb9416a5c77eddab03eafef40b8aae830c2bcb71dc2cb4dc15f746be7404fd7122dbc242150e6ae90a9dca4f3a144f59013e3d36d5662de30f815aaf6ab00
6
+ metadata.gz: 0f0929aa1fcce2c86f6340fec7731f5e865e0b58b793b4f7d64ba48ec2463299498b4b96fea3487a11ab57a2ae932273e27c593395cd2ca263347006fa36a88a
7
+ data.tar.gz: 72aceb516d091bd396deeb828873058a3cda366524c3e1819590ae77f021ab9a806baf0108f1239dceaaf0e7a64713e1c954ec385b5ed03563f1d702cd4b0984
data/README.md CHANGED
@@ -86,6 +86,10 @@ You can create your own grammar by creating a gem that provides these files and
86
86
 
87
87
  ## Changelog
88
88
 
89
+ ### 10.5.0 (20 April 2021)
90
+
91
+ * Escape inline markers (such as `**`, `//` and `[[`) with a backslash when unparsing, so that literal text is not re-parsed as an inline.
92
+
89
93
  ### 10.4.1 (14 April 2021)
90
94
 
91
95
  * Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, eg `[https:\/\/example.com](https://example.com)`
@@ -20,7 +20,7 @@ module Slaw
20
20
  end
21
21
 
22
22
  rule inline_item
23
- remark / image / ref / bold / italics / superscript / subscript / [^\n]
23
+ remark / image / ref / bold / italics / superscript / subscript / '\\'? [^\n]
24
24
  <InlineItem>
25
25
  end
26
26
 
@@ -37,8 +37,12 @@ module Slaw
37
37
 
38
38
  class InlineItem < Treetop::Runtime::SyntaxNode
39
39
  def to_xml(b, idprefix)
40
- # handle escaped characters foo\/bar -> foo/bar
41
- b.text(text_value.gsub(/\\(.)?/, '\1'))
40
+ if text_value.start_with? '\\'
41
+ # handle escaped characters: \a -> a
42
+ b.text(text_value[1..])
43
+ else
44
+ b.text(text_value)
45
+ end
42
46
  end
43
47
  end
44
48
 
@@ -9,31 +9,141 @@
9
9
  <xsl:strip-space elements="*"/>
10
10
  <xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
11
11
 
12
+ <!-- replaces "value" in "text" with "replacement" -->
13
+ <xsl:template name="string-replace-all">
14
+ <xsl:param name="text" />
15
+ <xsl:param name="value" />
16
+ <xsl:param name="replacement" />
17
+
18
+ <xsl:choose>
19
+ <xsl:when test="$text = '' or $value = '' or not($value)">
20
+ <xsl:value-of select="$text" />
21
+ </xsl:when>
22
+ <xsl:when test="contains($text, $value)">
23
+ <xsl:value-of select="substring-before($text, $value)"/>
24
+ <xsl:value-of select="$replacement" />
25
+ <xsl:call-template name="string-replace-all">
26
+ <xsl:with-param name="text" select="substring-after($text, $value)" />
27
+ <xsl:with-param name="value" select="$value" />
28
+ <xsl:with-param name="replacement" select="$replacement" />
29
+ </xsl:call-template>
30
+ </xsl:when>
31
+ <xsl:otherwise>
32
+ <xsl:value-of select="$text" />
33
+ </xsl:otherwise>
34
+ </xsl:choose>
35
+ </xsl:template>
36
+
37
+ <!-- Escape inline markers with a backslash -->
38
+ <xsl:template name="escape-inlines">
39
+ <xsl:param name="text" />
40
+
41
+ <!-- This works from the inside out, first escaping backslash chars themselves, then escaping
42
+ the different types of inline markers -->
43
+ <xsl:call-template name="string-replace-all">
44
+ <xsl:with-param name="text">
45
+ <xsl:call-template name="string-replace-all">
46
+ <xsl:with-param name="text">
47
+ <xsl:call-template name="string-replace-all">
48
+ <xsl:with-param name="text">
49
+ <xsl:call-template name="string-replace-all">
50
+ <xsl:with-param name="text">
51
+ <xsl:call-template name="string-replace-all">
52
+ <xsl:with-param name="text">
53
+ <xsl:call-template name="string-replace-all">
54
+ <xsl:with-param name="text">
55
+ <xsl:call-template name="string-replace-all">
56
+ <xsl:with-param name="text">
57
+ <xsl:call-template name="string-replace-all">
58
+ <xsl:with-param name="text">
59
+ <xsl:call-template name="string-replace-all">
60
+ <xsl:with-param name="text">
61
+ <xsl:call-template name="string-replace-all">
62
+ <xsl:with-param name="text" select="$text" />
63
+ <xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
64
+ <xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
65
+ </xsl:call-template>
66
+ </xsl:with-param>
67
+ <xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
68
+ <xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
69
+ </xsl:call-template>
70
+ </xsl:with-param>
71
+ <xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
72
+ <xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
73
+ </xsl:call-template>
74
+ </xsl:with-param>
75
+ <xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
76
+ <xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
77
+ </xsl:call-template>
78
+ </xsl:with-param>
79
+ <xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
80
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
81
+ </xsl:call-template>
82
+ </xsl:with-param>
83
+ <xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
84
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
85
+ </xsl:call-template>
86
+ </xsl:with-param>
87
+ <xsl:with-param name="value"><xsl:value-of select="'!['" /></xsl:with-param>
88
+ <xsl:with-param name="replacement"><xsl:value-of select="'\!['" /></xsl:with-param>
89
+ </xsl:call-template>
90
+ </xsl:with-param>
91
+ <xsl:with-param name="value"><xsl:value-of select="']('" /></xsl:with-param>
92
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]('" /></xsl:with-param>
93
+ </xsl:call-template>
94
+ </xsl:with-param>
95
+ <xsl:with-param name="value"><xsl:value-of select="'[['" /></xsl:with-param>
96
+ <xsl:with-param name="replacement"><xsl:value-of select="'\[\['" /></xsl:with-param>
97
+ </xsl:call-template>
98
+ </xsl:with-param>
99
+ <xsl:with-param name="value"><xsl:value-of select="']]'" /></xsl:with-param>
100
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]\]'" /></xsl:with-param>
101
+ </xsl:call-template>
102
+ </xsl:template>
103
+
12
104
  <!-- adds a backslash to the start of the value param, if necessary -->
13
- <xsl:template name="escape">
105
+ <xsl:template name="escape-prefixes">
14
106
  <xsl:param name="value"/>
15
107
 
16
108
  <xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
17
109
  <!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
18
110
  <xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
19
111
 
20
- <!-- p tags must escape initial content that looks like a block element marker -->
21
- <xsl:if test="$prefix = 'BODY' or
22
- $prefix = 'PREAMBLE' or
23
- $prefix = 'PREFACE' or
24
- starts-with($prefix, 'CHAPTER ') or
25
- starts-with($prefix, 'PART ') or
26
- starts-with($prefix, 'SUBPART ') or
27
- starts-with($prefix, 'SCHEDULE ') or
28
- starts-with($prefix, 'HEADING ') or
29
- starts-with($prefix, 'SUBHEADING ') or
30
- starts-with($prefix, 'LONGTITLE ') or
31
- starts-with($prefix, 'CROSSHEADING ') or
32
- starts-with($prefix, '{|') or
33
- starts-with($numprefix, '(')">
34
- <xsl:text>\</xsl:text>
35
- </xsl:if>
36
- <xsl:value-of select="$value"/>
112
+ <xsl:variable name="slash">
113
+ <!-- p tags must escape initial content that looks like a block element marker -->
114
+ <xsl:if test="$prefix = 'BODY' or
115
+ $prefix = 'PREAMBLE' or
116
+ $prefix = 'PREFACE' or
117
+ starts-with($prefix, 'CHAPTER ') or
118
+ starts-with($prefix, 'PART ') or
119
+ starts-with($prefix, 'SUBPART ') or
120
+ starts-with($prefix, 'SCHEDULE ') or
121
+ starts-with($prefix, 'HEADING ') or
122
+ starts-with($prefix, 'SUBHEADING ') or
123
+ starts-with($prefix, 'LONGTITLE ') or
124
+ starts-with($prefix, 'CROSSHEADING ') or
125
+ starts-with($prefix, '{|') or
126
+ starts-with($numprefix, '(')">
127
+ <xsl:value-of select="'\'" />
128
+ </xsl:if>
129
+ </xsl:variable>
130
+
131
+ <xsl:value-of select="concat($slash, $value)" />
132
+ </xsl:template>
133
+
134
+ <!-- escapes inline markers in the value param, then adds a backslash to the start of the result, if necessary -->
135
+ <xsl:template name="escape">
136
+ <xsl:param name="value"/>
137
+
138
+ <xsl:variable name="escaped">
139
+ <xsl:call-template name="escape-inlines">
140
+ <xsl:with-param name="text" select="$value" />
141
+ </xsl:call-template>
142
+ </xsl:variable>
143
+
144
+ <xsl:call-template name="escape-prefixes">
145
+ <xsl:with-param name="value" select="$escaped" />
146
+ </xsl:call-template>
37
147
  </xsl:template>
38
148
 
39
149
  <xsl:template match="a:act">
@@ -157,12 +267,19 @@
157
267
  </xsl:template>
158
268
 
159
269
  <!-- first text nodes of these elems must be escaped if they have special chars -->
160
- <xsl:template match="a:p[not(ancestor::a:table)]/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
270
+ <xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
161
271
  <xsl:call-template name="escape">
162
272
  <xsl:with-param name="value" select="." />
163
273
  </xsl:call-template>
164
274
  </xsl:template>
165
275
 
276
+ <!-- escape inlines in text nodes -->
277
+ <xsl:template match="text()">
278
+ <xsl:call-template name="escape-inlines">
279
+ <xsl:with-param name="text" select="." />
280
+ </xsl:call-template>
281
+ </xsl:template>
282
+
166
283
 
167
284
  <!-- attachments/schedules -->
168
285
  <xsl:template match="a:attachment">
@@ -192,31 +309,24 @@
192
309
  <xsl:value-of select="." />
193
310
  <xsl:text>" </xsl:text>
194
311
  </xsl:for-each>
195
- <xsl:text>
196
- |-</xsl:text>
312
+ <xsl:text>&#10;|-</xsl:text>
197
313
 
198
314
  <xsl:apply-templates />
199
- <xsl:text>
200
- |}
201
-
202
- </xsl:text>
315
+ <xsl:text>&#10;|}&#10;&#10;</xsl:text>
203
316
  </xsl:template>
204
317
 
205
318
  <xsl:template match="a:tr">
206
319
  <xsl:apply-templates />
207
- <xsl:text>
208
- |-</xsl:text>
320
+ <xsl:text>&#10;|-</xsl:text>
209
321
  </xsl:template>
210
322
 
211
323
  <xsl:template match="a:th|a:td">
212
324
  <xsl:choose>
213
325
  <xsl:when test="local-name(.) = 'th'">
214
- <xsl:text>
215
- ! </xsl:text>
326
+ <xsl:text>&#10;! </xsl:text>
216
327
  </xsl:when>
217
328
  <xsl:when test="local-name(.) = 'td'">
218
- <xsl:text>
219
- | </xsl:text>
329
+ <xsl:text>&#10;| </xsl:text>
220
330
  </xsl:when>
221
331
  </xsl:choose>
222
332
 
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "10.4.1"
2
+ VERSION = "10.5.0"
3
3
  end
@@ -0,0 +1,20 @@
1
+ BODY
2
+
3
+ 1. Section that tests escapes
4
+
5
+ text \\ with a single slash
6
+
7
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
8
+
9
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
10
+
11
+ refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
12
+
13
+ nested ** stars \*\* in bold \*\***
14
+
15
+ nested // slashes \/\/ in italics \/\///
16
+
17
+ nested ** stars in // italics \*\* // and bold **
18
+
19
+ super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
20
+
@@ -78,45 +78,81 @@ XML
78
78
 
79
79
  1. Section
80
80
 
81
- \Chapter 2 ignored
81
+ \\Chapter 2 ignored
82
82
 
83
83
  Chapters
84
84
 
85
- \Part 2 ignored
85
+ \\Part 2 ignored
86
86
 
87
87
  participation
88
88
 
89
- \Schedule 2 ignored
89
+ \\Schedule 2 ignored
90
90
 
91
91
  Schedules
92
92
 
93
- \HEADING x
93
+ \\HEADING x
94
94
 
95
- \SUBHEADING x
95
+ \\SUBHEADING x
96
96
 
97
97
  BODY not escaped
98
98
 
99
- \BODY
99
+ \\BODY
100
100
 
101
101
  PREAMBLE not escaped
102
102
 
103
- \PREAMBLE
103
+ \\PREAMBLE
104
104
 
105
105
  PREFACE not escaped
106
106
 
107
- \PREFACE
107
+ \\PREFACE
108
108
 
109
- \2. ignored
109
+ \\2. ignored
110
110
 
111
- \2.1 ignored
111
+ \\2.1 ignored
112
112
 
113
- \(2) ignored
113
+ \\(2) ignored
114
114
 
115
- \(a) ignored
115
+ \\(a) ignored
116
116
 
117
- \(2a) ignored
117
+ \\(2a) ignored
118
118
 
119
- \{| ignored
119
+ \\{| ignored
120
+
121
+ '
122
+ end
123
+
124
+ it 'should escape inlines when unparsing' do
125
+ doc = xml2doc(section(<<'XML'
126
+ <num>1.</num>
127
+ <heading>Section</heading>
128
+ <paragraph id="section-1.paragraph-0">
129
+ <content>
130
+ <p>text \ with a single slash</p>
131
+ <p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
132
+ <p>inlines that ** should // be [[ escaped ![ and ]]</p>
133
+ <p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
134
+ <p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
135
+ </content>
136
+ </paragraph>
137
+ XML
138
+ ))
139
+
140
+ text = subject.text_from_act(doc)
141
+ # NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT a double backslash
142
+ # which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
143
+ text.should == 'BODY
144
+
145
+ 1. Section
146
+
147
+ text \\\\ with a single slash
148
+
149
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
150
+
151
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
152
+
153
+ refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
154
+
155
+ super ^^with \^\^^^ and sub _^\_^ with \^_^_
120
156
 
121
157
  '
122
158
  end
@@ -148,7 +184,7 @@ XML
148
184
 
149
185
  1. Section
150
186
 
151
- \(2) A special meeting [[ foo ]]:
187
+ \\(2) A special meeting [[ foo ]]:
152
188
 
153
189
  (a) the chairperson so directs; or
154
190
 
@@ -269,4 +305,13 @@ Subject to approval in terms of this By-Law.
269
305
  '
270
306
  end
271
307
  end
308
+
309
+ describe 'round trip' do
310
+ it 'should be idempotent for escapes' do
311
+ text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
312
+ act = subject.generate_from_text(text)
313
+ xml = act.to_xml(encoding: 'utf-8')
314
+ subject.text_from_act(act).should == text
315
+ end
316
+ end
272
317
  end
@@ -117,16 +117,19 @@ EOS
117
117
  it 'should handle escaped content' do
118
118
  node = parse :body, <<EOS
119
119
  \\1. ignored
120
+ foo \\\\bar
120
121
 
121
122
  \\CROSSHEADING cross\\heading
122
123
 
123
124
  1. Sec\\tion
124
125
  \\Chapter 2 ignored
126
+ Some text with a \\\\real backslash
125
127
  EOS
126
128
  to_xml(node).should == '<body>
127
129
  <hcontainer eId="hcontainer_1" name="hcontainer">
128
130
  <content>
129
131
  <p>1. ignored</p>
132
+ <p>foo \\bar</p>
130
133
  <p>CROSSHEADING crossheading</p>
131
134
  </content>
132
135
  </hcontainer>
@@ -136,6 +139,7 @@ EOS
136
139
  <hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
137
140
  <content>
138
141
  <p>Chapter 2 ignored</p>
142
+ <p>Some text with a \\real backslash</p>
139
143
  </content>
140
144
  </hcontainer>
141
145
  </section>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.4.1
4
+ version: 10.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-14 00:00:00.000000000 Z
11
+ date: 2021-04-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -144,6 +144,7 @@ files:
144
144
  - spec/counters_spec.rb
145
145
  - spec/extract/extractor_spec.rb
146
146
  - spec/fixtures/community-fire-safety.xml
147
+ - spec/fixtures/roundtrip-escapes.txt
147
148
  - spec/generator_spec.rb
148
149
  - spec/parse/blocklists_spec.rb
149
150
  - spec/parse/builder_spec.rb
@@ -182,6 +183,7 @@ test_files:
182
183
  - spec/counters_spec.rb
183
184
  - spec/extract/extractor_spec.rb
184
185
  - spec/fixtures/community-fire-safety.xml
186
+ - spec/fixtures/roundtrip-escapes.txt
185
187
  - spec/generator_spec.rb
186
188
  - spec/parse/blocklists_spec.rb
187
189
  - spec/parse/builder_spec.rb