slaw 10.1.0 → 10.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -1
- data/README.md +21 -0
- data/lib/slaw/extract/extractor.rb +2 -9
- data/lib/slaw/grammars/counters.rb +18 -6
- data/lib/slaw/grammars/inlines.treetop +13 -1
- data/lib/slaw/grammars/inlines_nodes.rb +26 -1
- data/lib/slaw/grammars/za/act_text.xsl +153 -31
- data/lib/slaw/version.rb +1 -1
- data/slaw.gemspec +0 -1
- data/spec/counters_spec.rb +38 -0
- data/spec/fixtures/roundtrip-escapes.txt +20 -0
- data/spec/generator_spec.rb +60 -15
- data/spec/za/act_block_spec.rb +6 -2
- data/spec/za/act_inline_spec.rb +37 -0
- metadata +9 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba413f53b9d24192d6ce5a168eed83ca55a317dcb3768009cfdf3bf902a23327
|
4
|
+
data.tar.gz: f664b13ce90bb21b65c0fffb2f780dc5937429c31ac2a9d6270259e0669f2f61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f0929aa1fcce2c86f6340fec7731f5e865e0b58b793b4f7d64ba48ec2463299498b4b96fea3487a11ab57a2ae932273e27c593395cd2ca263347006fa36a88a
|
7
|
+
data.tar.gz: 72aceb516d091bd396deeb828873058a3cda366524c3e1819590ae77f021ab9a806baf0108f1239dceaaf0e7a64713e1c954ec385b5ed03563f1d702cd4b0984
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -86,6 +86,27 @@ You can create your own grammar by creating a gem that provides these files and
|
|
86
86
|
|
87
87
|
## Changelog
|
88
88
|
|
89
|
+
### 10.5.0 (20 April 2021)
|
90
|
+
|
91
|
+
* Handle escaping inlines when unparsing.
|
92
|
+
|
93
|
+
### 10.4.1 (14 April 2021)
|
94
|
+
|
95
|
+
* Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, e.g. `[https:\/\/example.com](https://example.com)`.
|
96
|
+
|
97
|
+
### 10.4.0 (9 April 2021)
|
98
|
+
|
99
|
+
* Remove dependency on mimemagic. Guess file type based on filename instead.
|
100
|
+
|
101
|
+
### 10.3.1 (11 January 2021)
|
102
|
+
|
103
|
+
* Strip ASCII, Unicode General Punctuation and Unicode Supplemental Punctuation characters from num elements when building eIds.
|
104
|
+
|
105
|
+
### 10.2.0 (4 September 2020)
|
106
|
+
|
107
|
+
* support inline superscript `^^text^^`
|
108
|
+
* support inline subscript `_^text^_`
|
109
|
+
|
89
110
|
### 10.1.0 (18 June 2020)
|
90
111
|
|
91
112
|
* hcontainer elements have name attributes, to be compliant with AKN 3.0
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'mimemagic'
|
2
|
-
|
3
1
|
module Slaw
|
4
2
|
module Extract
|
5
3
|
|
@@ -13,15 +11,10 @@ module Slaw
|
|
13
11
|
#
|
14
12
|
# @return [String] extracted text
|
15
13
|
def extract_from_file(filename)
|
16
|
-
|
17
|
-
|
18
|
-
case mimetype && mimetype.type
|
19
|
-
when 'text/html'
|
14
|
+
if filename.end_with? '.html' or filename.end_with? '.htm'
|
20
15
|
extract_from_html(filename)
|
21
|
-
when 'text/plain', nil
|
22
|
-
extract_from_text(filename)
|
23
16
|
else
|
24
|
-
|
17
|
+
extract_from_text(filename)
|
25
18
|
end
|
26
19
|
end
|
27
20
|
|
@@ -24,20 +24,32 @@ module Slaw
|
|
24
24
|
# Clean a <num> value for use in an eId
|
25
25
|
# See https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692306
|
26
26
|
#
|
27
|
-
# The number part of the identifiers of such elements corresponds to the
|
27
|
+
# "The number part of the identifiers of such elements corresponds to the
|
28
28
|
# stripping of all final punctuation, meaningless separations as well as
|
29
29
|
# redundant characters in the content of the <num> element. The
|
30
|
-
# representation is case-sensitive
|
30
|
+
# representation is case-sensitive."
|
31
|
+
#
|
32
|
+
# Our algorithm is:
|
33
|
+
# 1. strip all leading and trailing whitespace and punctuation (using the unicode punctuation blocks)
|
34
|
+
# 2. strip all whitespace
|
35
|
+
# 3. replace all remaining punctuation with a hyphen.
|
36
|
+
#
|
37
|
+
# The General Punctuation block is \u2000-\u206F, and the Supplemental Punctuation block is \u2E00-\u2E7F.
|
31
38
|
#
|
32
39
|
# (i) -> i
|
33
40
|
# 1.2. -> 1-2
|
41
|
+
# “2.3” -> 2-3
|
34
42
|
# 3a bis -> 3abis
|
35
43
|
def self.clean(num)
|
44
|
+
# leading whitespace and punctuation
|
45
|
+
num = num.gsub(/^[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '')
|
46
|
+
# trailing whitespace and punctuation
|
47
|
+
num.gsub!(/[\s\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+$/, '')
|
48
|
+
# whitespace
|
49
|
+
num.gsub!(/\s/, '')
|
50
|
+
# remaining punctuation to a hyphen
|
51
|
+
num.gsub!(/[\u{2000}-\u{206f}\u{2e00}-\u{2e7f}!"#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]+/, '-')
|
36
52
|
num
|
37
|
-
.gsub(/[ ()\[\]]/, '')
|
38
|
-
.gsub(/\.+$/, '')
|
39
|
-
.gsub(/^\.+/, '')
|
40
|
-
.gsub(/\.+/, '-')
|
41
53
|
end
|
42
54
|
end
|
43
55
|
end
|
@@ -20,7 +20,7 @@ module Slaw
|
|
20
20
|
end
|
21
21
|
|
22
22
|
rule inline_item
|
23
|
-
remark / image / ref / bold / italics / [^\n]
|
23
|
+
remark / image / ref / bold / italics / superscript / subscript / '\\'? [^\n]
|
24
24
|
<InlineItem>
|
25
25
|
end
|
26
26
|
|
@@ -57,6 +57,18 @@ module Slaw
|
|
57
57
|
<Ref>
|
58
58
|
end
|
59
59
|
|
60
|
+
rule superscript
|
61
|
+
# ^^foo^^
|
62
|
+
'^^' content:(!'^^' inline_item)+ '^^'
|
63
|
+
<Superscript>
|
64
|
+
end
|
65
|
+
|
66
|
+
rule subscript
|
67
|
+
# _^foo^_
|
68
|
+
'_^' content:(!'^_' inline_item)+ '^_'
|
69
|
+
<Subscript>
|
70
|
+
end
|
71
|
+
|
60
72
|
end
|
61
73
|
end
|
62
74
|
end
|
@@ -37,7 +37,12 @@ module Slaw
|
|
37
37
|
|
38
38
|
class InlineItem < Treetop::Runtime::SyntaxNode
|
39
39
|
def to_xml(b, idprefix)
|
40
|
-
|
40
|
+
if text_value.start_with? '\\'
|
41
|
+
# handle escaped characters: \a -> a
|
42
|
+
b.text(text_value[1..])
|
43
|
+
else
|
44
|
+
b.text(text_value)
|
45
|
+
end
|
41
46
|
end
|
42
47
|
end
|
43
48
|
|
@@ -71,6 +76,26 @@ module Slaw
|
|
71
76
|
end
|
72
77
|
end
|
73
78
|
|
79
|
+
class Superscript < Treetop::Runtime::SyntaxNode
|
80
|
+
def to_xml(b, idprefix)
|
81
|
+
b.sup { |b|
|
82
|
+
for e in content.elements
|
83
|
+
e.inline_item.to_xml(b, idprefix)
|
84
|
+
end
|
85
|
+
}
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
class Subscript < Treetop::Runtime::SyntaxNode
|
90
|
+
def to_xml(b, idprefix)
|
91
|
+
b.sub { |b|
|
92
|
+
for e in content.elements
|
93
|
+
e.inline_item.to_xml(b, idprefix)
|
94
|
+
end
|
95
|
+
}
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
74
99
|
end
|
75
100
|
end
|
76
101
|
end
|
@@ -9,31 +9,141 @@
|
|
9
9
|
<xsl:strip-space elements="*"/>
|
10
10
|
<xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
|
11
11
|
|
12
|
+
<!-- replaces "value" in "text" with "replacement" -->
|
13
|
+
<xsl:template name="string-replace-all">
|
14
|
+
<xsl:param name="text" />
|
15
|
+
<xsl:param name="value" />
|
16
|
+
<xsl:param name="replacement" />
|
17
|
+
|
18
|
+
<xsl:choose>
|
19
|
+
<xsl:when test="$text = '' or $value = '' or not($value)">
|
20
|
+
<xsl:value-of select="$text" />
|
21
|
+
</xsl:when>
|
22
|
+
<xsl:when test="contains($text, $value)">
|
23
|
+
<xsl:value-of select="substring-before($text, $value)"/>
|
24
|
+
<xsl:value-of select="$replacement" />
|
25
|
+
<xsl:call-template name="string-replace-all">
|
26
|
+
<xsl:with-param name="text" select="substring-after($text, $value)" />
|
27
|
+
<xsl:with-param name="value" select="$value" />
|
28
|
+
<xsl:with-param name="replacement" select="$replacement" />
|
29
|
+
</xsl:call-template>
|
30
|
+
</xsl:when>
|
31
|
+
<xsl:otherwise>
|
32
|
+
<xsl:value-of select="$text" />
|
33
|
+
</xsl:otherwise>
|
34
|
+
</xsl:choose>
|
35
|
+
</xsl:template>
|
36
|
+
|
37
|
+
<!-- Escape inline markers with a backslash -->
|
38
|
+
<xsl:template name="escape-inlines">
|
39
|
+
<xsl:param name="text" />
|
40
|
+
|
41
|
+
<!-- This works from the inside out, first escaping backslash chars themselves, then escaping
|
42
|
+
the different types of inline markers -->
|
43
|
+
<xsl:call-template name="string-replace-all">
|
44
|
+
<xsl:with-param name="text">
|
45
|
+
<xsl:call-template name="string-replace-all">
|
46
|
+
<xsl:with-param name="text">
|
47
|
+
<xsl:call-template name="string-replace-all">
|
48
|
+
<xsl:with-param name="text">
|
49
|
+
<xsl:call-template name="string-replace-all">
|
50
|
+
<xsl:with-param name="text">
|
51
|
+
<xsl:call-template name="string-replace-all">
|
52
|
+
<xsl:with-param name="text">
|
53
|
+
<xsl:call-template name="string-replace-all">
|
54
|
+
<xsl:with-param name="text">
|
55
|
+
<xsl:call-template name="string-replace-all">
|
56
|
+
<xsl:with-param name="text">
|
57
|
+
<xsl:call-template name="string-replace-all">
|
58
|
+
<xsl:with-param name="text">
|
59
|
+
<xsl:call-template name="string-replace-all">
|
60
|
+
<xsl:with-param name="text">
|
61
|
+
<xsl:call-template name="string-replace-all">
|
62
|
+
<xsl:with-param name="text" select="$text" />
|
63
|
+
<xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
|
64
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
|
65
|
+
</xsl:call-template>
|
66
|
+
</xsl:with-param>
|
67
|
+
<xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
|
68
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
|
69
|
+
</xsl:call-template>
|
70
|
+
</xsl:with-param>
|
71
|
+
<xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
|
72
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
|
73
|
+
</xsl:call-template>
|
74
|
+
</xsl:with-param>
|
75
|
+
<xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
|
76
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
|
77
|
+
</xsl:call-template>
|
78
|
+
</xsl:with-param>
|
79
|
+
<xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
|
80
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
|
81
|
+
</xsl:call-template>
|
82
|
+
</xsl:with-param>
|
83
|
+
<xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
|
84
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
|
85
|
+
</xsl:call-template>
|
86
|
+
</xsl:with-param>
|
87
|
+
<xsl:with-param name="value"><xsl:value-of select="'!['" /></xsl:with-param>
|
88
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\!['" /></xsl:with-param>
|
89
|
+
</xsl:call-template>
|
90
|
+
</xsl:with-param>
|
91
|
+
<xsl:with-param name="value"><xsl:value-of select="']('" /></xsl:with-param>
|
92
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\]('" /></xsl:with-param>
|
93
|
+
</xsl:call-template>
|
94
|
+
</xsl:with-param>
|
95
|
+
<xsl:with-param name="value"><xsl:value-of select="'[['" /></xsl:with-param>
|
96
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\[\['" /></xsl:with-param>
|
97
|
+
</xsl:call-template>
|
98
|
+
</xsl:with-param>
|
99
|
+
<xsl:with-param name="value"><xsl:value-of select="']]'" /></xsl:with-param>
|
100
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\]\]'" /></xsl:with-param>
|
101
|
+
</xsl:call-template>
|
102
|
+
</xsl:template>
|
103
|
+
|
12
104
|
<!-- adds a backslash to the start of the value param, if necessary -->
|
13
|
-
<xsl:template name="escape">
|
105
|
+
<xsl:template name="escape-prefixes">
|
14
106
|
<xsl:param name="value"/>
|
15
107
|
|
16
108
|
<xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
|
17
109
|
<!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
|
18
110
|
<xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
|
19
111
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
112
|
+
<xsl:variable name="slash">
|
113
|
+
<!-- p tags must escape initial content that looks like a block element marker -->
|
114
|
+
<xsl:if test="$prefix = 'BODY' or
|
115
|
+
$prefix = 'PREAMBLE' or
|
116
|
+
$prefix = 'PREFACE' or
|
117
|
+
starts-with($prefix, 'CHAPTER ') or
|
118
|
+
starts-with($prefix, 'PART ') or
|
119
|
+
starts-with($prefix, 'SUBPART ') or
|
120
|
+
starts-with($prefix, 'SCHEDULE ') or
|
121
|
+
starts-with($prefix, 'HEADING ') or
|
122
|
+
starts-with($prefix, 'SUBHEADING ') or
|
123
|
+
starts-with($prefix, 'LONGTITLE ') or
|
124
|
+
starts-with($prefix, 'CROSSHEADING ') or
|
125
|
+
starts-with($prefix, '{|') or
|
126
|
+
starts-with($numprefix, '(')">
|
127
|
+
<xsl:value-of select="'\'" />
|
128
|
+
</xsl:if>
|
129
|
+
</xsl:variable>
|
130
|
+
|
131
|
+
<xsl:value-of select="concat($slash, $value)" />
|
132
|
+
</xsl:template>
|
133
|
+
|
134
|
+
<!-- escapes inline markers in the value param, then adds a backslash to the start of the result if it looks like a block element marker -->
|
135
|
+
<xsl:template name="escape">
|
136
|
+
<xsl:param name="value"/>
|
137
|
+
|
138
|
+
<xsl:variable name="escaped">
|
139
|
+
<xsl:call-template name="escape-inlines">
|
140
|
+
<xsl:with-param name="text" select="$value" />
|
141
|
+
</xsl:call-template>
|
142
|
+
</xsl:variable>
|
143
|
+
|
144
|
+
<xsl:call-template name="escape-prefixes">
|
145
|
+
<xsl:with-param name="value" select="$escaped" />
|
146
|
+
</xsl:call-template>
|
37
147
|
</xsl:template>
|
38
148
|
|
39
149
|
<xsl:template match="a:act">
|
@@ -157,12 +267,19 @@
|
|
157
267
|
</xsl:template>
|
158
268
|
|
159
269
|
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
160
|
-
<xsl:template match="a:p[not(ancestor::a:table)]/text()[
|
270
|
+
<xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
|
161
271
|
<xsl:call-template name="escape">
|
162
272
|
<xsl:with-param name="value" select="." />
|
163
273
|
</xsl:call-template>
|
164
274
|
</xsl:template>
|
165
275
|
|
276
|
+
<!-- escape inlines in text nodes -->
|
277
|
+
<xsl:template match="text()">
|
278
|
+
<xsl:call-template name="escape-inlines">
|
279
|
+
<xsl:with-param name="text" select="." />
|
280
|
+
</xsl:call-template>
|
281
|
+
</xsl:template>
|
282
|
+
|
166
283
|
|
167
284
|
<!-- attachments/schedules -->
|
168
285
|
<xsl:template match="a:attachment">
|
@@ -192,31 +309,24 @@
|
|
192
309
|
<xsl:value-of select="." />
|
193
310
|
<xsl:text>" </xsl:text>
|
194
311
|
</xsl:for-each>
|
195
|
-
<xsl:text>
|
196
|
-
|-</xsl:text>
|
312
|
+
<xsl:text> |-</xsl:text>
|
197
313
|
|
198
314
|
<xsl:apply-templates />
|
199
|
-
<xsl:text>
|
200
|
-
|}
|
201
|
-
|
202
|
-
</xsl:text>
|
315
|
+
<xsl:text> |} </xsl:text>
|
203
316
|
</xsl:template>
|
204
317
|
|
205
318
|
<xsl:template match="a:tr">
|
206
319
|
<xsl:apply-templates />
|
207
|
-
<xsl:text>
|
208
|
-
|-</xsl:text>
|
320
|
+
<xsl:text> |-</xsl:text>
|
209
321
|
</xsl:template>
|
210
322
|
|
211
323
|
<xsl:template match="a:th|a:td">
|
212
324
|
<xsl:choose>
|
213
325
|
<xsl:when test="local-name(.) = 'th'">
|
214
|
-
<xsl:text>
|
215
|
-
! </xsl:text>
|
326
|
+
<xsl:text> ! </xsl:text>
|
216
327
|
</xsl:when>
|
217
328
|
<xsl:when test="local-name(.) = 'td'">
|
218
|
-
<xsl:text>
|
219
|
-
| </xsl:text>
|
329
|
+
<xsl:text> | </xsl:text>
|
220
330
|
</xsl:when>
|
221
331
|
</xsl:choose>
|
222
332
|
|
@@ -275,6 +385,18 @@
|
|
275
385
|
<xsl:text>**</xsl:text>
|
276
386
|
</xsl:template>
|
277
387
|
|
388
|
+
<xsl:template match="a:sup">
|
389
|
+
<xsl:text>^^</xsl:text>
|
390
|
+
<xsl:apply-templates />
|
391
|
+
<xsl:text>^^</xsl:text>
|
392
|
+
</xsl:template>
|
393
|
+
|
394
|
+
<xsl:template match="a:sub">
|
395
|
+
<xsl:text>_^</xsl:text>
|
396
|
+
<xsl:apply-templates />
|
397
|
+
<xsl:text>^_</xsl:text>
|
398
|
+
</xsl:template>
|
399
|
+
|
278
400
|
<xsl:template match="a:eol">
|
279
401
|
<xsl:text> </xsl:text>
|
280
402
|
</xsl:template>
|
data/lib/slaw/version.rb
CHANGED
data/slaw.gemspec
CHANGED
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
require 'slaw'
|
6
|
+
|
7
|
+
describe Slaw::Grammars::Counters do
|
8
|
+
describe '#clean' do
|
9
|
+
it 'should remove leading and trailing punctuation' do
|
10
|
+
described_class.clean("").should == ""
|
11
|
+
described_class.clean(" ").should == ""
|
12
|
+
described_class.clean("( )").should == ""
|
13
|
+
described_class.clean("(123.4-5)").should == "123-4-5"
|
14
|
+
described_class.clean("(312.32.7)").should == "312-32-7"
|
15
|
+
described_class.clean("(312_32_7)").should == "312-32-7"
|
16
|
+
described_class.clean("(6)").should == "6"
|
17
|
+
described_class.clean("[16]").should == "16"
|
18
|
+
described_class.clean("(i)").should == "i"
|
19
|
+
described_class.clean("[i]").should == "i"
|
20
|
+
described_class.clean("(2bis)").should == "2bis"
|
21
|
+
described_class.clean('"1.2.').should == "1-2"
|
22
|
+
described_class.clean("1.2.").should == "1-2"
|
23
|
+
described_class.clean("“2.3").should == "2-3"
|
24
|
+
described_class.clean("2,3").should == "2-3"
|
25
|
+
described_class.clean("2,3, 4,").should == "2-3-4"
|
26
|
+
described_class.clean("3a bis").should == "3abis"
|
27
|
+
described_class.clean("3é").should == "3é"
|
28
|
+
described_class.clean(" -3a--4,9").should == "3a-4-9"
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should handle non-arabic numerals' do
|
32
|
+
# hebrew aleph
|
33
|
+
described_class.clean("(א)").should == "א"
|
34
|
+
# chinese 3
|
35
|
+
described_class.clean("(三)").should == "三"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
BODY
|
2
|
+
|
3
|
+
1. Section that tests escapes
|
4
|
+
|
5
|
+
text \\ with a single slash
|
6
|
+
|
7
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
8
|
+
|
9
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
10
|
+
|
11
|
+
refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
|
12
|
+
|
13
|
+
nested ** stars \*\* in bold \*\***
|
14
|
+
|
15
|
+
nested // slashes \/\/ in italics \/\///
|
16
|
+
|
17
|
+
nested ** stars in // italics \*\* // and bold **
|
18
|
+
|
19
|
+
super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
|
20
|
+
|
data/spec/generator_spec.rb
CHANGED
@@ -78,45 +78,81 @@ XML
|
|
78
78
|
|
79
79
|
1. Section
|
80
80
|
|
81
|
-
|
81
|
+
\\Chapter 2 ignored
|
82
82
|
|
83
83
|
Chapters
|
84
84
|
|
85
|
-
|
85
|
+
\\Part 2 ignored
|
86
86
|
|
87
87
|
participation
|
88
88
|
|
89
|
-
|
89
|
+
\\Schedule 2 ignored
|
90
90
|
|
91
91
|
Schedules
|
92
92
|
|
93
|
-
|
93
|
+
\\HEADING x
|
94
94
|
|
95
|
-
|
95
|
+
\\SUBHEADING x
|
96
96
|
|
97
97
|
BODY not escaped
|
98
98
|
|
99
|
-
|
99
|
+
\\BODY
|
100
100
|
|
101
101
|
PREAMBLE not escaped
|
102
102
|
|
103
|
-
|
103
|
+
\\PREAMBLE
|
104
104
|
|
105
105
|
PREFACE not escaped
|
106
106
|
|
107
|
-
|
107
|
+
\\PREFACE
|
108
108
|
|
109
|
-
|
109
|
+
\\2. ignored
|
110
110
|
|
111
|
-
|
111
|
+
\\2.1 ignored
|
112
112
|
|
113
|
-
|
113
|
+
\\(2) ignored
|
114
114
|
|
115
|
-
|
115
|
+
\\(a) ignored
|
116
116
|
|
117
|
-
|
117
|
+
\\(2a) ignored
|
118
118
|
|
119
|
-
|
119
|
+
\\{| ignored
|
120
|
+
|
121
|
+
'
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'should escape inlines when unparsing' do
|
125
|
+
doc = xml2doc(section(<<'XML'
|
126
|
+
<num>1.</num>
|
127
|
+
<heading>Section</heading>
|
128
|
+
<paragraph id="section-1.paragraph-0">
|
129
|
+
<content>
|
130
|
+
<p>text \ with a single slash</p>
|
131
|
+
<p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
|
132
|
+
<p>inlines that ** should // be [[ escaped ![ and ]]</p>
|
133
|
+
<p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
|
134
|
+
<p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
|
135
|
+
</content>
|
136
|
+
</paragraph>
|
137
|
+
XML
|
138
|
+
))
|
139
|
+
|
140
|
+
text = subject.text_from_act(doc)
|
141
|
+
# NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT a double backslash
|
142
|
+
# which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
|
143
|
+
text.should == 'BODY
|
144
|
+
|
145
|
+
1. Section
|
146
|
+
|
147
|
+
text \\\\ with a single slash
|
148
|
+
|
149
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
150
|
+
|
151
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
152
|
+
|
153
|
+
refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
|
154
|
+
|
155
|
+
super ^^with \^\^^^ and sub _^\_^ with \^_^_
|
120
156
|
|
121
157
|
'
|
122
158
|
end
|
@@ -148,7 +184,7 @@ XML
|
|
148
184
|
|
149
185
|
1. Section
|
150
186
|
|
151
|
-
|
187
|
+
\\(2) A special meeting [[ foo ]]:
|
152
188
|
|
153
189
|
(a) the chairperson so directs; or
|
154
190
|
|
@@ -269,4 +305,13 @@ Subject to approval in terms of this By-Law.
|
|
269
305
|
'
|
270
306
|
end
|
271
307
|
end
|
308
|
+
|
309
|
+
describe 'round trip' do
|
310
|
+
it 'should be idempotent for escapes' do
|
311
|
+
text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
|
312
|
+
act = subject.generate_from_text(text)
|
313
|
+
xml = act.to_xml(encoding: 'utf-8')
|
314
|
+
subject.text_from_act(act).should == text
|
315
|
+
end
|
316
|
+
end
|
272
317
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -117,16 +117,19 @@ EOS
|
|
117
117
|
it 'should handle escaped content' do
|
118
118
|
node = parse :body, <<EOS
|
119
119
|
\\1. ignored
|
120
|
+
foo \\\\bar
|
120
121
|
|
121
|
-
\\CROSSHEADING
|
122
|
+
\\CROSSHEADING cross\\heading
|
122
123
|
|
123
|
-
1.
|
124
|
+
1. Sec\\tion
|
124
125
|
\\Chapter 2 ignored
|
126
|
+
Some text with a \\\\real backslash
|
125
127
|
EOS
|
126
128
|
to_xml(node).should == '<body>
|
127
129
|
<hcontainer eId="hcontainer_1" name="hcontainer">
|
128
130
|
<content>
|
129
131
|
<p>1. ignored</p>
|
132
|
+
<p>foo \\bar</p>
|
130
133
|
<p>CROSSHEADING crossheading</p>
|
131
134
|
</content>
|
132
135
|
</hcontainer>
|
@@ -136,6 +139,7 @@ EOS
|
|
136
139
|
<hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
|
137
140
|
<content>
|
138
141
|
<p>Chapter 2 ignored</p>
|
142
|
+
<p>Some text with a \\real backslash</p>
|
139
143
|
</content>
|
140
144
|
</hcontainer>
|
141
145
|
</section>
|
data/spec/za/act_inline_spec.rb
CHANGED
@@ -325,6 +325,17 @@ EOS
|
|
325
325
|
<p>This statement has <remark status="editorial">[<ref href="/foo/bar">a link in</ref> a remark]</remark></p>
|
326
326
|
<p>This statement has <remark status="editorial">[a <ref href="/foo/bar">link in a remark</ref>]</remark></p>
|
327
327
|
</content>
|
328
|
+
</hcontainer>'
|
329
|
+
end
|
330
|
+
|
331
|
+
it 'should handle escapes in links' do
|
332
|
+
node = parse :generic_container, <<EOS
|
333
|
+
Visit the site [https:\\/\\/example.com](https://example.com) for more.
|
334
|
+
EOS
|
335
|
+
to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
|
336
|
+
<content>
|
337
|
+
<p>Visit the site <ref href="https://example.com">https://example.com</ref> for more.</p>
|
338
|
+
</content>
|
328
339
|
</hcontainer>'
|
329
340
|
end
|
330
341
|
end
|
@@ -498,4 +509,30 @@ EOS
|
|
498
509
|
end
|
499
510
|
end
|
500
511
|
|
512
|
+
describe 'superscript' do
|
513
|
+
it 'should handle superscript' do
|
514
|
+
node = parse :generic_container, <<EOS
|
515
|
+
Hello ^^super **bold** ^^ foo
|
516
|
+
EOS
|
517
|
+
to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
|
518
|
+
<content>
|
519
|
+
<p>Hello <sup>super <b>bold</b> </sup> foo</p>
|
520
|
+
</content>
|
521
|
+
</hcontainer>'
|
522
|
+
end
|
523
|
+
end
|
524
|
+
|
525
|
+
describe 'subscript' do
|
526
|
+
it 'should handle subscript' do
|
527
|
+
node = parse :generic_container, <<EOS
|
528
|
+
Hello _^sub **bold** ^_ foo
|
529
|
+
EOS
|
530
|
+
to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
|
531
|
+
<content>
|
532
|
+
<p>Hello <sub>sub <b>bold</b> </sub> foo</p>
|
533
|
+
</content>
|
534
|
+
</hcontainer>'
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
501
538
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.
|
4
|
+
version: 10.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,20 +94,6 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0.20'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: mimemagic
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0.2'
|
104
|
-
type: :runtime
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0.2'
|
111
97
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
112
98
|
acts from plain text and PDF documents.
|
113
99
|
email:
|
@@ -155,8 +141,10 @@ files:
|
|
155
141
|
- lib/slaw/version.rb
|
156
142
|
- lib/slaw/xml_support.rb
|
157
143
|
- slaw.gemspec
|
144
|
+
- spec/counters_spec.rb
|
158
145
|
- spec/extract/extractor_spec.rb
|
159
146
|
- spec/fixtures/community-fire-safety.xml
|
147
|
+
- spec/fixtures/roundtrip-escapes.txt
|
160
148
|
- spec/generator_spec.rb
|
161
149
|
- spec/parse/blocklists_spec.rb
|
162
150
|
- spec/parse/builder_spec.rb
|
@@ -172,7 +160,7 @@ homepage: https://github.com/longhotsummer/slaw
|
|
172
160
|
licenses:
|
173
161
|
- MIT
|
174
162
|
metadata: {}
|
175
|
-
post_install_message:
|
163
|
+
post_install_message:
|
176
164
|
rdoc_options: []
|
177
165
|
require_paths:
|
178
166
|
- lib
|
@@ -188,12 +176,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
188
176
|
version: '0'
|
189
177
|
requirements: []
|
190
178
|
rubygems_version: 3.0.3
|
191
|
-
signing_key:
|
179
|
+
signing_key:
|
192
180
|
specification_version: 4
|
193
181
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
194
182
|
test_files:
|
183
|
+
- spec/counters_spec.rb
|
195
184
|
- spec/extract/extractor_spec.rb
|
196
185
|
- spec/fixtures/community-fire-safety.xml
|
186
|
+
- spec/fixtures/roundtrip-escapes.txt
|
197
187
|
- spec/generator_spec.rb
|
198
188
|
- spec/parse/blocklists_spec.rb
|
199
189
|
- spec/parse/builder_spec.rb
|