slaw 10.3.1 → 10.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +22 -0
- data/README.md +20 -0
- data/lib/slaw/extract/extractor.rb +2 -9
- data/lib/slaw/extract/html_to_akn_text.xsl +29 -23
- data/lib/slaw/grammars/inlines.treetop +7 -1
- data/lib/slaw/grammars/inlines_nodes.rb +15 -1
- data/lib/slaw/grammars/za/act_text.xsl +154 -32
- data/lib/slaw/version.rb +1 -1
- data/slaw.gemspec +0 -1
- data/spec/fixtures/roundtrip-escapes.txt +24 -0
- data/spec/generator_spec.rb +81 -15
- data/spec/za/act_block_spec.rb +6 -2
- data/spec/za/act_inline_spec.rb +24 -0
- metadata +8 -20
- data/.travis.yml +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e4cdd070ad171d0d999cbaa93da4fb2723df0a2d430679833d3588903049575
|
4
|
+
data.tar.gz: 33bffb144c455bb8d35f7c8dc7e3593041eead5d1c76569575afa8df73bd0a8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b69cf6e2303be5096d3697ced6fe3bb1538d46a5657c718f8e1c6e8e4edee1b1c24ea4b7a6f199920499412b716cfa21e0fddb72be46928377d02145236831cf
|
7
|
+
data.tar.gz: 13744a12c6e8f62d90cd6fd4b553e3d45f4dd7b38ca076c6da2d2dcea7e72a31feb9345f687d7d29fa5a0d9a8874c4a506929f5c1a73838f2b551df8cc063806
|
@@ -0,0 +1,22 @@
|
|
1
|
+
name: Test
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
|
9
|
+
strategy:
|
10
|
+
matrix:
|
11
|
+
ruby-version: [2.7, 2.6]
|
12
|
+
|
13
|
+
steps:
|
14
|
+
- uses: actions/checkout@v2
|
15
|
+
- name: Set up Ruby ${{ matrix.ruby-version }}
|
16
|
+
uses: ruby/setup-ruby@v1
|
17
|
+
with:
|
18
|
+
ruby-version: ${{ matrix.ruby-version }}
|
19
|
+
- name: Install dependencies
|
20
|
+
run: bundle install
|
21
|
+
- name: Run tests
|
22
|
+
run: bundle exec rake
|
data/README.md
CHANGED
@@ -86,6 +86,26 @@ You can create your own grammar by creating a gem that provides these files and
|
|
86
86
|
|
87
87
|
## Changelog
|
88
88
|
|
89
|
+
### 10.7.0 (11 June 2021)
|
90
|
+
|
91
|
+
* Support underlines with `__text__`
|
92
|
+
|
93
|
+
### 10.6.0 (10 May 2021)
|
94
|
+
|
95
|
+
* Handle sup and sub when extracting from HTML.
|
96
|
+
|
97
|
+
### 10.5.0 (20 April 2021)
|
98
|
+
|
99
|
+
* Handle escaping inlines when unparsing.
|
100
|
+
|
101
|
+
### 10.4.1 (14 April 2021)
|
102
|
+
|
103
|
+
* Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, eg `[https:\/\/example.com](https://example.com)`
|
104
|
+
|
105
|
+
### 10.4.0 (9 April 2021)
|
106
|
+
|
107
|
+
* Remove dependency on mimemagic. Guess file type based on filename instead.
|
108
|
+
|
89
109
|
### 10.3.1 (11 January 2021)
|
90
110
|
|
91
111
|
* Strip ascii, unicode general and unicode supplemental punctuation from num elements when building eIds
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'mimemagic'
|
2
|
-
|
3
1
|
module Slaw
|
4
2
|
module Extract
|
5
3
|
|
@@ -13,15 +11,10 @@ module Slaw
|
|
13
11
|
#
|
14
12
|
# @return [String] extracted text
|
15
13
|
def extract_from_file(filename)
|
16
|
-
|
17
|
-
|
18
|
-
case mimetype && mimetype.type
|
19
|
-
when 'text/html'
|
14
|
+
if filename.end_with? '.html' or filename.end_with? '.htm'
|
20
15
|
extract_from_html(filename)
|
21
|
-
when 'text/plain', nil
|
22
|
-
extract_from_text(filename)
|
23
16
|
else
|
24
|
-
|
17
|
+
extract_from_text(filename)
|
25
18
|
end
|
26
19
|
end
|
27
20
|
|
@@ -11,9 +11,10 @@
|
|
11
11
|
|
12
12
|
<xsl:template match="head|style|script|link" />
|
13
13
|
|
14
|
-
|
14
|
+
<!-- block containers that end with newlines -->
|
15
|
+
<xsl:template match="ul|ol|section|article|h1|h2|h3|h4|h5">
|
15
16
|
<xsl:apply-templates />
|
16
|
-
<xsl:text> </xsl:text>
|
17
|
+
<xsl:text> </xsl:text>
|
17
18
|
</xsl:template>
|
18
19
|
|
19
20
|
<xsl:template match="ul/li">
|
@@ -23,20 +24,23 @@
|
|
23
24
|
<xsl:text> </xsl:text>
|
24
25
|
</xsl:template>
|
25
26
|
|
27
|
+
<!-- numbered lists should include a number -->
|
26
28
|
<xsl:template match="ol/li">
|
27
|
-
<!-- 1. foo -->
|
29
|
+
<!-- \1. foo -->
|
28
30
|
<xsl:text>\</xsl:text>
|
29
|
-
<xsl:
|
31
|
+
<xsl:choose>
|
32
|
+
<xsl:when test="@value">
|
33
|
+
<xsl:value-of select="@value" />
|
34
|
+
</xsl:when>
|
35
|
+
<xsl:otherwise>
|
36
|
+
<xsl:value-of select="position()" />
|
37
|
+
</xsl:otherwise>
|
38
|
+
</xsl:choose>
|
30
39
|
<xsl:text>. </xsl:text>
|
31
40
|
<xsl:apply-templates />
|
32
41
|
<xsl:text> </xsl:text>
|
33
42
|
</xsl:template>
|
34
43
|
|
35
|
-
<xsl:template match="h1|h2|h3|h4|h5">
|
36
|
-
<xsl:apply-templates />
|
37
|
-
<xsl:text> </xsl:text>
|
38
|
-
</xsl:template>
|
39
|
-
|
40
44
|
<xsl:template match="p|div">
|
41
45
|
<xsl:choose>
|
42
46
|
<xsl:when test="starts-with(., '[') and substring(., string-length(.)) = ']'">
|
@@ -51,32 +55,27 @@
|
|
51
55
|
<xsl:text> </xsl:text>
|
52
56
|
</xsl:template>
|
53
57
|
|
58
|
+
<!-- START tables -->
|
59
|
+
|
54
60
|
<xsl:template match="table">
|
55
61
|
<xsl:text>{| </xsl:text>
|
56
|
-
<xsl:text>
|
57
|
-
|-</xsl:text>
|
62
|
+
<xsl:text> |-</xsl:text>
|
58
63
|
<xsl:apply-templates />
|
59
|
-
<xsl:text>
|
60
|
-
|}
|
61
|
-
|
62
|
-
</xsl:text>
|
64
|
+
<xsl:text> |} </xsl:text>
|
63
65
|
</xsl:template>
|
64
66
|
|
65
67
|
<xsl:template match="tr">
|
66
68
|
<xsl:apply-templates />
|
67
|
-
<xsl:text>
|
68
|
-
|-</xsl:text>
|
69
|
+
<xsl:text> |-</xsl:text>
|
69
70
|
</xsl:template>
|
70
71
|
|
71
72
|
<xsl:template match="th|td">
|
72
73
|
<xsl:choose>
|
73
74
|
<xsl:when test="local-name(.) = 'th'">
|
74
|
-
<xsl:text>
|
75
|
-
! </xsl:text>
|
75
|
+
<xsl:text> ! </xsl:text>
|
76
76
|
</xsl:when>
|
77
77
|
<xsl:when test="local-name(.) = 'td'">
|
78
|
-
<xsl:text>
|
79
|
-
| </xsl:text>
|
78
|
+
<xsl:text> | </xsl:text>
|
80
79
|
</xsl:when>
|
81
80
|
</xsl:choose>
|
82
81
|
|
@@ -118,8 +117,15 @@
|
|
118
117
|
</xsl:template>
|
119
118
|
|
120
119
|
<xsl:template match="br">
|
121
|
-
<xsl:text>
|
122
|
-
</xsl:
|
120
|
+
<xsl:text> </xsl:text>
|
121
|
+
</xsl:template>
|
122
|
+
|
123
|
+
<xsl:template match="sup">
|
124
|
+
<xsl:text>^^</xsl:text><xsl:apply-templates /><xsl:text>^^</xsl:text>
|
125
|
+
</xsl:template>
|
126
|
+
|
127
|
+
<xsl:template match="sub">
|
128
|
+
<xsl:text>_^</xsl:text><xsl:apply-templates /><xsl:text>^_</xsl:text>
|
123
129
|
</xsl:template>
|
124
130
|
|
125
131
|
|
@@ -20,7 +20,7 @@ module Slaw
|
|
20
20
|
end
|
21
21
|
|
22
22
|
rule inline_item
|
23
|
-
remark / image / ref / bold / italics / superscript / subscript / [^\n]
|
23
|
+
remark / image / ref / bold / italics / superscript / subscript / underline / '\\'? [^\n]
|
24
24
|
<InlineItem>
|
25
25
|
end
|
26
26
|
|
@@ -69,6 +69,12 @@ module Slaw
|
|
69
69
|
<Subscript>
|
70
70
|
end
|
71
71
|
|
72
|
+
rule underline
|
73
|
+
# __foo__
|
74
|
+
'__' content:(!'__' inline_item)+ '__'
|
75
|
+
<Underline>
|
76
|
+
end
|
77
|
+
|
72
78
|
end
|
73
79
|
end
|
74
80
|
end
|
@@ -37,7 +37,12 @@ module Slaw
|
|
37
37
|
|
38
38
|
class InlineItem < Treetop::Runtime::SyntaxNode
|
39
39
|
def to_xml(b, idprefix)
|
40
|
-
|
40
|
+
if text_value.start_with? '\\'
|
41
|
+
# handle escaped characters: \a -> a
|
42
|
+
b.text(text_value[1..])
|
43
|
+
else
|
44
|
+
b.text(text_value)
|
45
|
+
end
|
41
46
|
end
|
42
47
|
end
|
43
48
|
|
@@ -91,6 +96,15 @@ module Slaw
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
|
99
|
+
class Underline < Treetop::Runtime::SyntaxNode
|
100
|
+
def to_xml(b, idprefix)
|
101
|
+
b.u { |b|
|
102
|
+
for e in content.elements
|
103
|
+
e.inline_item.to_xml(b, idprefix)
|
104
|
+
end
|
105
|
+
}
|
106
|
+
end
|
107
|
+
end
|
94
108
|
end
|
95
109
|
end
|
96
110
|
end
|
@@ -9,31 +9,147 @@
|
|
9
9
|
<xsl:strip-space elements="*"/>
|
10
10
|
<xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
|
11
11
|
|
12
|
+
<!-- replaces "value" in "text" with "replacement" -->
|
13
|
+
<xsl:template name="string-replace-all">
|
14
|
+
<xsl:param name="text" />
|
15
|
+
<xsl:param name="value" />
|
16
|
+
<xsl:param name="replacement" />
|
17
|
+
|
18
|
+
<xsl:choose>
|
19
|
+
<xsl:when test="$text = '' or $value = '' or not($value)">
|
20
|
+
<xsl:value-of select="$text" />
|
21
|
+
</xsl:when>
|
22
|
+
<xsl:when test="contains($text, $value)">
|
23
|
+
<xsl:value-of select="substring-before($text, $value)"/>
|
24
|
+
<xsl:value-of select="$replacement" />
|
25
|
+
<xsl:call-template name="string-replace-all">
|
26
|
+
<xsl:with-param name="text" select="substring-after($text, $value)" />
|
27
|
+
<xsl:with-param name="value" select="$value" />
|
28
|
+
<xsl:with-param name="replacement" select="$replacement" />
|
29
|
+
</xsl:call-template>
|
30
|
+
</xsl:when>
|
31
|
+
<xsl:otherwise>
|
32
|
+
<xsl:value-of select="$text" />
|
33
|
+
</xsl:otherwise>
|
34
|
+
</xsl:choose>
|
35
|
+
</xsl:template>
|
36
|
+
|
37
|
+
<!-- Escape inline markers with a backslash -->
|
38
|
+
<xsl:template name="escape-inlines">
|
39
|
+
<xsl:param name="text" />
|
40
|
+
|
41
|
+
<!-- This works from the inside out, first escaping backslash chars themselves, then escaping
|
42
|
+
the different types of inline markers -->
|
43
|
+
<xsl:call-template name="string-replace-all">
|
44
|
+
<xsl:with-param name="text">
|
45
|
+
<xsl:call-template name="string-replace-all">
|
46
|
+
<xsl:with-param name="text">
|
47
|
+
<xsl:call-template name="string-replace-all">
|
48
|
+
<xsl:with-param name="text">
|
49
|
+
<xsl:call-template name="string-replace-all">
|
50
|
+
<xsl:with-param name="text">
|
51
|
+
<xsl:call-template name="string-replace-all">
|
52
|
+
<xsl:with-param name="text">
|
53
|
+
<xsl:call-template name="string-replace-all">
|
54
|
+
<xsl:with-param name="text">
|
55
|
+
<xsl:call-template name="string-replace-all">
|
56
|
+
<xsl:with-param name="text">
|
57
|
+
<xsl:call-template name="string-replace-all">
|
58
|
+
<xsl:with-param name="text">
|
59
|
+
<xsl:call-template name="string-replace-all">
|
60
|
+
<xsl:with-param name="text">
|
61
|
+
<xsl:call-template name="string-replace-all">
|
62
|
+
<xsl:with-param name="text">
|
63
|
+
<xsl:call-template name="string-replace-all">
|
64
|
+
<xsl:with-param name="text" select="$text" />
|
65
|
+
<xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
|
66
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
|
67
|
+
</xsl:call-template>
|
68
|
+
</xsl:with-param>
|
69
|
+
<xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
|
70
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
|
71
|
+
</xsl:call-template>
|
72
|
+
</xsl:with-param>
|
73
|
+
<xsl:with-param name="value"><xsl:value-of select="'__'" /></xsl:with-param>
|
74
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\_\_'" /></xsl:with-param>
|
75
|
+
</xsl:call-template>
|
76
|
+
</xsl:with-param>
|
77
|
+
<xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
|
78
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
|
79
|
+
</xsl:call-template>
|
80
|
+
</xsl:with-param>
|
81
|
+
<xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
|
82
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
|
83
|
+
</xsl:call-template>
|
84
|
+
</xsl:with-param>
|
85
|
+
<xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
|
86
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
|
87
|
+
</xsl:call-template>
|
88
|
+
</xsl:with-param>
|
89
|
+
<xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
|
90
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
|
91
|
+
</xsl:call-template>
|
92
|
+
</xsl:with-param>
|
93
|
+
<xsl:with-param name="value"><xsl:value-of select="', 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
|
17
115
|
<!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
|
18
116
|
<xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
|
19
117
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
118
|
+
<xsl:variable name="slash">
|
119
|
+
<!-- p tags must escape initial content that looks like a block element marker -->
|
120
|
+
<xsl:if test="$prefix = 'BODY' or
|
121
|
+
$prefix = 'PREAMBLE' or
|
122
|
+
$prefix = 'PREFACE' or
|
123
|
+
starts-with($prefix, 'CHAPTER ') or
|
124
|
+
starts-with($prefix, 'PART ') or
|
125
|
+
starts-with($prefix, 'SUBPART ') or
|
126
|
+
starts-with($prefix, 'SCHEDULE ') or
|
127
|
+
starts-with($prefix, 'HEADING ') or
|
128
|
+
starts-with($prefix, 'SUBHEADING ') or
|
129
|
+
starts-with($prefix, 'LONGTITLE ') or
|
130
|
+
starts-with($prefix, 'CROSSHEADING ') or
|
131
|
+
starts-with($prefix, '{|') or
|
132
|
+
starts-with($numprefix, '(')">
|
133
|
+
<xsl:value-of select="'\'" />
|
134
|
+
</xsl:if>
|
135
|
+
</xsl:variable>
|
136
|
+
|
137
|
+
<xsl:value-of select="concat($slash, $value)" />
|
138
|
+
</xsl:template>
|
139
|
+
|
140
|
+
<!-- adds a backslash to the start of the text param, if necessary -->
|
141
|
+
<xsl:template name="escape">
|
142
|
+
<xsl:param name="value"/>
|
143
|
+
|
144
|
+
<xsl:variable name="escaped">
|
145
|
+
<xsl:call-template name="escape-inlines">
|
146
|
+
<xsl:with-param name="text" select="$value" />
|
147
|
+
</xsl:call-template>
|
148
|
+
</xsl:variable>
|
149
|
+
|
150
|
+
<xsl:call-template name="escape-prefixes">
|
151
|
+
<xsl:with-param name="value" select="$escaped" />
|
152
|
+
</xsl:call-template>
|
37
153
|
</xsl:template>
|
38
154
|
|
39
155
|
<xsl:template match="a:act">
|
@@ -157,12 +273,19 @@
|
|
157
273
|
</xsl:template>
|
158
274
|
|
159
275
|
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
160
|
-
<xsl:template match="a:p[not(ancestor::a:table)]/text()[
|
276
|
+
<xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
|
161
277
|
<xsl:call-template name="escape">
|
162
278
|
<xsl:with-param name="value" select="." />
|
163
279
|
</xsl:call-template>
|
164
280
|
</xsl:template>
|
165
281
|
|
282
|
+
<!-- escape inlines in text nodes -->
|
283
|
+
<xsl:template match="text()">
|
284
|
+
<xsl:call-template name="escape-inlines">
|
285
|
+
<xsl:with-param name="text" select="." />
|
286
|
+
</xsl:call-template>
|
287
|
+
</xsl:template>
|
288
|
+
|
166
289
|
|
167
290
|
<!-- attachments/schedules -->
|
168
291
|
<xsl:template match="a:attachment">
|
@@ -176,7 +299,7 @@
|
|
176
299
|
<xsl:text> </xsl:text>
|
177
300
|
</xsl:if>
|
178
301
|
|
179
|
-
<xsl:text>

|
302
|
+
<xsl:text> </xsl:text>
|
180
303
|
<xsl:apply-templates select="a:doc/a:mainBody" />
|
181
304
|
</xsl:template>
|
182
305
|
|
@@ -192,31 +315,24 @@
|
|
192
315
|
<xsl:value-of select="." />
|
193
316
|
<xsl:text>" </xsl:text>
|
194
317
|
</xsl:for-each>
|
195
|
-
<xsl:text>
|
196
|
-
|-</xsl:text>
|
318
|
+
<xsl:text> |-</xsl:text>
|
197
319
|
|
198
320
|
<xsl:apply-templates />
|
199
|
-
<xsl:text>
|
200
|
-
|}
|
201
|
-
|
202
|
-
</xsl:text>
|
321
|
+
<xsl:text> |} </xsl:text>
|
203
322
|
</xsl:template>
|
204
323
|
|
205
324
|
<xsl:template match="a:tr">
|
206
325
|
<xsl:apply-templates />
|
207
|
-
<xsl:text>
|
208
|
-
|-</xsl:text>
|
326
|
+
<xsl:text> |-</xsl:text>
|
209
327
|
</xsl:template>
|
210
328
|
|
211
329
|
<xsl:template match="a:th|a:td">
|
212
330
|
<xsl:choose>
|
213
331
|
<xsl:when test="local-name(.) = 'th'">
|
214
|
-
<xsl:text>
|
215
|
-
! </xsl:text>
|
332
|
+
<xsl:text> ! </xsl:text>
|
216
333
|
</xsl:when>
|
217
334
|
<xsl:when test="local-name(.) = 'td'">
|
218
|
-
<xsl:text>
|
219
|
-
| </xsl:text>
|
335
|
+
<xsl:text> | </xsl:text>
|
220
336
|
</xsl:when>
|
221
337
|
</xsl:choose>
|
222
338
|
|
@@ -287,6 +403,12 @@
|
|
287
403
|
<xsl:text>^_</xsl:text>
|
288
404
|
</xsl:template>
|
289
405
|
|
406
|
+
<xsl:template match="a:u">
|
407
|
+
<xsl:text>__</xsl:text>
|
408
|
+
<xsl:apply-templates />
|
409
|
+
<xsl:text>__</xsl:text>
|
410
|
+
</xsl:template>
|
411
|
+
|
290
412
|
<xsl:template match="a:eol">
|
291
413
|
<xsl:text> </xsl:text>
|
292
414
|
</xsl:template>
|
data/lib/slaw/version.rb
CHANGED
data/slaw.gemspec
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
BODY
|
2
|
+
|
3
|
+
1. Section that tests escapes
|
4
|
+
|
5
|
+
text \\ with a single slash
|
6
|
+
|
7
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
8
|
+
|
9
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
10
|
+
|
11
|
+
refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
|
12
|
+
|
13
|
+
nested ** stars \*\* in bold \*\***
|
14
|
+
|
15
|
+
nested // slashes \/\/ in italics \/\///
|
16
|
+
|
17
|
+
nested ** stars in // italics \*\* // and bold **
|
18
|
+
|
19
|
+
super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
|
20
|
+
|
21
|
+
underlines __underline with _ underscores__ and \_\_escaped underlines \_\_
|
22
|
+
|
23
|
+
mixed __underline **and \_\_ bold**__
|
24
|
+
|
data/spec/generator_spec.rb
CHANGED
@@ -78,45 +78,81 @@ XML
|
|
78
78
|
|
79
79
|
1. Section
|
80
80
|
|
81
|
-
|
81
|
+
\\Chapter 2 ignored
|
82
82
|
|
83
83
|
Chapters
|
84
84
|
|
85
|
-
|
85
|
+
\\Part 2 ignored
|
86
86
|
|
87
87
|
participation
|
88
88
|
|
89
|
-
|
89
|
+
\\Schedule 2 ignored
|
90
90
|
|
91
91
|
Schedules
|
92
92
|
|
93
|
-
|
93
|
+
\\HEADING x
|
94
94
|
|
95
|
-
|
95
|
+
\\SUBHEADING x
|
96
96
|
|
97
97
|
BODY not escaped
|
98
98
|
|
99
|
-
|
99
|
+
\\BODY
|
100
100
|
|
101
101
|
PREAMBLE not escaped
|
102
102
|
|
103
|
-
|
103
|
+
\\PREAMBLE
|
104
104
|
|
105
105
|
PREFACE not escaped
|
106
106
|
|
107
|
-
|
107
|
+
\\PREFACE
|
108
108
|
|
109
|
-
|
109
|
+
\\2. ignored
|
110
110
|
|
111
|
-
|
111
|
+
\\2.1 ignored
|
112
112
|
|
113
|
-
|
113
|
+
\\(2) ignored
|
114
114
|
|
115
|
-
|
115
|
+
\\(a) ignored
|
116
116
|
|
117
|
-
|
117
|
+
\\(2a) ignored
|
118
118
|
|
119
|
-
|
119
|
+
\\{| ignored
|
120
|
+
|
121
|
+
'
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'should escape inlines when unparsing' do
|
125
|
+
doc = xml2doc(section(<<'XML'
|
126
|
+
<num>1.</num>
|
127
|
+
<heading>Section</heading>
|
128
|
+
<paragraph id="section-1.paragraph-0">
|
129
|
+
<content>
|
130
|
+
<p>text \ with a single slash</p>
|
131
|
+
<p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
|
132
|
+
<p>inlines that ** should // be [[ escaped ![ and ]]</p>
|
133
|
+
<p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
|
134
|
+
<p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
|
135
|
+
</content>
|
136
|
+
</paragraph>
|
137
|
+
XML
|
138
|
+
))
|
139
|
+
|
140
|
+
text = subject.text_from_act(doc)
|
141
|
+
# NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT a double backslash
|
142
|
+
# which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
|
143
|
+
text.should == 'BODY
|
144
|
+
|
145
|
+
1. Section
|
146
|
+
|
147
|
+
text \\\\ with a single slash
|
148
|
+
|
149
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
150
|
+
|
151
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
152
|
+
|
153
|
+
refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
|
154
|
+
|
155
|
+
super ^^with \^\^^^ and sub _^\_^ with \^_^_
|
120
156
|
|
121
157
|
'
|
122
158
|
end
|
@@ -148,7 +184,7 @@ XML
|
|
148
184
|
|
149
185
|
1. Section
|
150
186
|
|
151
|
-
|
187
|
+
\\(2) A special meeting [[ foo ]]:
|
152
188
|
|
153
189
|
(a) the chairperson so directs; or
|
154
190
|
|
@@ -201,6 +237,27 @@ XML
|
|
201
237
|
|
202
238
|
Hello [there](/za/act/123) friend.
|
203
239
|
|
240
|
+
'
|
241
|
+
end
|
242
|
+
|
243
|
+
it 'should unparse underlines correctly' do
|
244
|
+
doc = xml2doc(section(<<XML
|
245
|
+
<num>1.</num>
|
246
|
+
<paragraph id="section-19.paragraph-0">
|
247
|
+
<content>
|
248
|
+
<p>Hello <u>underlined</u>.</p>
|
249
|
+
</content>
|
250
|
+
</paragraph>
|
251
|
+
XML
|
252
|
+
))
|
253
|
+
|
254
|
+
text = subject.text_from_act(doc)
|
255
|
+
text.should == 'BODY
|
256
|
+
|
257
|
+
1.
|
258
|
+
|
259
|
+
Hello __underlined__.
|
260
|
+
|
204
261
|
'
|
205
262
|
end
|
206
263
|
|
@@ -269,4 +326,13 @@ Subject to approval in terms of this By-Law.
|
|
269
326
|
'
|
270
327
|
end
|
271
328
|
end
|
329
|
+
|
330
|
+
describe 'round trip' do
|
331
|
+
it 'should be idempotent for escapes' do
|
332
|
+
text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
|
333
|
+
act = subject.generate_from_text(text)
|
334
|
+
xml = act.to_xml(encoding: 'utf-8')
|
335
|
+
subject.text_from_act(act).should == text
|
336
|
+
end
|
337
|
+
end
|
272
338
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -117,16 +117,19 @@ EOS
|
|
117
117
|
it 'should handle escaped content' do
|
118
118
|
node = parse :body, <<EOS
|
119
119
|
\\1. ignored
|
120
|
+
foo \\\\bar
|
120
121
|
|
121
|
-
\\CROSSHEADING
|
122
|
+
\\CROSSHEADING cross\\heading
|
122
123
|
|
123
|
-
1.
|
124
|
+
1. Sec\\tion
|
124
125
|
\\Chapter 2 ignored
|
126
|
+
Some text with a \\\\real backslash
|
125
127
|
EOS
|
126
128
|
to_xml(node).should == '<body>
|
127
129
|
<hcontainer eId="hcontainer_1" name="hcontainer">
|
128
130
|
<content>
|
129
131
|
<p>1. ignored</p>
|
132
|
+
<p>foo \\bar</p>
|
130
133
|
<p>CROSSHEADING crossheading</p>
|
131
134
|
</content>
|
132
135
|
</hcontainer>
|
@@ -136,6 +139,7 @@ EOS
|
|
136
139
|
<hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
|
137
140
|
<content>
|
138
141
|
<p>Chapter 2 ignored</p>
|
142
|
+
<p>Some text with a \\real backslash</p>
|
139
143
|
</content>
|
140
144
|
</hcontainer>
|
141
145
|
</section>
|
data/spec/za/act_inline_spec.rb
CHANGED
@@ -325,6 +325,17 @@ EOS
|
|
325
325
|
<p>This statement has <remark status="editorial">[<ref href="/foo/bar">a link in</ref> a remark]</remark></p>
|
326
326
|
<p>This statement has <remark status="editorial">[a <ref href="/foo/bar">link in a remark</ref>]</remark></p>
|
327
327
|
</content>
|
328
|
+
</hcontainer>'
|
329
|
+
end
|
330
|
+
|
331
|
+
it 'should handle escapes in links' do
|
332
|
+
node = parse :generic_container, <<EOS
|
333
|
+
Visit the site [https:\\/\\/example.com](https://example.com) for more.
|
334
|
+
EOS
|
335
|
+
to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
|
336
|
+
<content>
|
337
|
+
<p>Visit the site <ref href="https://example.com">https://example.com</ref> for more.</p>
|
338
|
+
</content>
|
328
339
|
</hcontainer>'
|
329
340
|
end
|
330
341
|
end
|
@@ -524,4 +535,17 @@ EOS
|
|
524
535
|
end
|
525
536
|
end
|
526
537
|
|
538
|
+
describe 'underline' do
|
539
|
+
it 'should handle underline' do
|
540
|
+
node = parse :generic_container, <<EOS
|
541
|
+
Text __with underline__ and _ under__scores__.
|
542
|
+
EOS
|
543
|
+
to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
|
544
|
+
<content>
|
545
|
+
<p>Text <u>with underline</u> and _ under<u>scores</u>.</p>
|
546
|
+
</content>
|
547
|
+
</hcontainer>'
|
548
|
+
end
|
549
|
+
end
|
550
|
+
|
527
551
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.
|
4
|
+
version: 10.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,20 +94,6 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0.20'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: mimemagic
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0.2'
|
104
|
-
type: :runtime
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0.2'
|
111
97
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
112
98
|
acts from plain text and PDF documents.
|
113
99
|
email:
|
@@ -117,9 +103,9 @@ executables:
|
|
117
103
|
extensions: []
|
118
104
|
extra_rdoc_files: []
|
119
105
|
files:
|
106
|
+
- ".github/workflows/test.yml"
|
120
107
|
- ".gitignore"
|
121
108
|
- ".rspec"
|
122
|
-
- ".travis.yml"
|
123
109
|
- Gemfile
|
124
110
|
- LICENSE.txt
|
125
111
|
- README.md
|
@@ -158,6 +144,7 @@ files:
|
|
158
144
|
- spec/counters_spec.rb
|
159
145
|
- spec/extract/extractor_spec.rb
|
160
146
|
- spec/fixtures/community-fire-safety.xml
|
147
|
+
- spec/fixtures/roundtrip-escapes.txt
|
161
148
|
- spec/generator_spec.rb
|
162
149
|
- spec/parse/blocklists_spec.rb
|
163
150
|
- spec/parse/builder_spec.rb
|
@@ -173,7 +160,7 @@ homepage: https://github.com/longhotsummer/slaw
|
|
173
160
|
licenses:
|
174
161
|
- MIT
|
175
162
|
metadata: {}
|
176
|
-
post_install_message:
|
163
|
+
post_install_message:
|
177
164
|
rdoc_options: []
|
178
165
|
require_paths:
|
179
166
|
- lib
|
@@ -189,13 +176,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
189
176
|
version: '0'
|
190
177
|
requirements: []
|
191
178
|
rubygems_version: 3.0.3
|
192
|
-
signing_key:
|
179
|
+
signing_key:
|
193
180
|
specification_version: 4
|
194
181
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
195
182
|
test_files:
|
196
183
|
- spec/counters_spec.rb
|
197
184
|
- spec/extract/extractor_spec.rb
|
198
185
|
- spec/fixtures/community-fire-safety.xml
|
186
|
+
- spec/fixtures/roundtrip-escapes.txt
|
199
187
|
- spec/generator_spec.rb
|
200
188
|
- spec/parse/blocklists_spec.rb
|
201
189
|
- spec/parse/builder_spec.rb
|