slaw 10.3.1 → 10.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +22 -0
- data/README.md +20 -0
- data/lib/slaw/extract/extractor.rb +2 -9
- data/lib/slaw/extract/html_to_akn_text.xsl +29 -23
- data/lib/slaw/grammars/inlines.treetop +7 -1
- data/lib/slaw/grammars/inlines_nodes.rb +15 -1
- data/lib/slaw/grammars/za/act_text.xsl +154 -32
- data/lib/slaw/version.rb +1 -1
- data/slaw.gemspec +0 -1
- data/spec/fixtures/roundtrip-escapes.txt +24 -0
- data/spec/generator_spec.rb +81 -15
- data/spec/za/act_block_spec.rb +6 -2
- data/spec/za/act_inline_spec.rb +24 -0
- metadata +8 -20
- data/.travis.yml +0 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e4cdd070ad171d0d999cbaa93da4fb2723df0a2d430679833d3588903049575
|
4
|
+
data.tar.gz: 33bffb144c455bb8d35f7c8dc7e3593041eead5d1c76569575afa8df73bd0a8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b69cf6e2303be5096d3697ced6fe3bb1538d46a5657c718f8e1c6e8e4edee1b1c24ea4b7a6f199920499412b716cfa21e0fddb72be46928377d02145236831cf
|
7
|
+
data.tar.gz: 13744a12c6e8f62d90cd6fd4b553e3d45f4dd7b38ca076c6da2d2dcea7e72a31feb9345f687d7d29fa5a0d9a8874c4a506929f5c1a73838f2b551df8cc063806
|
@@ -0,0 +1,22 @@
|
|
1
|
+
name: Test
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
|
9
|
+
strategy:
|
10
|
+
matrix:
|
11
|
+
ruby-version: [2.7, 2.6]
|
12
|
+
|
13
|
+
steps:
|
14
|
+
- uses: actions/checkout@v2
|
15
|
+
- name: Set up Ruby ${{ matrix.ruby-version }}
|
16
|
+
uses: ruby/setup-ruby@v1
|
17
|
+
with:
|
18
|
+
ruby-version: ${{ matrix.ruby-version }}
|
19
|
+
- name: Install dependencies
|
20
|
+
run: bundle install
|
21
|
+
- name: Run tests
|
22
|
+
run: bundle exec rake
|
data/README.md
CHANGED
@@ -86,6 +86,26 @@ You can create your own grammar by creating a gem that provides these files and
|
|
86
86
|
|
87
87
|
## Changelog
|
88
88
|
|
89
|
+
### 10.7.0 (11 June 2021)
|
90
|
+
|
91
|
+
* Support underlines with `__text__`
|
92
|
+
|
93
|
+
### 10.6.0 (10 May 2021)
|
94
|
+
|
95
|
+
* Handle sup and sub when extracting from HTML.
|
96
|
+
|
97
|
+
### 10.5.0 (20 April 2021)
|
98
|
+
|
99
|
+
* Handle escaping inlines when unparsing.
|
100
|
+
|
101
|
+
### 10.4.1 (14 April 2021)
|
102
|
+
|
103
|
+
* Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, eg `[https:\/\/example.com](https://example.com)`
|
104
|
+
|
105
|
+
### 10.4.0 (9 April 2021)
|
106
|
+
|
107
|
+
* Remove dependency on mimemagic. Guess file type based on filename instead.
|
108
|
+
|
89
109
|
### 10.3.1 (11 January 2021)
|
90
110
|
|
91
111
|
* Strip ascii, unicode general and unicode supplemental punctuation from num elements when building eIds
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'mimemagic'
|
2
|
-
|
3
1
|
module Slaw
|
4
2
|
module Extract
|
5
3
|
|
@@ -13,15 +11,10 @@ module Slaw
|
|
13
11
|
#
|
14
12
|
# @return [String] extracted text
|
15
13
|
def extract_from_file(filename)
|
16
|
-
|
17
|
-
|
18
|
-
case mimetype && mimetype.type
|
19
|
-
when 'text/html'
|
14
|
+
if filename.end_with? '.html' or filename.end_with? '.htm'
|
20
15
|
extract_from_html(filename)
|
21
|
-
when 'text/plain', nil
|
22
|
-
extract_from_text(filename)
|
23
16
|
else
|
24
|
-
|
17
|
+
extract_from_text(filename)
|
25
18
|
end
|
26
19
|
end
|
27
20
|
|
@@ -11,9 +11,10 @@
|
|
11
11
|
|
12
12
|
<xsl:template match="head|style|script|link" />
|
13
13
|
|
14
|
-
|
14
|
+
<!-- block containers that end with newlines -->
|
15
|
+
<xsl:template match="ul|ol|section|article|h1|h2|h3|h4|h5">
|
15
16
|
<xsl:apply-templates />
|
16
|
-
<xsl:text> </xsl:text>
|
17
|
+
<xsl:text> </xsl:text>
|
17
18
|
</xsl:template>
|
18
19
|
|
19
20
|
<xsl:template match="ul/li">
|
@@ -23,20 +24,23 @@
|
|
23
24
|
<xsl:text> </xsl:text>
|
24
25
|
</xsl:template>
|
25
26
|
|
27
|
+
<!-- numbered lists should include a number -->
|
26
28
|
<xsl:template match="ol/li">
|
27
|
-
<!-- 1. foo -->
|
29
|
+
<!-- \1. foo -->
|
28
30
|
<xsl:text>\</xsl:text>
|
29
|
-
<xsl:
|
31
|
+
<xsl:choose>
|
32
|
+
<xsl:when test="@value">
|
33
|
+
<xsl:value-of select="@value" />
|
34
|
+
</xsl:when>
|
35
|
+
<xsl:otherwise>
|
36
|
+
<xsl:value-of select="position()" />
|
37
|
+
</xsl:otherwise>
|
38
|
+
</xsl:choose>
|
30
39
|
<xsl:text>. </xsl:text>
|
31
40
|
<xsl:apply-templates />
|
32
41
|
<xsl:text> </xsl:text>
|
33
42
|
</xsl:template>
|
34
43
|
|
35
|
-
<xsl:template match="h1|h2|h3|h4|h5">
|
36
|
-
<xsl:apply-templates />
|
37
|
-
<xsl:text> </xsl:text>
|
38
|
-
</xsl:template>
|
39
|
-
|
40
44
|
<xsl:template match="p|div">
|
41
45
|
<xsl:choose>
|
42
46
|
<xsl:when test="starts-with(., '[') and substring(., string-length(.)) = ']'">
|
@@ -51,32 +55,27 @@
|
|
51
55
|
<xsl:text> </xsl:text>
|
52
56
|
</xsl:template>
|
53
57
|
|
58
|
+
<!-- START tables -->
|
59
|
+
|
54
60
|
<xsl:template match="table">
|
55
61
|
<xsl:text>{| </xsl:text>
|
56
|
-
<xsl:text>
|
57
|
-
|-</xsl:text>
|
62
|
+
<xsl:text> |-</xsl:text>
|
58
63
|
<xsl:apply-templates />
|
59
|
-
<xsl:text>
|
60
|
-
|}
|
61
|
-
|
62
|
-
</xsl:text>
|
64
|
+
<xsl:text> |} </xsl:text>
|
63
65
|
</xsl:template>
|
64
66
|
|
65
67
|
<xsl:template match="tr">
|
66
68
|
<xsl:apply-templates />
|
67
|
-
<xsl:text>
|
68
|
-
|-</xsl:text>
|
69
|
+
<xsl:text> |-</xsl:text>
|
69
70
|
</xsl:template>
|
70
71
|
|
71
72
|
<xsl:template match="th|td">
|
72
73
|
<xsl:choose>
|
73
74
|
<xsl:when test="local-name(.) = 'th'">
|
74
|
-
<xsl:text>
|
75
|
-
! </xsl:text>
|
75
|
+
<xsl:text> ! </xsl:text>
|
76
76
|
</xsl:when>
|
77
77
|
<xsl:when test="local-name(.) = 'td'">
|
78
|
-
<xsl:text>
|
79
|
-
| </xsl:text>
|
78
|
+
<xsl:text> | </xsl:text>
|
80
79
|
</xsl:when>
|
81
80
|
</xsl:choose>
|
82
81
|
|
@@ -118,8 +117,15 @@
|
|
118
117
|
</xsl:template>
|
119
118
|
|
120
119
|
<xsl:template match="br">
|
121
|
-
<xsl:text>
|
122
|
-
</xsl:text>
|
120
|
+
<xsl:text> </xsl:text>
|
121
|
+
</xsl:template>
|
122
|
+
|
123
|
+
<xsl:template match="sup">
|
124
|
+
<xsl:text>^^</xsl:text><xsl:apply-templates /><xsl:text>^^</xsl:text>
|
125
|
+
</xsl:template>
|
126
|
+
|
127
|
+
<xsl:template match="sub">
|
128
|
+
<xsl:text>_^</xsl:text><xsl:apply-templates /><xsl:text>^_</xsl:text>
|
123
129
|
</xsl:template>
|
124
130
|
|
125
131
|
|
@@ -20,7 +20,7 @@ module Slaw
|
|
20
20
|
end
|
21
21
|
|
22
22
|
rule inline_item
|
23
|
-
remark / image / ref / bold / italics / superscript / subscript / [^\n]
|
23
|
+
remark / image / ref / bold / italics / superscript / subscript / underline / '\\'? [^\n]
|
24
24
|
<InlineItem>
|
25
25
|
end
|
26
26
|
|
@@ -69,6 +69,12 @@ module Slaw
|
|
69
69
|
<Subscript>
|
70
70
|
end
|
71
71
|
|
72
|
+
rule underline
|
73
|
+
# __foo__
|
74
|
+
'__' content:(!'__' inline_item)+ '__'
|
75
|
+
<Underline>
|
76
|
+
end
|
77
|
+
|
72
78
|
end
|
73
79
|
end
|
74
80
|
end
|
@@ -37,7 +37,12 @@ module Slaw
|
|
37
37
|
|
38
38
|
class InlineItem < Treetop::Runtime::SyntaxNode
|
39
39
|
def to_xml(b, idprefix)
|
40
|
-
|
40
|
+
if text_value.start_with? '\\'
|
41
|
+
# handle escaped characters: \a -> a
|
42
|
+
b.text(text_value[1..])
|
43
|
+
else
|
44
|
+
b.text(text_value)
|
45
|
+
end
|
41
46
|
end
|
42
47
|
end
|
43
48
|
|
@@ -91,6 +96,15 @@ module Slaw
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
|
99
|
+
class Underline < Treetop::Runtime::SyntaxNode
|
100
|
+
def to_xml(b, idprefix)
|
101
|
+
b.u { |b|
|
102
|
+
for e in content.elements
|
103
|
+
e.inline_item.to_xml(b, idprefix)
|
104
|
+
end
|
105
|
+
}
|
106
|
+
end
|
107
|
+
end
|
94
108
|
end
|
95
109
|
end
|
96
110
|
end
|
@@ -9,31 +9,147 @@
|
|
9
9
|
<xsl:strip-space elements="*"/>
|
10
10
|
<xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
|
11
11
|
|
12
|
+
<!-- replaces "value" in "text" with "replacement" -->
|
13
|
+
<xsl:template name="string-replace-all">
|
14
|
+
<xsl:param name="text" />
|
15
|
+
<xsl:param name="value" />
|
16
|
+
<xsl:param name="replacement" />
|
17
|
+
|
18
|
+
<xsl:choose>
|
19
|
+
<xsl:when test="$text = '' or $value = '' or not($value)">
|
20
|
+
<xsl:value-of select="$text" />
|
21
|
+
</xsl:when>
|
22
|
+
<xsl:when test="contains($text, $value)">
|
23
|
+
<xsl:value-of select="substring-before($text, $value)"/>
|
24
|
+
<xsl:value-of select="$replacement" />
|
25
|
+
<xsl:call-template name="string-replace-all">
|
26
|
+
<xsl:with-param name="text" select="substring-after($text, $value)" />
|
27
|
+
<xsl:with-param name="value" select="$value" />
|
28
|
+
<xsl:with-param name="replacement" select="$replacement" />
|
29
|
+
</xsl:call-template>
|
30
|
+
</xsl:when>
|
31
|
+
<xsl:otherwise>
|
32
|
+
<xsl:value-of select="$text" />
|
33
|
+
</xsl:otherwise>
|
34
|
+
</xsl:choose>
|
35
|
+
</xsl:template>
|
36
|
+
|
37
|
+
<!-- Escape inline markers with a backslash -->
|
38
|
+
<xsl:template name="escape-inlines">
|
39
|
+
<xsl:param name="text" />
|
40
|
+
|
41
|
+
<!-- This works from the inside out, first escaping backslash chars themselves, then escaping
|
42
|
+
the different types of inline markers -->
|
43
|
+
<xsl:call-template name="string-replace-all">
|
44
|
+
<xsl:with-param name="text">
|
45
|
+
<xsl:call-template name="string-replace-all">
|
46
|
+
<xsl:with-param name="text">
|
47
|
+
<xsl:call-template name="string-replace-all">
|
48
|
+
<xsl:with-param name="text">
|
49
|
+
<xsl:call-template name="string-replace-all">
|
50
|
+
<xsl:with-param name="text">
|
51
|
+
<xsl:call-template name="string-replace-all">
|
52
|
+
<xsl:with-param name="text">
|
53
|
+
<xsl:call-template name="string-replace-all">
|
54
|
+
<xsl:with-param name="text">
|
55
|
+
<xsl:call-template name="string-replace-all">
|
56
|
+
<xsl:with-param name="text">
|
57
|
+
<xsl:call-template name="string-replace-all">
|
58
|
+
<xsl:with-param name="text">
|
59
|
+
<xsl:call-template name="string-replace-all">
|
60
|
+
<xsl:with-param name="text">
|
61
|
+
<xsl:call-template name="string-replace-all">
|
62
|
+
<xsl:with-param name="text">
|
63
|
+
<xsl:call-template name="string-replace-all">
|
64
|
+
<xsl:with-param name="text" select="$text" />
|
65
|
+
<xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
|
66
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
|
67
|
+
</xsl:call-template>
|
68
|
+
</xsl:with-param>
|
69
|
+
<xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
|
70
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
|
71
|
+
</xsl:call-template>
|
72
|
+
</xsl:with-param>
|
73
|
+
<xsl:with-param name="value"><xsl:value-of select="'__'" /></xsl:with-param>
|
74
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\_\_'" /></xsl:with-param>
|
75
|
+
</xsl:call-template>
|
76
|
+
</xsl:with-param>
|
77
|
+
<xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
|
78
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
|
79
|
+
</xsl:call-template>
|
80
|
+
</xsl:with-param>
|
81
|
+
<xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
|
82
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
|
83
|
+
</xsl:call-template>
|
84
|
+
</xsl:with-param>
|
85
|
+
<xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
|
86
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
|
87
|
+
</xsl:call-template>
|
88
|
+
</xsl:with-param>
|
89
|
+
<xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
|
90
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
|
91
|
+
</xsl:call-template>
|
92
|
+
</xsl:with-param>
|
93
|
+
<xsl:with-param name="value"><xsl:value-of select="'!['" /></xsl:with-param>
|
94
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\!['" /></xsl:with-param>
|
95
|
+
</xsl:call-template>
|
96
|
+
</xsl:with-param>
|
97
|
+
<xsl:with-param name="value"><xsl:value-of select="']('" /></xsl:with-param>
|
98
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\]('" /></xsl:with-param>
|
99
|
+
</xsl:call-template>
|
100
|
+
</xsl:with-param>
|
101
|
+
<xsl:with-param name="value"><xsl:value-of select="'[['" /></xsl:with-param>
|
102
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\[\['" /></xsl:with-param>
|
103
|
+
</xsl:call-template>
|
104
|
+
</xsl:with-param>
|
105
|
+
<xsl:with-param name="value"><xsl:value-of select="']]'" /></xsl:with-param>
|
106
|
+
<xsl:with-param name="replacement"><xsl:value-of select="'\]\]'" /></xsl:with-param>
|
107
|
+
</xsl:call-template>
|
108
|
+
</xsl:template>
|
109
|
+
|
12
110
|
<!-- adds a backslash to the start of the value param, if necessary -->
|
13
|
-
<xsl:template name="escape">
|
111
|
+
<xsl:template name="escape-prefixes">
|
14
112
|
<xsl:param name="value"/>
|
15
113
|
|
16
114
|
<xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
|
17
115
|
<!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
|
18
116
|
<xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
|
19
117
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
118
|
+
<xsl:variable name="slash">
|
119
|
+
<!-- p tags must escape initial content that looks like a block element marker -->
|
120
|
+
<xsl:if test="$prefix = 'BODY' or
|
121
|
+
$prefix = 'PREAMBLE' or
|
122
|
+
$prefix = 'PREFACE' or
|
123
|
+
starts-with($prefix, 'CHAPTER ') or
|
124
|
+
starts-with($prefix, 'PART ') or
|
125
|
+
starts-with($prefix, 'SUBPART ') or
|
126
|
+
starts-with($prefix, 'SCHEDULE ') or
|
127
|
+
starts-with($prefix, 'HEADING ') or
|
128
|
+
starts-with($prefix, 'SUBHEADING ') or
|
129
|
+
starts-with($prefix, 'LONGTITLE ') or
|
130
|
+
starts-with($prefix, 'CROSSHEADING ') or
|
131
|
+
starts-with($prefix, '{|') or
|
132
|
+
starts-with($numprefix, '(')">
|
133
|
+
<xsl:value-of select="'\'" />
|
134
|
+
</xsl:if>
|
135
|
+
</xsl:variable>
|
136
|
+
|
137
|
+
<xsl:value-of select="concat($slash, $value)" />
|
138
|
+
</xsl:template>
|
139
|
+
|
140
|
+
<!-- adds a backslash to the start of the text param, if necessary -->
|
141
|
+
<xsl:template name="escape">
|
142
|
+
<xsl:param name="value"/>
|
143
|
+
|
144
|
+
<xsl:variable name="escaped">
|
145
|
+
<xsl:call-template name="escape-inlines">
|
146
|
+
<xsl:with-param name="text" select="$value" />
|
147
|
+
</xsl:call-template>
|
148
|
+
</xsl:variable>
|
149
|
+
|
150
|
+
<xsl:call-template name="escape-prefixes">
|
151
|
+
<xsl:with-param name="value" select="$escaped" />
|
152
|
+
</xsl:call-template>
|
37
153
|
</xsl:template>
|
38
154
|
|
39
155
|
<xsl:template match="a:act">
|
@@ -157,12 +273,19 @@
|
|
157
273
|
</xsl:template>
|
158
274
|
|
159
275
|
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
160
|
-
<xsl:template match="a:p[not(ancestor::a:table)]/text()[
|
276
|
+
<xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
|
161
277
|
<xsl:call-template name="escape">
|
162
278
|
<xsl:with-param name="value" select="." />
|
163
279
|
</xsl:call-template>
|
164
280
|
</xsl:template>
|
165
281
|
|
282
|
+
<!-- escape inlines in text nodes -->
|
283
|
+
<xsl:template match="text()">
|
284
|
+
<xsl:call-template name="escape-inlines">
|
285
|
+
<xsl:with-param name="text" select="." />
|
286
|
+
</xsl:call-template>
|
287
|
+
</xsl:template>
|
288
|
+
|
166
289
|
|
167
290
|
<!-- attachments/schedules -->
|
168
291
|
<xsl:template match="a:attachment">
|
@@ -176,7 +299,7 @@
|
|
176
299
|
<xsl:text> </xsl:text>
|
177
300
|
</xsl:if>
|
178
301
|
|
179
|
-
<xsl:text>

|
302
|
+
<xsl:text> </xsl:text>
|
180
303
|
<xsl:apply-templates select="a:doc/a:mainBody" />
|
181
304
|
</xsl:template>
|
182
305
|
|
@@ -192,31 +315,24 @@
|
|
192
315
|
<xsl:value-of select="." />
|
193
316
|
<xsl:text>" </xsl:text>
|
194
317
|
</xsl:for-each>
|
195
|
-
<xsl:text>
|
196
|
-
|-</xsl:text>
|
318
|
+
<xsl:text> |-</xsl:text>
|
197
319
|
|
198
320
|
<xsl:apply-templates />
|
199
|
-
<xsl:text>
|
200
|
-
|}
|
201
|
-
|
202
|
-
</xsl:text>
|
321
|
+
<xsl:text> |} </xsl:text>
|
203
322
|
</xsl:template>
|
204
323
|
|
205
324
|
<xsl:template match="a:tr">
|
206
325
|
<xsl:apply-templates />
|
207
|
-
<xsl:text>
|
208
|
-
|-</xsl:text>
|
326
|
+
<xsl:text> |-</xsl:text>
|
209
327
|
</xsl:template>
|
210
328
|
|
211
329
|
<xsl:template match="a:th|a:td">
|
212
330
|
<xsl:choose>
|
213
331
|
<xsl:when test="local-name(.) = 'th'">
|
214
|
-
<xsl:text>
|
215
|
-
! </xsl:text>
|
332
|
+
<xsl:text> ! </xsl:text>
|
216
333
|
</xsl:when>
|
217
334
|
<xsl:when test="local-name(.) = 'td'">
|
218
|
-
<xsl:text>
|
219
|
-
| </xsl:text>
|
335
|
+
<xsl:text> | </xsl:text>
|
220
336
|
</xsl:when>
|
221
337
|
</xsl:choose>
|
222
338
|
|
@@ -287,6 +403,12 @@
|
|
287
403
|
<xsl:text>^_</xsl:text>
|
288
404
|
</xsl:template>
|
289
405
|
|
406
|
+
<xsl:template match="a:u">
|
407
|
+
<xsl:text>__</xsl:text>
|
408
|
+
<xsl:apply-templates />
|
409
|
+
<xsl:text>__</xsl:text>
|
410
|
+
</xsl:template>
|
411
|
+
|
290
412
|
<xsl:template match="a:eol">
|
291
413
|
<xsl:text> </xsl:text>
|
292
414
|
</xsl:template>
|
data/lib/slaw/version.rb
CHANGED
data/slaw.gemspec
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
BODY
|
2
|
+
|
3
|
+
1. Section that tests escapes
|
4
|
+
|
5
|
+
text \\ with a single slash
|
6
|
+
|
7
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
8
|
+
|
9
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
10
|
+
|
11
|
+
refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
|
12
|
+
|
13
|
+
nested ** stars \*\* in bold \*\***
|
14
|
+
|
15
|
+
nested // slashes \/\/ in italics \/\///
|
16
|
+
|
17
|
+
nested ** stars in // italics \*\* // and bold **
|
18
|
+
|
19
|
+
super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
|
20
|
+
|
21
|
+
underlines __underline with _ underscores__ and \_\_escaped underlines \_\_
|
22
|
+
|
23
|
+
mixed __underline **and \_\_ bold**__
|
24
|
+
|
data/spec/generator_spec.rb
CHANGED
@@ -78,45 +78,81 @@ XML
|
|
78
78
|
|
79
79
|
1. Section
|
80
80
|
|
81
|
-
|
81
|
+
\\Chapter 2 ignored
|
82
82
|
|
83
83
|
Chapters
|
84
84
|
|
85
|
-
|
85
|
+
\\Part 2 ignored
|
86
86
|
|
87
87
|
participation
|
88
88
|
|
89
|
-
|
89
|
+
\\Schedule 2 ignored
|
90
90
|
|
91
91
|
Schedules
|
92
92
|
|
93
|
-
|
93
|
+
\\HEADING x
|
94
94
|
|
95
|
-
|
95
|
+
\\SUBHEADING x
|
96
96
|
|
97
97
|
BODY not escaped
|
98
98
|
|
99
|
-
|
99
|
+
\\BODY
|
100
100
|
|
101
101
|
PREAMBLE not escaped
|
102
102
|
|
103
|
-
|
103
|
+
\\PREAMBLE
|
104
104
|
|
105
105
|
PREFACE not escaped
|
106
106
|
|
107
|
-
|
107
|
+
\\PREFACE
|
108
108
|
|
109
|
-
|
109
|
+
\\2. ignored
|
110
110
|
|
111
|
-
|
111
|
+
\\2.1 ignored
|
112
112
|
|
113
|
-
|
113
|
+
\\(2) ignored
|
114
114
|
|
115
|
-
|
115
|
+
\\(a) ignored
|
116
116
|
|
117
|
-
|
117
|
+
\\(2a) ignored
|
118
118
|
|
119
|
-
|
119
|
+
\\{| ignored
|
120
|
+
|
121
|
+
'
|
122
|
+
end
|
123
|
+
|
124
|
+
it 'should escape inlines when unparsing' do
|
125
|
+
doc = xml2doc(section(<<'XML'
|
126
|
+
<num>1.</num>
|
127
|
+
<heading>Section</heading>
|
128
|
+
<paragraph id="section-1.paragraph-0">
|
129
|
+
<content>
|
130
|
+
<p>text \ with a single slash</p>
|
131
|
+
<p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
|
132
|
+
<p>inlines that ** should // be [[ escaped ![ and ]]</p>
|
133
|
+
<p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
|
134
|
+
<p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
|
135
|
+
</content>
|
136
|
+
</paragraph>
|
137
|
+
XML
|
138
|
+
))
|
139
|
+
|
140
|
+
text = subject.text_from_act(doc)
|
141
|
+
# NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT a double backslash
|
142
|
+
# which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
|
143
|
+
text.should == 'BODY
|
144
|
+
|
145
|
+
1. Section
|
146
|
+
|
147
|
+
text \\\\ with a single slash
|
148
|
+
|
149
|
+
some **inlines \/\/ [with \/\/ slashes](#foo)**
|
150
|
+
|
151
|
+
inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
|
152
|
+
|
153
|
+
refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
|
154
|
+
|
155
|
+
super ^^with \^\^^^ and sub _^\_^ with \^_^_
|
120
156
|
|
121
157
|
'
|
122
158
|
end
|
@@ -148,7 +184,7 @@ XML
|
|
148
184
|
|
149
185
|
1. Section
|
150
186
|
|
151
|
-
|
187
|
+
\\(2) A special meeting [[ foo ]]:
|
152
188
|
|
153
189
|
(a) the chairperson so directs; or
|
154
190
|
|
@@ -201,6 +237,27 @@ XML
|
|
201
237
|
|
202
238
|
Hello [there](/za/act/123) friend.
|
203
239
|
|
240
|
+
'
|
241
|
+
end
|
242
|
+
|
243
|
+
it 'should unparse underlines correctly' do
|
244
|
+
doc = xml2doc(section(<<XML
|
245
|
+
<num>1.</num>
|
246
|
+
<paragraph id="section-19.paragraph-0">
|
247
|
+
<content>
|
248
|
+
<p>Hello <u>underlined</u>.</p>
|
249
|
+
</content>
|
250
|
+
</paragraph>
|
251
|
+
XML
|
252
|
+
))
|
253
|
+
|
254
|
+
text = subject.text_from_act(doc)
|
255
|
+
text.should == 'BODY
|
256
|
+
|
257
|
+
1.
|
258
|
+
|
259
|
+
Hello __underlined__.
|
260
|
+
|
204
261
|
'
|
205
262
|
end
|
206
263
|
|
@@ -269,4 +326,13 @@ Subject to approval in terms of this By-Law.
|
|
269
326
|
'
|
270
327
|
end
|
271
328
|
end
|
329
|
+
|
330
|
+
describe 'round trip' do
|
331
|
+
it 'should be idempotent for escapes' do
|
332
|
+
text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
|
333
|
+
act = subject.generate_from_text(text)
|
334
|
+
xml = act.to_xml(encoding: 'utf-8')
|
335
|
+
subject.text_from_act(act).should == text
|
336
|
+
end
|
337
|
+
end
|
272
338
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -117,16 +117,19 @@ EOS
|
|
117
117
|
it 'should handle escaped content' do
|
118
118
|
node = parse :body, <<EOS
|
119
119
|
\\1. ignored
|
120
|
+
foo \\\\bar
|
120
121
|
|
121
|
-
\\CROSSHEADING
|
122
|
+
\\CROSSHEADING cross\\heading
|
122
123
|
|
123
|
-
1.
|
124
|
+
1. Sec\\tion
|
124
125
|
\\Chapter 2 ignored
|
126
|
+
Some text with a \\\\real backslash
|
125
127
|
EOS
|
126
128
|
to_xml(node).should == '<body>
|
127
129
|
<hcontainer eId="hcontainer_1" name="hcontainer">
|
128
130
|
<content>
|
129
131
|
<p>1. ignored</p>
|
132
|
+
<p>foo \\bar</p>
|
130
133
|
<p>CROSSHEADING crossheading</p>
|
131
134
|
</content>
|
132
135
|
</hcontainer>
|
@@ -136,6 +139,7 @@ EOS
|
|
136
139
|
<hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
|
137
140
|
<content>
|
138
141
|
<p>Chapter 2 ignored</p>
|
142
|
+
<p>Some text with a \\real backslash</p>
|
139
143
|
</content>
|
140
144
|
</hcontainer>
|
141
145
|
</section>
|
data/spec/za/act_inline_spec.rb
CHANGED
@@ -325,6 +325,17 @@ EOS
|
|
325
325
|
<p>This statement has <remark status="editorial">[<ref href="/foo/bar">a link in</ref> a remark]</remark></p>
|
326
326
|
<p>This statement has <remark status="editorial">[a <ref href="/foo/bar">link in a remark</ref>]</remark></p>
|
327
327
|
</content>
|
328
|
+
</hcontainer>'
|
329
|
+
end
|
330
|
+
|
331
|
+
it 'should handle escapes in links' do
|
332
|
+
node = parse :generic_container, <<EOS
|
333
|
+
Visit the site [https:\\/\\/example.com](https://example.com) for more.
|
334
|
+
EOS
|
335
|
+
to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
|
336
|
+
<content>
|
337
|
+
<p>Visit the site <ref href="https://example.com">https://example.com</ref> for more.</p>
|
338
|
+
</content>
|
328
339
|
</hcontainer>'
|
329
340
|
end
|
330
341
|
end
|
@@ -524,4 +535,17 @@ EOS
|
|
524
535
|
end
|
525
536
|
end
|
526
537
|
|
538
|
+
describe 'underline' do
|
539
|
+
it 'should handle underline' do
|
540
|
+
node = parse :generic_container, <<EOS
|
541
|
+
Text __with underline__ and _ under__scores__.
|
542
|
+
EOS
|
543
|
+
to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
|
544
|
+
<content>
|
545
|
+
<p>Text <u>with underline</u> and _ under<u>scores</u>.</p>
|
546
|
+
</content>
|
547
|
+
</hcontainer>'
|
548
|
+
end
|
549
|
+
end
|
550
|
+
|
527
551
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 10.3.1
|
4
|
+
version: 10.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -94,20 +94,6 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0.20'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: mimemagic
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0.2'
|
104
|
-
type: :runtime
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0.2'
|
111
97
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
112
98
|
acts from plain text and PDF documents.
|
113
99
|
email:
|
@@ -117,9 +103,9 @@ executables:
|
|
117
103
|
extensions: []
|
118
104
|
extra_rdoc_files: []
|
119
105
|
files:
|
106
|
+
- ".github/workflows/test.yml"
|
120
107
|
- ".gitignore"
|
121
108
|
- ".rspec"
|
122
|
-
- ".travis.yml"
|
123
109
|
- Gemfile
|
124
110
|
- LICENSE.txt
|
125
111
|
- README.md
|
@@ -158,6 +144,7 @@ files:
|
|
158
144
|
- spec/counters_spec.rb
|
159
145
|
- spec/extract/extractor_spec.rb
|
160
146
|
- spec/fixtures/community-fire-safety.xml
|
147
|
+
- spec/fixtures/roundtrip-escapes.txt
|
161
148
|
- spec/generator_spec.rb
|
162
149
|
- spec/parse/blocklists_spec.rb
|
163
150
|
- spec/parse/builder_spec.rb
|
@@ -173,7 +160,7 @@ homepage: https://github.com/longhotsummer/slaw
|
|
173
160
|
licenses:
|
174
161
|
- MIT
|
175
162
|
metadata: {}
|
176
|
-
post_install_message:
|
163
|
+
post_install_message:
|
177
164
|
rdoc_options: []
|
178
165
|
require_paths:
|
179
166
|
- lib
|
@@ -189,13 +176,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
189
176
|
version: '0'
|
190
177
|
requirements: []
|
191
178
|
rubygems_version: 3.0.3
|
192
|
-
signing_key:
|
179
|
+
signing_key:
|
193
180
|
specification_version: 4
|
194
181
|
summary: A lightweight library for using Akoma Ntoso acts in Ruby.
|
195
182
|
test_files:
|
196
183
|
- spec/counters_spec.rb
|
197
184
|
- spec/extract/extractor_spec.rb
|
198
185
|
- spec/fixtures/community-fire-safety.xml
|
186
|
+
- spec/fixtures/roundtrip-escapes.txt
|
199
187
|
- spec/generator_spec.rb
|
200
188
|
- spec/parse/blocklists_spec.rb
|
201
189
|
- spec/parse/builder_spec.rb
|