slaw 10.3.1 → 10.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 85950871314a64110b980b50860cae66fce1bc9536374fca99204506150da04b
4
- data.tar.gz: 8dc252e644f14502f652b0daa4e18bcaadaf9d310be23b9a8f7cbffe3b85123f
3
+ metadata.gz: 6e4cdd070ad171d0d999cbaa93da4fb2723df0a2d430679833d3588903049575
4
+ data.tar.gz: 33bffb144c455bb8d35f7c8dc7e3593041eead5d1c76569575afa8df73bd0a8e
5
5
  SHA512:
6
- metadata.gz: d56cf86ff1e6502b82d6d79e76a4630e9259babcf57aaee54cc5ef2435b59a1a8ec5f6a60d58b81992cac392352f6db1acdce11aac18fc8ec461f2f8e1a92ef4
7
- data.tar.gz: 19f716a0069a1d157a6f2407b1f2d9ef4610447b31b5c7113a4b7ec090e19b6e6981573c4f37baa08f2e60e8ad5864122827a3e0454b29df68f958a81500f8b9
6
+ metadata.gz: b69cf6e2303be5096d3697ced6fe3bb1538d46a5657c718f8e1c6e8e4edee1b1c24ea4b7a6f199920499412b716cfa21e0fddb72be46928377d02145236831cf
7
+ data.tar.gz: 13744a12c6e8f62d90cd6fd4b553e3d45f4dd7b38ca076c6da2d2dcea7e72a31feb9345f687d7d29fa5a0d9a8874c4a506929f5c1a73838f2b551df8cc063806
@@ -0,0 +1,22 @@
1
+ name: Test
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+
9
+ strategy:
10
+ matrix:
11
+ ruby-version: [2.7, 2.6]
12
+
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+ - name: Set up Ruby ${{ matrix.ruby-version }}
16
+ uses: ruby/setup-ruby@v1
17
+ with:
18
+ ruby-version: ${{ matrix.ruby-version }}
19
+ - name: Install dependencies
20
+ run: bundle install
21
+ - name: Run tests
22
+ run: bundle exec rake
data/README.md CHANGED
@@ -86,6 +86,26 @@ You can create your own grammar by creating a gem that provides these files and
86
86
 
87
87
  ## Changelog
88
88
 
89
+ ### 10.7.0 (11 June 2021)
90
+
91
+ * Support underlines with `__text__`
92
+
93
+ ### 10.6.0 (10 May 2021)
94
+
95
+ * Handle sup and sub when extracting from HTML.
96
+
97
+ ### 10.5.0 (20 April 2021)
98
+
99
+ * Handle escaping inlines when unparsing.
100
+
101
+ ### 10.4.1 (14 April 2021)
102
+
103
+ * Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, e.g. `[https:\/\/example.com](https://example.com)`
104
+
105
+ ### 10.4.0 (9 April 2021)
106
+
107
+ * Remove dependency on mimemagic. Guess file type based on filename instead.
108
+
89
109
  ### 10.3.1 (11 January 2021)
90
110
 
91
111
  * Strip ASCII, Unicode general and Unicode supplemental punctuation from num elements when building eIds
@@ -1,5 +1,3 @@
1
- require 'mimemagic'
2
-
3
1
  module Slaw
4
2
  module Extract
5
3
 
@@ -13,15 +11,10 @@ module Slaw
13
11
  #
14
12
  # @return [String] extracted text
15
13
  def extract_from_file(filename)
16
- mimetype = get_mimetype(filename)
17
-
18
- case mimetype && mimetype.type
19
- when 'text/html'
14
+ if filename.end_with? '.html' or filename.end_with? '.htm'
20
15
  extract_from_html(filename)
21
- when 'text/plain', nil
22
- extract_from_text(filename)
23
16
  else
24
- raise ArgumentError.new("Unsupported file type #{mimetype || 'unknown'}")
17
+ extract_from_text(filename)
25
18
  end
26
19
  end
27
20
 
@@ -11,9 +11,10 @@
11
11
 
12
12
  <xsl:template match="head|style|script|link" />
13
13
 
14
- <xsl:template match="ul|ol">
14
+ <!-- block containers that end with newlines -->
15
+ <xsl:template match="ul|ol|section|article|h1|h2|h3|h4|h5">
15
16
  <xsl:apply-templates />
16
- <xsl:text>&#10;</xsl:text>
17
+ <xsl:text>&#10;&#10;</xsl:text>
17
18
  </xsl:template>
18
19
 
19
20
  <xsl:template match="ul/li">
@@ -23,20 +24,23 @@
23
24
  <xsl:text>&#10;</xsl:text>
24
25
  </xsl:template>
25
26
 
27
+ <!-- numbered lists should include a number -->
26
28
  <xsl:template match="ol/li">
27
- <!-- 1. foo -->
29
+ <!-- \1. foo -->
28
30
  <xsl:text>\</xsl:text>
29
- <xsl:value-of select="position()" />
31
+ <xsl:choose>
32
+ <xsl:when test="@value">
33
+ <xsl:value-of select="@value" />
34
+ </xsl:when>
35
+ <xsl:otherwise>
36
+ <xsl:value-of select="position()" />
37
+ </xsl:otherwise>
38
+ </xsl:choose>
30
39
  <xsl:text>. </xsl:text>
31
40
  <xsl:apply-templates />
32
41
  <xsl:text>&#10;</xsl:text>
33
42
  </xsl:template>
34
43
 
35
- <xsl:template match="h1|h2|h3|h4|h5">
36
- <xsl:apply-templates />
37
- <xsl:text>&#10;&#10;</xsl:text>
38
- </xsl:template>
39
-
40
44
  <xsl:template match="p|div">
41
45
  <xsl:choose>
42
46
  <xsl:when test="starts-with(., '[') and substring(., string-length(.)) = ']'">
@@ -51,32 +55,27 @@
51
55
  <xsl:text>&#10;&#10;</xsl:text>
52
56
  </xsl:template>
53
57
 
58
+ <!-- START tables -->
59
+
54
60
  <xsl:template match="table">
55
61
  <xsl:text>{| </xsl:text>
56
- <xsl:text>
57
- |-</xsl:text>
62
+ <xsl:text>&#10;|-</xsl:text>
58
63
  <xsl:apply-templates />
59
- <xsl:text>
60
- |}
61
-
62
- </xsl:text>
64
+ <xsl:text>&#10;|}&#10;&#10;</xsl:text>
63
65
  </xsl:template>
64
66
 
65
67
  <xsl:template match="tr">
66
68
  <xsl:apply-templates />
67
- <xsl:text>
68
- |-</xsl:text>
69
+ <xsl:text>&#10;|-</xsl:text>
69
70
  </xsl:template>
70
71
 
71
72
  <xsl:template match="th|td">
72
73
  <xsl:choose>
73
74
  <xsl:when test="local-name(.) = 'th'">
74
- <xsl:text>
75
- ! </xsl:text>
75
+ <xsl:text>&#10;! </xsl:text>
76
76
  </xsl:when>
77
77
  <xsl:when test="local-name(.) = 'td'">
78
- <xsl:text>
79
- | </xsl:text>
78
+ <xsl:text>&#10;| </xsl:text>
80
79
  </xsl:when>
81
80
  </xsl:choose>
82
81
 
@@ -118,8 +117,15 @@
118
117
  </xsl:template>
119
118
 
120
119
  <xsl:template match="br">
121
- <xsl:text>
122
- </xsl:text>
120
+ <xsl:text>&#10;</xsl:text>
121
+ </xsl:template>
122
+
123
+ <xsl:template match="sup">
124
+ <xsl:text>^^</xsl:text><xsl:apply-templates /><xsl:text>^^</xsl:text>
125
+ </xsl:template>
126
+
127
+ <xsl:template match="sub">
128
+ <xsl:text>_^</xsl:text><xsl:apply-templates /><xsl:text>^_</xsl:text>
123
129
  </xsl:template>
124
130
 
125
131
 
@@ -20,7 +20,7 @@ module Slaw
20
20
  end
21
21
 
22
22
  rule inline_item
23
- remark / image / ref / bold / italics / superscript / subscript / [^\n]
23
+ remark / image / ref / bold / italics / superscript / subscript / underline / '\\'? [^\n]
24
24
  <InlineItem>
25
25
  end
26
26
 
@@ -69,6 +69,12 @@ module Slaw
69
69
  <Subscript>
70
70
  end
71
71
 
72
+ rule underline
73
+ # __foo__
74
+ '__' content:(!'__' inline_item)+ '__'
75
+ <Underline>
76
+ end
77
+
72
78
  end
73
79
  end
74
80
  end
@@ -37,7 +37,12 @@ module Slaw
37
37
 
38
38
  class InlineItem < Treetop::Runtime::SyntaxNode
39
39
  def to_xml(b, idprefix)
40
- b.text(text_value)
40
+ if text_value.start_with? '\\'
41
+ # handle escaped characters: \a -> a
42
+ b.text(text_value[1..])
43
+ else
44
+ b.text(text_value)
45
+ end
41
46
  end
42
47
  end
43
48
 
@@ -91,6 +96,15 @@ module Slaw
91
96
  end
92
97
  end
93
98
 
99
+ class Underline < Treetop::Runtime::SyntaxNode
100
+ def to_xml(b, idprefix)
101
+ b.u { |b|
102
+ for e in content.elements
103
+ e.inline_item.to_xml(b, idprefix)
104
+ end
105
+ }
106
+ end
107
+ end
94
108
  end
95
109
  end
96
110
  end
@@ -9,31 +9,147 @@
9
9
  <xsl:strip-space elements="*"/>
10
10
  <xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
11
11
 
12
+ <!-- replaces "value" in "text" with "replacement" -->
13
+ <xsl:template name="string-replace-all">
14
+ <xsl:param name="text" />
15
+ <xsl:param name="value" />
16
+ <xsl:param name="replacement" />
17
+
18
+ <xsl:choose>
19
+ <xsl:when test="$text = '' or $value = '' or not($value)">
20
+ <xsl:value-of select="$text" />
21
+ </xsl:when>
22
+ <xsl:when test="contains($text, $value)">
23
+ <xsl:value-of select="substring-before($text, $value)"/>
24
+ <xsl:value-of select="$replacement" />
25
+ <xsl:call-template name="string-replace-all">
26
+ <xsl:with-param name="text" select="substring-after($text, $value)" />
27
+ <xsl:with-param name="value" select="$value" />
28
+ <xsl:with-param name="replacement" select="$replacement" />
29
+ </xsl:call-template>
30
+ </xsl:when>
31
+ <xsl:otherwise>
32
+ <xsl:value-of select="$text" />
33
+ </xsl:otherwise>
34
+ </xsl:choose>
35
+ </xsl:template>
36
+
37
+ <!-- Escape inline markers with a backslash -->
38
+ <xsl:template name="escape-inlines">
39
+ <xsl:param name="text" />
40
+
41
+ <!-- This works from the inside out, first escaping backslash chars themselves, then escaping
42
+ the different types of inline markers -->
43
+ <xsl:call-template name="string-replace-all">
44
+ <xsl:with-param name="text">
45
+ <xsl:call-template name="string-replace-all">
46
+ <xsl:with-param name="text">
47
+ <xsl:call-template name="string-replace-all">
48
+ <xsl:with-param name="text">
49
+ <xsl:call-template name="string-replace-all">
50
+ <xsl:with-param name="text">
51
+ <xsl:call-template name="string-replace-all">
52
+ <xsl:with-param name="text">
53
+ <xsl:call-template name="string-replace-all">
54
+ <xsl:with-param name="text">
55
+ <xsl:call-template name="string-replace-all">
56
+ <xsl:with-param name="text">
57
+ <xsl:call-template name="string-replace-all">
58
+ <xsl:with-param name="text">
59
+ <xsl:call-template name="string-replace-all">
60
+ <xsl:with-param name="text">
61
+ <xsl:call-template name="string-replace-all">
62
+ <xsl:with-param name="text">
63
+ <xsl:call-template name="string-replace-all">
64
+ <xsl:with-param name="text" select="$text" />
65
+ <xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
66
+ <xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
67
+ </xsl:call-template>
68
+ </xsl:with-param>
69
+ <xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
70
+ <xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
71
+ </xsl:call-template>
72
+ </xsl:with-param>
73
+ <xsl:with-param name="value"><xsl:value-of select="'__'" /></xsl:with-param>
74
+ <xsl:with-param name="replacement"><xsl:value-of select="'\_\_'" /></xsl:with-param>
75
+ </xsl:call-template>
76
+ </xsl:with-param>
77
+ <xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
78
+ <xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
79
+ </xsl:call-template>
80
+ </xsl:with-param>
81
+ <xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
82
+ <xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
83
+ </xsl:call-template>
84
+ </xsl:with-param>
85
+ <xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
86
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
87
+ </xsl:call-template>
88
+ </xsl:with-param>
89
+ <xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
90
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
91
+ </xsl:call-template>
92
+ </xsl:with-param>
93
+ <xsl:with-param name="value"><xsl:value-of select="'!['" /></xsl:with-param>
94
+ <xsl:with-param name="replacement"><xsl:value-of select="'\!['" /></xsl:with-param>
95
+ </xsl:call-template>
96
+ </xsl:with-param>
97
+ <xsl:with-param name="value"><xsl:value-of select="']('" /></xsl:with-param>
98
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]('" /></xsl:with-param>
99
+ </xsl:call-template>
100
+ </xsl:with-param>
101
+ <xsl:with-param name="value"><xsl:value-of select="'[['" /></xsl:with-param>
102
+ <xsl:with-param name="replacement"><xsl:value-of select="'\[\['" /></xsl:with-param>
103
+ </xsl:call-template>
104
+ </xsl:with-param>
105
+ <xsl:with-param name="value"><xsl:value-of select="']]'" /></xsl:with-param>
106
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]\]'" /></xsl:with-param>
107
+ </xsl:call-template>
108
+ </xsl:template>
109
+
12
110
  <!-- adds a backslash to the start of the value param, if necessary -->
13
- <xsl:template name="escape">
111
+ <xsl:template name="escape-prefixes">
14
112
  <xsl:param name="value"/>
15
113
 
16
114
  <xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
17
115
  <!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
18
116
  <xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
19
117
 
20
- <!-- p tags must escape initial content that looks like a block element marker -->
21
- <xsl:if test="$prefix = 'BODY' or
22
- $prefix = 'PREAMBLE' or
23
- $prefix = 'PREFACE' or
24
- starts-with($prefix, 'CHAPTER ') or
25
- starts-with($prefix, 'PART ') or
26
- starts-with($prefix, 'SUBPART ') or
27
- starts-with($prefix, 'SCHEDULE ') or
28
- starts-with($prefix, 'HEADING ') or
29
- starts-with($prefix, 'SUBHEADING ') or
30
- starts-with($prefix, 'LONGTITLE ') or
31
- starts-with($prefix, 'CROSSHEADING ') or
32
- starts-with($prefix, '{|') or
33
- starts-with($numprefix, '(')">
34
- <xsl:text>\</xsl:text>
35
- </xsl:if>
36
- <xsl:value-of select="$value"/>
118
+ <xsl:variable name="slash">
119
+ <!-- p tags must escape initial content that looks like a block element marker -->
120
+ <xsl:if test="$prefix = 'BODY' or
121
+ $prefix = 'PREAMBLE' or
122
+ $prefix = 'PREFACE' or
123
+ starts-with($prefix, 'CHAPTER ') or
124
+ starts-with($prefix, 'PART ') or
125
+ starts-with($prefix, 'SUBPART ') or
126
+ starts-with($prefix, 'SCHEDULE ') or
127
+ starts-with($prefix, 'HEADING ') or
128
+ starts-with($prefix, 'SUBHEADING ') or
129
+ starts-with($prefix, 'LONGTITLE ') or
130
+ starts-with($prefix, 'CROSSHEADING ') or
131
+ starts-with($prefix, '{|') or
132
+ starts-with($numprefix, '(')">
133
+ <xsl:value-of select="'\'" />
134
+ </xsl:if>
135
+ </xsl:variable>
136
+
137
+ <xsl:value-of select="concat($slash, $value)" />
138
+ </xsl:template>
139
+
140
+ <!-- escapes inline markers in the value param, then adds a backslash to the start if it looks like a block element marker -->
141
+ <xsl:template name="escape">
142
+ <xsl:param name="value"/>
143
+
144
+ <xsl:variable name="escaped">
145
+ <xsl:call-template name="escape-inlines">
146
+ <xsl:with-param name="text" select="$value" />
147
+ </xsl:call-template>
148
+ </xsl:variable>
149
+
150
+ <xsl:call-template name="escape-prefixes">
151
+ <xsl:with-param name="value" select="$escaped" />
152
+ </xsl:call-template>
37
153
  </xsl:template>
38
154
 
39
155
  <xsl:template match="a:act">
@@ -157,12 +273,19 @@
157
273
  </xsl:template>
158
274
 
159
275
  <!-- first text nodes of these elems must be escaped if they have special chars -->
160
- <xsl:template match="a:p[not(ancestor::a:table)]/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
276
+ <xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
161
277
  <xsl:call-template name="escape">
162
278
  <xsl:with-param name="value" select="." />
163
279
  </xsl:call-template>
164
280
  </xsl:template>
165
281
 
282
+ <!-- escape inlines in text nodes -->
283
+ <xsl:template match="text()">
284
+ <xsl:call-template name="escape-inlines">
285
+ <xsl:with-param name="text" select="." />
286
+ </xsl:call-template>
287
+ </xsl:template>
288
+
166
289
 
167
290
  <!-- attachments/schedules -->
168
291
  <xsl:template match="a:attachment">
@@ -176,7 +299,7 @@
176
299
  <xsl:text>&#10;</xsl:text>
177
300
  </xsl:if>
178
301
 
179
- <xsl:text>&#10;&#10;</xsl:text>
302
+ <xsl:text>&#10;</xsl:text>
180
303
  <xsl:apply-templates select="a:doc/a:mainBody" />
181
304
  </xsl:template>
182
305
 
@@ -192,31 +315,24 @@
192
315
  <xsl:value-of select="." />
193
316
  <xsl:text>" </xsl:text>
194
317
  </xsl:for-each>
195
- <xsl:text>
196
- |-</xsl:text>
318
+ <xsl:text>&#10;|-</xsl:text>
197
319
 
198
320
  <xsl:apply-templates />
199
- <xsl:text>
200
- |}
201
-
202
- </xsl:text>
321
+ <xsl:text>&#10;|}&#10;&#10;</xsl:text>
203
322
  </xsl:template>
204
323
 
205
324
  <xsl:template match="a:tr">
206
325
  <xsl:apply-templates />
207
- <xsl:text>
208
- |-</xsl:text>
326
+ <xsl:text>&#10;|-</xsl:text>
209
327
  </xsl:template>
210
328
 
211
329
  <xsl:template match="a:th|a:td">
212
330
  <xsl:choose>
213
331
  <xsl:when test="local-name(.) = 'th'">
214
- <xsl:text>
215
- ! </xsl:text>
332
+ <xsl:text>&#10;! </xsl:text>
216
333
  </xsl:when>
217
334
  <xsl:when test="local-name(.) = 'td'">
218
- <xsl:text>
219
- | </xsl:text>
335
+ <xsl:text>&#10;| </xsl:text>
220
336
  </xsl:when>
221
337
  </xsl:choose>
222
338
 
@@ -287,6 +403,12 @@
287
403
  <xsl:text>^_</xsl:text>
288
404
  </xsl:template>
289
405
 
406
+ <xsl:template match="a:u">
407
+ <xsl:text>__</xsl:text>
408
+ <xsl:apply-templates />
409
+ <xsl:text>__</xsl:text>
410
+ </xsl:template>
411
+
290
412
  <xsl:template match="a:eol">
291
413
  <xsl:text>&#10;</xsl:text>
292
414
  </xsl:template>
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "10.3.1"
2
+ VERSION = "10.7.0"
3
3
  end
data/slaw.gemspec CHANGED
@@ -25,5 +25,4 @@ Gem::Specification.new do |spec|
25
25
  spec.add_runtime_dependency "treetop", "~> 1.5"
26
26
  spec.add_runtime_dependency "log4r", "~> 1.1"
27
27
  spec.add_runtime_dependency "thor", "~> 0.20"
28
- spec.add_runtime_dependency "mimemagic", "~> 0.2"
29
28
  end
@@ -0,0 +1,24 @@
1
+ BODY
2
+
3
+ 1. Section that tests escapes
4
+
5
+ text \\ with a single slash
6
+
7
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
8
+
9
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
10
+
11
+ refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
12
+
13
+ nested ** stars \*\* in bold \*\***
14
+
15
+ nested // slashes \/\/ in italics \/\///
16
+
17
+ nested ** stars in // italics \*\* // and bold **
18
+
19
+ super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
20
+
21
+ underlines __underline with _ underscores__ and \_\_escaped underlines \_\_
22
+
23
+ mixed __underline **and \_\_ bold**__
24
+
@@ -78,45 +78,81 @@ XML
78
78
 
79
79
  1. Section
80
80
 
81
- \Chapter 2 ignored
81
+ \\Chapter 2 ignored
82
82
 
83
83
  Chapters
84
84
 
85
- \Part 2 ignored
85
+ \\Part 2 ignored
86
86
 
87
87
  participation
88
88
 
89
- \Schedule 2 ignored
89
+ \\Schedule 2 ignored
90
90
 
91
91
  Schedules
92
92
 
93
- \HEADING x
93
+ \\HEADING x
94
94
 
95
- \SUBHEADING x
95
+ \\SUBHEADING x
96
96
 
97
97
  BODY not escaped
98
98
 
99
- \BODY
99
+ \\BODY
100
100
 
101
101
  PREAMBLE not escaped
102
102
 
103
- \PREAMBLE
103
+ \\PREAMBLE
104
104
 
105
105
  PREFACE not escaped
106
106
 
107
- \PREFACE
107
+ \\PREFACE
108
108
 
109
- \2. ignored
109
+ \\2. ignored
110
110
 
111
- \2.1 ignored
111
+ \\2.1 ignored
112
112
 
113
- \(2) ignored
113
+ \\(2) ignored
114
114
 
115
- \(a) ignored
115
+ \\(a) ignored
116
116
 
117
- \(2a) ignored
117
+ \\(2a) ignored
118
118
 
119
- \{| ignored
119
+ \\{| ignored
120
+
121
+ '
122
+ end
123
+
124
+ it 'should escape inlines when unparsing' do
125
+ doc = xml2doc(section(<<'XML'
126
+ <num>1.</num>
127
+ <heading>Section</heading>
128
+ <paragraph id="section-1.paragraph-0">
129
+ <content>
130
+ <p>text \ with a single slash</p>
131
+ <p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
132
+ <p>inlines that ** should // be [[ escaped ![ and ]]</p>
133
+ <p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
134
+ <p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
135
+ </content>
136
+ </paragraph>
137
+ XML
138
+ ))
139
+
140
+ text = subject.text_from_act(doc)
141
+ # NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT a double backslash
142
+ # which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
143
+ text.should == 'BODY
144
+
145
+ 1. Section
146
+
147
+ text \\\\ with a single slash
148
+
149
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
150
+
151
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
152
+
153
+ refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
154
+
155
+ super ^^with \^\^^^ and sub _^\_^ with \^_^_
120
156
 
121
157
  '
122
158
  end
@@ -148,7 +184,7 @@ XML
148
184
 
149
185
  1. Section
150
186
 
151
- \(2) A special meeting [[ foo ]]:
187
+ \\(2) A special meeting [[ foo ]]:
152
188
 
153
189
  (a) the chairperson so directs; or
154
190
 
@@ -201,6 +237,27 @@ XML
201
237
 
202
238
  Hello [there](/za/act/123) friend.
203
239
 
240
+ '
241
+ end
242
+
243
+ it 'should unparse underlines correctly' do
244
+ doc = xml2doc(section(<<XML
245
+ <num>1.</num>
246
+ <paragraph id="section-19.paragraph-0">
247
+ <content>
248
+ <p>Hello <u>underlined</u>.</p>
249
+ </content>
250
+ </paragraph>
251
+ XML
252
+ ))
253
+
254
+ text = subject.text_from_act(doc)
255
+ text.should == 'BODY
256
+
257
+ 1.
258
+
259
+ Hello __underlined__.
260
+
204
261
  '
205
262
  end
206
263
 
@@ -269,4 +326,13 @@ Subject to approval in terms of this By-Law.
269
326
  '
270
327
  end
271
328
  end
329
+
330
+ describe 'round trip' do
331
+ it 'should be idempotent for escapes' do
332
+ text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
333
+ act = subject.generate_from_text(text)
334
+ xml = act.to_xml(encoding: 'utf-8')
335
+ subject.text_from_act(act).should == text
336
+ end
337
+ end
272
338
  end
@@ -117,16 +117,19 @@ EOS
117
117
  it 'should handle escaped content' do
118
118
  node = parse :body, <<EOS
119
119
  \\1. ignored
120
+ foo \\\\bar
120
121
 
121
- \\CROSSHEADING crossheading
122
+ \\CROSSHEADING cross\\heading
122
123
 
123
- 1. Section
124
+ 1. Sec\\tion
124
125
  \\Chapter 2 ignored
126
+ Some text with a \\\\real backslash
125
127
  EOS
126
128
  to_xml(node).should == '<body>
127
129
  <hcontainer eId="hcontainer_1" name="hcontainer">
128
130
  <content>
129
131
  <p>1. ignored</p>
132
+ <p>foo \\bar</p>
130
133
  <p>CROSSHEADING crossheading</p>
131
134
  </content>
132
135
  </hcontainer>
@@ -136,6 +139,7 @@ EOS
136
139
  <hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
137
140
  <content>
138
141
  <p>Chapter 2 ignored</p>
142
+ <p>Some text with a \\real backslash</p>
139
143
  </content>
140
144
  </hcontainer>
141
145
  </section>
@@ -325,6 +325,17 @@ EOS
325
325
  <p>This statement has <remark status="editorial">[<ref href="/foo/bar">a link in</ref> a remark]</remark></p>
326
326
  <p>This statement has <remark status="editorial">[a <ref href="/foo/bar">link in a remark</ref>]</remark></p>
327
327
  </content>
328
+ </hcontainer>'
329
+ end
330
+
331
+ it 'should handle escapes in links' do
332
+ node = parse :generic_container, <<EOS
333
+ Visit the site [https:\\/\\/example.com](https://example.com) for more.
334
+ EOS
335
+ to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
336
+ <content>
337
+ <p>Visit the site <ref href="https://example.com">https://example.com</ref> for more.</p>
338
+ </content>
328
339
  </hcontainer>'
329
340
  end
330
341
  end
@@ -524,4 +535,17 @@ EOS
524
535
  end
525
536
  end
526
537
 
538
+ describe 'underline' do
539
+ it 'should handle underline' do
540
+ node = parse :generic_container, <<EOS
541
+ Text __with underline__ and _ under__scores__.
542
+ EOS
543
+ to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
544
+ <content>
545
+ <p>Text <u>with underline</u> and _ under<u>scores</u>.</p>
546
+ </content>
547
+ </hcontainer>'
548
+ end
549
+ end
550
+
527
551
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.3.1
4
+ version: 10.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-11 00:00:00.000000000 Z
11
+ date: 2021-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0.20'
97
- - !ruby/object:Gem::Dependency
98
- name: mimemagic
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '0.2'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: '0.2'
111
97
  description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
112
98
  acts from plain text and PDF documents.
113
99
  email:
@@ -117,9 +103,9 @@ executables:
117
103
  extensions: []
118
104
  extra_rdoc_files: []
119
105
  files:
106
+ - ".github/workflows/test.yml"
120
107
  - ".gitignore"
121
108
  - ".rspec"
122
- - ".travis.yml"
123
109
  - Gemfile
124
110
  - LICENSE.txt
125
111
  - README.md
@@ -158,6 +144,7 @@ files:
158
144
  - spec/counters_spec.rb
159
145
  - spec/extract/extractor_spec.rb
160
146
  - spec/fixtures/community-fire-safety.xml
147
+ - spec/fixtures/roundtrip-escapes.txt
161
148
  - spec/generator_spec.rb
162
149
  - spec/parse/blocklists_spec.rb
163
150
  - spec/parse/builder_spec.rb
@@ -173,7 +160,7 @@ homepage: https://github.com/longhotsummer/slaw
173
160
  licenses:
174
161
  - MIT
175
162
  metadata: {}
176
- post_install_message:
163
+ post_install_message:
177
164
  rdoc_options: []
178
165
  require_paths:
179
166
  - lib
@@ -189,13 +176,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
176
  version: '0'
190
177
  requirements: []
191
178
  rubygems_version: 3.0.3
192
- signing_key:
179
+ signing_key:
193
180
  specification_version: 4
194
181
  summary: A lightweight library for using Akoma Ntoso acts in Ruby.
195
182
  test_files:
196
183
  - spec/counters_spec.rb
197
184
  - spec/extract/extractor_spec.rb
198
185
  - spec/fixtures/community-fire-safety.xml
186
+ - spec/fixtures/roundtrip-escapes.txt
199
187
  - spec/generator_spec.rb
200
188
  - spec/parse/blocklists_spec.rb
201
189
  - spec/parse/builder_spec.rb
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.7.0
4
- - 2.6.2
5
- - 2.5.4
6
- before_install:
7
- - gem update bundler