slaw 10.3.1 → 10.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 85950871314a64110b980b50860cae66fce1bc9536374fca99204506150da04b
4
- data.tar.gz: 8dc252e644f14502f652b0daa4e18bcaadaf9d310be23b9a8f7cbffe3b85123f
3
+ metadata.gz: 6e4cdd070ad171d0d999cbaa93da4fb2723df0a2d430679833d3588903049575
4
+ data.tar.gz: 33bffb144c455bb8d35f7c8dc7e3593041eead5d1c76569575afa8df73bd0a8e
5
5
  SHA512:
6
- metadata.gz: d56cf86ff1e6502b82d6d79e76a4630e9259babcf57aaee54cc5ef2435b59a1a8ec5f6a60d58b81992cac392352f6db1acdce11aac18fc8ec461f2f8e1a92ef4
7
- data.tar.gz: 19f716a0069a1d157a6f2407b1f2d9ef4610447b31b5c7113a4b7ec090e19b6e6981573c4f37baa08f2e60e8ad5864122827a3e0454b29df68f958a81500f8b9
6
+ metadata.gz: b69cf6e2303be5096d3697ced6fe3bb1538d46a5657c718f8e1c6e8e4edee1b1c24ea4b7a6f199920499412b716cfa21e0fddb72be46928377d02145236831cf
7
+ data.tar.gz: 13744a12c6e8f62d90cd6fd4b553e3d45f4dd7b38ca076c6da2d2dcea7e72a31feb9345f687d7d29fa5a0d9a8874c4a506929f5c1a73838f2b551df8cc063806
@@ -0,0 +1,22 @@
1
+ name: Test
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+
9
+ strategy:
10
+ matrix:
11
+ ruby-version: [2.7, 2.6]
12
+
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+ - name: Set up Ruby ${{ matrix.ruby-version }}
16
+ uses: ruby/setup-ruby@v1
17
+ with:
18
+ ruby-version: ${{ matrix.ruby-version }}
19
+ - name: Install dependencies
20
+ run: bundle install
21
+ - name: Run tests
22
+ run: bundle exec rake
data/README.md CHANGED
@@ -86,6 +86,26 @@ You can create your own grammar by creating a gem that provides these files and
86
86
 
87
87
  ## Changelog
88
88
 
89
+ ### 10.7.0 (11 June 2021)
90
+
91
+ * Support underlines with `__text__`
92
+
93
+ ### 10.6.0 (10 May 2021)
94
+
95
+ * Handle sup and sub when extracting from HTML.
96
+
97
+ ### 10.5.0 (20 April 2021)
98
+
99
+ * Handle escaping inlines when unparsing.
100
+
101
+ ### 10.4.1 (14 April 2021)
102
+
103
+ * Handle escaping in inlines, so that forward slashes in link text are unescaped correctly, e.g. `[https:\/\/example.com](https://example.com)`
104
+
105
+ ### 10.4.0 (9 April 2021)
106
+
107
+ * Remove dependency on mimemagic. Guess file type based on filename instead.
108
+
89
109
  ### 10.3.1 (11 January 2021)
90
110
 
91
111
  * Strip ascii, unicode general and unicode supplemental punctuation from num elements when building eIds
@@ -1,5 +1,3 @@
1
- require 'mimemagic'
2
-
3
1
  module Slaw
4
2
  module Extract
5
3
 
@@ -13,15 +11,10 @@ module Slaw
13
11
  #
14
12
  # @return [String] extracted text
15
13
  def extract_from_file(filename)
16
- mimetype = get_mimetype(filename)
17
-
18
- case mimetype && mimetype.type
19
- when 'text/html'
14
+ if filename.end_with? '.html' or filename.end_with? '.htm'
20
15
  extract_from_html(filename)
21
- when 'text/plain', nil
22
- extract_from_text(filename)
23
16
  else
24
- raise ArgumentError.new("Unsupported file type #{mimetype || 'unknown'}")
17
+ extract_from_text(filename)
25
18
  end
26
19
  end
27
20
 
@@ -11,9 +11,10 @@
11
11
 
12
12
  <xsl:template match="head|style|script|link" />
13
13
 
14
- <xsl:template match="ul|ol">
14
+ <!-- block containers that end with newlines -->
15
+ <xsl:template match="ul|ol|section|article|h1|h2|h3|h4|h5">
15
16
  <xsl:apply-templates />
16
- <xsl:text>&#10;</xsl:text>
17
+ <xsl:text>&#10;&#10;</xsl:text>
17
18
  </xsl:template>
18
19
 
19
20
  <xsl:template match="ul/li">
@@ -23,20 +24,23 @@
23
24
  <xsl:text>&#10;</xsl:text>
24
25
  </xsl:template>
25
26
 
27
+ <!-- numbered lists should include a number -->
26
28
  <xsl:template match="ol/li">
27
- <!-- 1. foo -->
29
+ <!-- \1. foo -->
28
30
  <xsl:text>\</xsl:text>
29
- <xsl:value-of select="position()" />
31
+ <xsl:choose>
32
+ <xsl:when test="@value">
33
+ <xsl:value-of select="@value" />
34
+ </xsl:when>
35
+ <xsl:otherwise>
36
+ <xsl:value-of select="position()" />
37
+ </xsl:otherwise>
38
+ </xsl:choose>
30
39
  <xsl:text>. </xsl:text>
31
40
  <xsl:apply-templates />
32
41
  <xsl:text>&#10;</xsl:text>
33
42
  </xsl:template>
34
43
 
35
- <xsl:template match="h1|h2|h3|h4|h5">
36
- <xsl:apply-templates />
37
- <xsl:text>&#10;&#10;</xsl:text>
38
- </xsl:template>
39
-
40
44
  <xsl:template match="p|div">
41
45
  <xsl:choose>
42
46
  <xsl:when test="starts-with(., '[') and substring(., string-length(.)) = ']'">
@@ -51,32 +55,27 @@
51
55
  <xsl:text>&#10;&#10;</xsl:text>
52
56
  </xsl:template>
53
57
 
58
+ <!-- START tables -->
59
+
54
60
  <xsl:template match="table">
55
61
  <xsl:text>{| </xsl:text>
56
- <xsl:text>
57
- |-</xsl:text>
62
+ <xsl:text>&#10;|-</xsl:text>
58
63
  <xsl:apply-templates />
59
- <xsl:text>
60
- |}
61
-
62
- </xsl:text>
64
+ <xsl:text>&#10;|}&#10;&#10;</xsl:text>
63
65
  </xsl:template>
64
66
 
65
67
  <xsl:template match="tr">
66
68
  <xsl:apply-templates />
67
- <xsl:text>
68
- |-</xsl:text>
69
+ <xsl:text>&#10;|-</xsl:text>
69
70
  </xsl:template>
70
71
 
71
72
  <xsl:template match="th|td">
72
73
  <xsl:choose>
73
74
  <xsl:when test="local-name(.) = 'th'">
74
- <xsl:text>
75
- ! </xsl:text>
75
+ <xsl:text>&#10;! </xsl:text>
76
76
  </xsl:when>
77
77
  <xsl:when test="local-name(.) = 'td'">
78
- <xsl:text>
79
- | </xsl:text>
78
+ <xsl:text>&#10;| </xsl:text>
80
79
  </xsl:when>
81
80
  </xsl:choose>
82
81
 
@@ -118,8 +117,15 @@
118
117
  </xsl:template>
119
118
 
120
119
  <xsl:template match="br">
121
- <xsl:text>
122
- </xsl:text>
120
+ <xsl:text>&#10;</xsl:text>
121
+ </xsl:template>
122
+
123
+ <xsl:template match="sup">
124
+ <xsl:text>^^</xsl:text><xsl:apply-templates /><xsl:text>^^</xsl:text>
125
+ </xsl:template>
126
+
127
+ <xsl:template match="sub">
128
+ <xsl:text>_^</xsl:text><xsl:apply-templates /><xsl:text>^_</xsl:text>
123
129
  </xsl:template>
124
130
 
125
131
 
@@ -20,7 +20,7 @@ module Slaw
20
20
  end
21
21
 
22
22
  rule inline_item
23
- remark / image / ref / bold / italics / superscript / subscript / [^\n]
23
+ remark / image / ref / bold / italics / superscript / subscript / underline / '\\'? [^\n]
24
24
  <InlineItem>
25
25
  end
26
26
 
@@ -69,6 +69,12 @@ module Slaw
69
69
  <Subscript>
70
70
  end
71
71
 
72
+ rule underline
73
+ # __foo__
74
+ '__' content:(!'__' inline_item)+ '__'
75
+ <Underline>
76
+ end
77
+
72
78
  end
73
79
  end
74
80
  end
@@ -37,7 +37,12 @@ module Slaw
37
37
 
38
38
  class InlineItem < Treetop::Runtime::SyntaxNode
39
39
  def to_xml(b, idprefix)
40
- b.text(text_value)
40
+ if text_value.start_with? '\\'
41
+ # handle escaped characters: \a -> a
42
+ b.text(text_value[1..])
43
+ else
44
+ b.text(text_value)
45
+ end
41
46
  end
42
47
  end
43
48
 
@@ -91,6 +96,15 @@ module Slaw
91
96
  end
92
97
  end
93
98
 
99
+ class Underline < Treetop::Runtime::SyntaxNode
100
+ def to_xml(b, idprefix)
101
+ b.u { |b|
102
+ for e in content.elements
103
+ e.inline_item.to_xml(b, idprefix)
104
+ end
105
+ }
106
+ end
107
+ end
94
108
  end
95
109
  end
96
110
  end
@@ -9,31 +9,147 @@
9
9
  <xsl:strip-space elements="*"/>
10
10
  <xsl:preserve-space elements="a:a a:affectedDocument a:b a:block a:caption a:change a:concept a:courtType a:date a:def a:del a:docCommittee a:docDate a:docIntroducer a:docJurisdiction a:docNumber a:docProponent a:docPurpose a:docStage a:docStatus a:docTitle a:docType a:docketNumber a:entity a:event a:extractText a:fillIn a:from a:heading a:i a:inline a:ins a:judge a:lawyer a:legislature a:li a:listConclusion a:listIntroduction a:location a:mmod a:mod a:mref a:narrative a:neutralCitation a:num a:object a:omissis a:opinion a:organization a:outcome a:p a:party a:person a:placeholder a:process a:quantity a:quotedText a:recordedTime a:ref a:relatedDocument a:remark a:rmod a:role a:rref a:scene a:session a:shortTitle a:signature a:span a:sub a:subheading a:summary a:sup a:term a:tocItem a:u a:vote"/>
11
11
 
12
+ <!-- replaces "value" in "text" with "replacement" -->
13
+ <xsl:template name="string-replace-all">
14
+ <xsl:param name="text" />
15
+ <xsl:param name="value" />
16
+ <xsl:param name="replacement" />
17
+
18
+ <xsl:choose>
19
+ <xsl:when test="$text = '' or $value = '' or not($value)">
20
+ <xsl:value-of select="$text" />
21
+ </xsl:when>
22
+ <xsl:when test="contains($text, $value)">
23
+ <xsl:value-of select="substring-before($text, $value)"/>
24
+ <xsl:value-of select="$replacement" />
25
+ <xsl:call-template name="string-replace-all">
26
+ <xsl:with-param name="text" select="substring-after($text, $value)" />
27
+ <xsl:with-param name="value" select="$value" />
28
+ <xsl:with-param name="replacement" select="$replacement" />
29
+ </xsl:call-template>
30
+ </xsl:when>
31
+ <xsl:otherwise>
32
+ <xsl:value-of select="$text" />
33
+ </xsl:otherwise>
34
+ </xsl:choose>
35
+ </xsl:template>
36
+
37
+ <!-- Escape inline markers with a backslash -->
38
+ <xsl:template name="escape-inlines">
39
+ <xsl:param name="text" />
40
+
41
+ <!-- This works from the inside out, first escaping backslash chars themselves, then escaping
42
+ the different types of inline markers -->
43
+ <xsl:call-template name="string-replace-all">
44
+ <xsl:with-param name="text">
45
+ <xsl:call-template name="string-replace-all">
46
+ <xsl:with-param name="text">
47
+ <xsl:call-template name="string-replace-all">
48
+ <xsl:with-param name="text">
49
+ <xsl:call-template name="string-replace-all">
50
+ <xsl:with-param name="text">
51
+ <xsl:call-template name="string-replace-all">
52
+ <xsl:with-param name="text">
53
+ <xsl:call-template name="string-replace-all">
54
+ <xsl:with-param name="text">
55
+ <xsl:call-template name="string-replace-all">
56
+ <xsl:with-param name="text">
57
+ <xsl:call-template name="string-replace-all">
58
+ <xsl:with-param name="text">
59
+ <xsl:call-template name="string-replace-all">
60
+ <xsl:with-param name="text">
61
+ <xsl:call-template name="string-replace-all">
62
+ <xsl:with-param name="text">
63
+ <xsl:call-template name="string-replace-all">
64
+ <xsl:with-param name="text" select="$text" />
65
+ <xsl:with-param name="value"><xsl:value-of select="'\'" /></xsl:with-param>
66
+ <xsl:with-param name="replacement"><xsl:value-of select="'\\'" /></xsl:with-param>
67
+ </xsl:call-template>
68
+ </xsl:with-param>
69
+ <xsl:with-param name="value"><xsl:value-of select="'**'" /></xsl:with-param>
70
+ <xsl:with-param name="replacement"><xsl:value-of select="'\*\*'" /></xsl:with-param>
71
+ </xsl:call-template>
72
+ </xsl:with-param>
73
+ <xsl:with-param name="value"><xsl:value-of select="'__'" /></xsl:with-param>
74
+ <xsl:with-param name="replacement"><xsl:value-of select="'\_\_'" /></xsl:with-param>
75
+ </xsl:call-template>
76
+ </xsl:with-param>
77
+ <xsl:with-param name="value"><xsl:value-of select="'//'" /></xsl:with-param>
78
+ <xsl:with-param name="replacement"><xsl:value-of select="'\/\/'" /></xsl:with-param>
79
+ </xsl:call-template>
80
+ </xsl:with-param>
81
+ <xsl:with-param name="value"><xsl:value-of select="'_^'" /></xsl:with-param>
82
+ <xsl:with-param name="replacement"><xsl:value-of select="'\_^'" /></xsl:with-param>
83
+ </xsl:call-template>
84
+ </xsl:with-param>
85
+ <xsl:with-param name="value"><xsl:value-of select="'^_'" /></xsl:with-param>
86
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^_'" /></xsl:with-param>
87
+ </xsl:call-template>
88
+ </xsl:with-param>
89
+ <xsl:with-param name="value"><xsl:value-of select="'^^'" /></xsl:with-param>
90
+ <xsl:with-param name="replacement"><xsl:value-of select="'\^\^'" /></xsl:with-param>
91
+ </xsl:call-template>
92
+ </xsl:with-param>
93
+ <xsl:with-param name="value"><xsl:value-of select="'!['" /></xsl:with-param>
94
+ <xsl:with-param name="replacement"><xsl:value-of select="'\!['" /></xsl:with-param>
95
+ </xsl:call-template>
96
+ </xsl:with-param>
97
+ <xsl:with-param name="value"><xsl:value-of select="']('" /></xsl:with-param>
98
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]('" /></xsl:with-param>
99
+ </xsl:call-template>
100
+ </xsl:with-param>
101
+ <xsl:with-param name="value"><xsl:value-of select="'[['" /></xsl:with-param>
102
+ <xsl:with-param name="replacement"><xsl:value-of select="'\[\['" /></xsl:with-param>
103
+ </xsl:call-template>
104
+ </xsl:with-param>
105
+ <xsl:with-param name="value"><xsl:value-of select="']]'" /></xsl:with-param>
106
+ <xsl:with-param name="replacement"><xsl:value-of select="'\]\]'" /></xsl:with-param>
107
+ </xsl:call-template>
108
+ </xsl:template>
109
+
12
110
  <!-- adds a backslash to the start of the value param, if necessary -->
13
- <xsl:template name="escape">
111
+ <xsl:template name="escape-prefixes">
14
112
  <xsl:param name="value"/>
15
113
 
16
114
  <xsl:variable name="prefix" select="translate(substring($value, 1, 13), 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')" />
17
115
  <!-- '(' is considered special, so translate numbers into '(' so we can find and escape them -->
18
116
  <xsl:variable name="numprefix" select="translate(substring($value, 1, 3), '1234567890', '((((((((((')" />
19
117
 
20
- <!-- p tags must escape initial content that looks like a block element marker -->
21
- <xsl:if test="$prefix = 'BODY' or
22
- $prefix = 'PREAMBLE' or
23
- $prefix = 'PREFACE' or
24
- starts-with($prefix, 'CHAPTER ') or
25
- starts-with($prefix, 'PART ') or
26
- starts-with($prefix, 'SUBPART ') or
27
- starts-with($prefix, 'SCHEDULE ') or
28
- starts-with($prefix, 'HEADING ') or
29
- starts-with($prefix, 'SUBHEADING ') or
30
- starts-with($prefix, 'LONGTITLE ') or
31
- starts-with($prefix, 'CROSSHEADING ') or
32
- starts-with($prefix, '{|') or
33
- starts-with($numprefix, '(')">
34
- <xsl:text>\</xsl:text>
35
- </xsl:if>
36
- <xsl:value-of select="$value"/>
118
+ <xsl:variable name="slash">
119
+ <!-- p tags must escape initial content that looks like a block element marker -->
120
+ <xsl:if test="$prefix = 'BODY' or
121
+ $prefix = 'PREAMBLE' or
122
+ $prefix = 'PREFACE' or
123
+ starts-with($prefix, 'CHAPTER ') or
124
+ starts-with($prefix, 'PART ') or
125
+ starts-with($prefix, 'SUBPART ') or
126
+ starts-with($prefix, 'SCHEDULE ') or
127
+ starts-with($prefix, 'HEADING ') or
128
+ starts-with($prefix, 'SUBHEADING ') or
129
+ starts-with($prefix, 'LONGTITLE ') or
130
+ starts-with($prefix, 'CROSSHEADING ') or
131
+ starts-with($prefix, '{|') or
132
+ starts-with($numprefix, '(')">
133
+ <xsl:value-of select="'\'" />
134
+ </xsl:if>
135
+ </xsl:variable>
136
+
137
+ <xsl:value-of select="concat($slash, $value)" />
138
+ </xsl:template>
139
+
140
+ <!-- escapes inline markers in the value param, then adds a backslash to the start of the result, if necessary -->
141
+ <xsl:template name="escape">
142
+ <xsl:param name="value"/>
143
+
144
+ <xsl:variable name="escaped">
145
+ <xsl:call-template name="escape-inlines">
146
+ <xsl:with-param name="text" select="$value" />
147
+ </xsl:call-template>
148
+ </xsl:variable>
149
+
150
+ <xsl:call-template name="escape-prefixes">
151
+ <xsl:with-param name="value" select="$escaped" />
152
+ </xsl:call-template>
37
153
  </xsl:template>
38
154
 
39
155
  <xsl:template match="a:act">
@@ -157,12 +273,19 @@
157
273
  </xsl:template>
158
274
 
159
275
  <!-- first text nodes of these elems must be escaped if they have special chars -->
160
- <xsl:template match="a:p[not(ancestor::a:table)]/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
276
+ <xsl:template match="a:p[not(ancestor::a:table)]/text()[not(preceding-sibling::*)] | a:listIntroduction/text()[not(preceding-sibling::*)] | a:intro/text()[not(preceding-sibling::*)]">
161
277
  <xsl:call-template name="escape">
162
278
  <xsl:with-param name="value" select="." />
163
279
  </xsl:call-template>
164
280
  </xsl:template>
165
281
 
282
+ <!-- escape inlines in text nodes -->
283
+ <xsl:template match="text()">
284
+ <xsl:call-template name="escape-inlines">
285
+ <xsl:with-param name="text" select="." />
286
+ </xsl:call-template>
287
+ </xsl:template>
288
+
166
289
 
167
290
  <!-- attachments/schedules -->
168
291
  <xsl:template match="a:attachment">
@@ -176,7 +299,7 @@
176
299
  <xsl:text>&#10;</xsl:text>
177
300
  </xsl:if>
178
301
 
179
- <xsl:text>&#10;&#10;</xsl:text>
302
+ <xsl:text>&#10;</xsl:text>
180
303
  <xsl:apply-templates select="a:doc/a:mainBody" />
181
304
  </xsl:template>
182
305
 
@@ -192,31 +315,24 @@
192
315
  <xsl:value-of select="." />
193
316
  <xsl:text>" </xsl:text>
194
317
  </xsl:for-each>
195
- <xsl:text>
196
- |-</xsl:text>
318
+ <xsl:text>&#10;|-</xsl:text>
197
319
 
198
320
  <xsl:apply-templates />
199
- <xsl:text>
200
- |}
201
-
202
- </xsl:text>
321
+ <xsl:text>&#10;|}&#10;&#10;</xsl:text>
203
322
  </xsl:template>
204
323
 
205
324
  <xsl:template match="a:tr">
206
325
  <xsl:apply-templates />
207
- <xsl:text>
208
- |-</xsl:text>
326
+ <xsl:text>&#10;|-</xsl:text>
209
327
  </xsl:template>
210
328
 
211
329
  <xsl:template match="a:th|a:td">
212
330
  <xsl:choose>
213
331
  <xsl:when test="local-name(.) = 'th'">
214
- <xsl:text>
215
- ! </xsl:text>
332
+ <xsl:text>&#10;! </xsl:text>
216
333
  </xsl:when>
217
334
  <xsl:when test="local-name(.) = 'td'">
218
- <xsl:text>
219
- | </xsl:text>
335
+ <xsl:text>&#10;| </xsl:text>
220
336
  </xsl:when>
221
337
  </xsl:choose>
222
338
 
@@ -287,6 +403,12 @@
287
403
  <xsl:text>^_</xsl:text>
288
404
  </xsl:template>
289
405
 
406
+ <xsl:template match="a:u">
407
+ <xsl:text>__</xsl:text>
408
+ <xsl:apply-templates />
409
+ <xsl:text>__</xsl:text>
410
+ </xsl:template>
411
+
290
412
  <xsl:template match="a:eol">
291
413
  <xsl:text>&#10;</xsl:text>
292
414
  </xsl:template>
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "10.3.1"
2
+ VERSION = "10.7.0"
3
3
  end
data/slaw.gemspec CHANGED
@@ -25,5 +25,4 @@ Gem::Specification.new do |spec|
25
25
  spec.add_runtime_dependency "treetop", "~> 1.5"
26
26
  spec.add_runtime_dependency "log4r", "~> 1.1"
27
27
  spec.add_runtime_dependency "thor", "~> 0.20"
28
- spec.add_runtime_dependency "mimemagic", "~> 0.2"
29
28
  end
@@ -0,0 +1,24 @@
1
+ BODY
2
+
3
+ 1. Section that tests escapes
4
+
5
+ text \\ with a single slash
6
+
7
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
8
+
9
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
10
+
11
+ refs [https:\/\/example.com with ] and and \]( and **nested \*\* stars \*\***](#foo)
12
+
13
+ nested ** stars \*\* in bold \*\***
14
+
15
+ nested // slashes \/\/ in italics \/\///
16
+
17
+ nested ** stars in // italics \*\* // and bold **
18
+
19
+ super ^^with \^\^ hats \^\^^^ and sub _^\_^ with \^_ end tokens \^_^_
20
+
21
+ underlines __underline with _ underscores__ and \_\_escaped underlines \_\_
22
+
23
+ mixed __underline **and \_\_ bold**__
24
+
@@ -78,45 +78,81 @@ XML
78
78
 
79
79
  1. Section
80
80
 
81
- \Chapter 2 ignored
81
+ \\Chapter 2 ignored
82
82
 
83
83
  Chapters
84
84
 
85
- \Part 2 ignored
85
+ \\Part 2 ignored
86
86
 
87
87
  participation
88
88
 
89
- \Schedule 2 ignored
89
+ \\Schedule 2 ignored
90
90
 
91
91
  Schedules
92
92
 
93
- \HEADING x
93
+ \\HEADING x
94
94
 
95
- \SUBHEADING x
95
+ \\SUBHEADING x
96
96
 
97
97
  BODY not escaped
98
98
 
99
- \BODY
99
+ \\BODY
100
100
 
101
101
  PREAMBLE not escaped
102
102
 
103
- \PREAMBLE
103
+ \\PREAMBLE
104
104
 
105
105
  PREFACE not escaped
106
106
 
107
- \PREFACE
107
+ \\PREFACE
108
108
 
109
- \2. ignored
109
+ \\2. ignored
110
110
 
111
- \2.1 ignored
111
+ \\2.1 ignored
112
112
 
113
- \(2) ignored
113
+ \\(2) ignored
114
114
 
115
- \(a) ignored
115
+ \\(a) ignored
116
116
 
117
- \(2a) ignored
117
+ \\(2a) ignored
118
118
 
119
- \{| ignored
119
+ \\{| ignored
120
+
121
+ '
122
+ end
123
+
124
+ it 'should escape inlines when unparsing' do
125
+ doc = xml2doc(section(<<'XML'
126
+ <num>1.</num>
127
+ <heading>Section</heading>
128
+ <paragraph id="section-1.paragraph-0">
129
+ <content>
130
+ <p>text \ with a single slash</p>
131
+ <p>some <b>inlines // <ref href="#foo">with // slashes</ref></b></p>
132
+ <p>inlines that ** should // be [[ escaped ![ and ]]</p>
133
+ <p>refs <ref href="#foo">https://example.com with ] and ]( and <b>nested **</b></ref></p>
134
+ <p>super <sup>with ^^</sup> and sub <sub>_^ with ^_</sub></p>
135
+ </content>
136
+ </paragraph>
137
+ XML
138
+ ))
139
+
140
+ text = subject.text_from_act(doc)
141
+ # NOTE: in single quoted strings, backslash sequences aren't considered special, EXCEPT \' and a double backslash
142
+ # which is actually a single backslash. So \\ needs to be \\\\ while \* is just \*. The mind boggles.
143
+ text.should == 'BODY
144
+
145
+ 1. Section
146
+
147
+ text \\\\ with a single slash
148
+
149
+ some **inlines \/\/ [with \/\/ slashes](#foo)**
150
+
151
+ inlines that \*\* should \/\/ be \[\[ escaped \![ and \]\]
152
+
153
+ refs [https:\/\/example.com with ] and \]( and **nested \*\***](#foo)
154
+
155
+ super ^^with \^\^^^ and sub _^\_^ with \^_^_
120
156
 
121
157
  '
122
158
  end
@@ -148,7 +184,7 @@ XML
148
184
 
149
185
  1. Section
150
186
 
151
- \(2) A special meeting [[ foo ]]:
187
+ \\(2) A special meeting [[ foo ]]:
152
188
 
153
189
  (a) the chairperson so directs; or
154
190
 
@@ -201,6 +237,27 @@ XML
201
237
 
202
238
  Hello [there](/za/act/123) friend.
203
239
 
240
+ '
241
+ end
242
+
243
+ it 'should unparse underlines correctly' do
244
+ doc = xml2doc(section(<<XML
245
+ <num>1.</num>
246
+ <paragraph id="section-19.paragraph-0">
247
+ <content>
248
+ <p>Hello <u>underlined</u>.</p>
249
+ </content>
250
+ </paragraph>
251
+ XML
252
+ ))
253
+
254
+ text = subject.text_from_act(doc)
255
+ text.should == 'BODY
256
+
257
+ 1.
258
+
259
+ Hello __underlined__.
260
+
204
261
  '
205
262
  end
206
263
 
@@ -269,4 +326,13 @@ Subject to approval in terms of this By-Law.
269
326
  '
270
327
  end
271
328
  end
329
+
330
+ describe 'round trip' do
331
+ it 'should be idempotent for escapes' do
332
+ text = File.open('spec/fixtures/roundtrip-escapes.txt', 'r').read()
333
+ act = subject.generate_from_text(text)
334
+ xml = act.to_xml(encoding: 'utf-8')
335
+ subject.text_from_act(act).should == text
336
+ end
337
+ end
272
338
  end
@@ -117,16 +117,19 @@ EOS
117
117
  it 'should handle escaped content' do
118
118
  node = parse :body, <<EOS
119
119
  \\1. ignored
120
+ foo \\\\bar
120
121
 
121
- \\CROSSHEADING crossheading
122
+ \\CROSSHEADING cross\\heading
122
123
 
123
- 1. Section
124
+ 1. Sec\\tion
124
125
  \\Chapter 2 ignored
126
+ Some text with a \\\\real backslash
125
127
  EOS
126
128
  to_xml(node).should == '<body>
127
129
  <hcontainer eId="hcontainer_1" name="hcontainer">
128
130
  <content>
129
131
  <p>1. ignored</p>
132
+ <p>foo \\bar</p>
130
133
  <p>CROSSHEADING crossheading</p>
131
134
  </content>
132
135
  </hcontainer>
@@ -136,6 +139,7 @@ EOS
136
139
  <hcontainer eId="sec_1__hcontainer_1" name="hcontainer">
137
140
  <content>
138
141
  <p>Chapter 2 ignored</p>
142
+ <p>Some text with a \\real backslash</p>
139
143
  </content>
140
144
  </hcontainer>
141
145
  </section>
@@ -325,6 +325,17 @@ EOS
325
325
  <p>This statement has <remark status="editorial">[<ref href="/foo/bar">a link in</ref> a remark]</remark></p>
326
326
  <p>This statement has <remark status="editorial">[a <ref href="/foo/bar">link in a remark</ref>]</remark></p>
327
327
  </content>
328
+ </hcontainer>'
329
+ end
330
+
331
+ it 'should handle escapes in links' do
332
+ node = parse :generic_container, <<EOS
333
+ Visit the site [https:\\/\\/example.com](https://example.com) for more.
334
+ EOS
335
+ to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
336
+ <content>
337
+ <p>Visit the site <ref href="https://example.com">https://example.com</ref> for more.</p>
338
+ </content>
328
339
  </hcontainer>'
329
340
  end
330
341
  end
@@ -524,4 +535,17 @@ EOS
524
535
  end
525
536
  end
526
537
 
538
+ describe 'underline' do
539
+ it 'should handle underline' do
540
+ node = parse :generic_container, <<EOS
541
+ Text __with underline__ and _ under__scores__.
542
+ EOS
543
+ to_xml(node, "").should == '<hcontainer eId="hcontainer_1" name="hcontainer">
544
+ <content>
545
+ <p>Text <u>with underline</u> and _ under<u>scores</u>.</p>
546
+ </content>
547
+ </hcontainer>'
548
+ end
549
+ end
550
+
527
551
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.3.1
4
+ version: 10.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-11 00:00:00.000000000 Z
11
+ date: 2021-06-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0.20'
97
- - !ruby/object:Gem::Dependency
98
- name: mimemagic
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: '0.2'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: '0.2'
111
97
  description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
112
98
  acts from plain text and PDF documents.
113
99
  email:
@@ -117,9 +103,9 @@ executables:
117
103
  extensions: []
118
104
  extra_rdoc_files: []
119
105
  files:
106
+ - ".github/workflows/test.yml"
120
107
  - ".gitignore"
121
108
  - ".rspec"
122
- - ".travis.yml"
123
109
  - Gemfile
124
110
  - LICENSE.txt
125
111
  - README.md
@@ -158,6 +144,7 @@ files:
158
144
  - spec/counters_spec.rb
159
145
  - spec/extract/extractor_spec.rb
160
146
  - spec/fixtures/community-fire-safety.xml
147
+ - spec/fixtures/roundtrip-escapes.txt
161
148
  - spec/generator_spec.rb
162
149
  - spec/parse/blocklists_spec.rb
163
150
  - spec/parse/builder_spec.rb
@@ -173,7 +160,7 @@ homepage: https://github.com/longhotsummer/slaw
173
160
  licenses:
174
161
  - MIT
175
162
  metadata: {}
176
- post_install_message:
163
+ post_install_message:
177
164
  rdoc_options: []
178
165
  require_paths:
179
166
  - lib
@@ -189,13 +176,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
176
  version: '0'
190
177
  requirements: []
191
178
  rubygems_version: 3.0.3
192
- signing_key:
179
+ signing_key:
193
180
  specification_version: 4
194
181
  summary: A lightweight library for using Akoma Ntoso acts in Ruby.
195
182
  test_files:
196
183
  - spec/counters_spec.rb
197
184
  - spec/extract/extractor_spec.rb
198
185
  - spec/fixtures/community-fire-safety.xml
186
+ - spec/fixtures/roundtrip-escapes.txt
199
187
  - spec/generator_spec.rb
200
188
  - spec/parse/blocklists_spec.rb
201
189
  - spec/parse/builder_spec.rb
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.7.0
4
- - 2.6.2
5
- - 2.5.4
6
- before_install:
7
- - gem update bundler