slaw 0.10.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/lib/slaw/parse/cleanser.rb +2 -2
- data/lib/slaw/render/xsl/elements.xsl +4 -0
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act_nodes.rb +15 -2
- data/lib/slaw/za/act_text.xsl +13 -9
- data/slaw.gemspec +3 -0
- data/spec/generator_spec.rb +65 -2
- data/spec/za/act_spec.rb +26 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9bf7237f83956f343675d24a55a77043dd4eaed
|
4
|
+
data.tar.gz: 4dbde167eb2194d5f8786c2a02b97204085b031d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e204d080a241d971c49fcf06e4ff17fdd22418fcfaca3238f985c59a710895ce0d5ab61c32fde518f9885806d9c5bbf031b461b940e82c5acff85bd8daa6f3a
|
7
|
+
data.tar.gz: ce9e7b1b28a9799ad4d8451a0225be98707fd0ab144f10cae7a61cfb16a20a3720c975d0db08b9b5bfa36802ba874cff73f1d4fa41f46048e3f70453df99e61f
|
data/README.md
CHANGED
@@ -218,6 +218,11 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
|
|
218
218
|
|
219
219
|
## Changelog
|
220
220
|
|
221
|
+
### 0.11.0
|
222
|
+
|
223
|
+
* Support newlines in table cells as EOL (or BR in HTML)
|
224
|
+
* FIX unparsing of remarks (regression introduced in 0.10.0)
|
225
|
+
|
221
226
|
### 0.10.1
|
222
227
|
|
223
228
|
* Ensure backslash escaping handles listIntroductions and partial words correctly
|
data/lib/slaw/parse/cleanser.rb
CHANGED
@@ -18,13 +18,13 @@ module Slaw
|
|
18
18
|
s = expand_tabs(s)
|
19
19
|
s = chomp(s)
|
20
20
|
s = enforce_newline(s)
|
21
|
-
s = remove_boilerplate(s)
|
22
21
|
end
|
23
22
|
|
24
23
|
# Run deeper introspections and reformat the text, such as
|
25
24
|
# unwrapping/re-wrapping lines. These may not be safe to run
|
26
25
|
# multiple times.
|
27
26
|
def reformat(s)
|
27
|
+
s = remove_boilerplate(s)
|
28
28
|
s = unbreak_lines(s)
|
29
29
|
s = break_lines(s)
|
30
30
|
s = strip_toc(s)
|
@@ -71,7 +71,7 @@ module Slaw
|
|
71
71
|
.gsub(/^\s*\d+\s*$/, '')\
|
72
72
|
.gsub(/^.*This gazette is also available.*$/, '')\
|
73
73
|
# get rid of date lines
|
74
|
-
.gsub(/^\d
|
74
|
+
.gsub(/^\d{1,2}\s+\w+\s+\d{4}$/, '')\
|
75
75
|
# get rid of page number lines
|
76
76
|
.gsub(/^\s*page \d+( of \d+)?\s*\n/i, '')\
|
77
77
|
.gsub(/^\s*\d*\s*No\. \d+$/, '')\
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -342,7 +342,9 @@ module Slaw
|
|
342
342
|
class Table < Treetop::Runtime::SyntaxNode
|
343
343
|
def to_xml(b, idprefix, i=0)
|
344
344
|
# parse the table using wikicloth
|
345
|
-
|
345
|
+
# strip whitespace at the start of lines, to prevent wikicloth from treating it as PRE
|
346
|
+
text = self.text_value.strip.gsub(/^[ \t]+/, '')
|
347
|
+
html = WikiCloth::Parser.new({data: text}).to_html
|
346
348
|
|
347
349
|
# we need to strip any surrounding p tags and add
|
348
350
|
# an id to the table
|
@@ -355,12 +357,23 @@ module Slaw
|
|
355
357
|
p = Nokogiri::XML::Node.new("p", html)
|
356
358
|
p.children = cell.children
|
357
359
|
p.parent = cell
|
360
|
+
|
361
|
+
# replace newlines with <eol>
|
362
|
+
p.search("text()").each do |text|
|
363
|
+
lines = text.content.strip.split(/\n/)
|
364
|
+
text.content = lines.shift
|
365
|
+
|
366
|
+
for line in lines
|
367
|
+
eol = text.add_next_sibling(Nokogiri::XML::Node.new("eol", html))
|
368
|
+
text = eol.add_next_sibling(Nokogiri::XML::Text.new(line, html))
|
369
|
+
end
|
370
|
+
end
|
358
371
|
end
|
359
372
|
|
360
373
|
table.xpath('//text()[1]').each{ |t| t.content = t.content.lstrip }
|
361
374
|
table.xpath('//text()[last()]').each{ |t| t.content = t.content.rstrip }
|
362
375
|
|
363
|
-
b << table
|
376
|
+
b.parent << table
|
364
377
|
end
|
365
378
|
end
|
366
379
|
|
data/lib/slaw/za/act_text.xsl
CHANGED
@@ -103,21 +103,13 @@
|
|
103
103
|
</xsl:template>
|
104
104
|
|
105
105
|
<xsl:template match="a:p">
|
106
|
-
<xsl:call-template name="escape">
|
107
|
-
<xsl:with-param name="value" select="." />
|
108
|
-
</xsl:call-template>
|
106
|
+
<xsl:apply-templates/>
|
109
107
|
<!-- p tags must end with a newline -->
|
110
108
|
<xsl:text>
|
111
109
|
|
112
110
|
</xsl:text>
|
113
111
|
</xsl:template>
|
114
112
|
|
115
|
-
<xsl:template match="a:listIntroduction|a:intro">
|
116
|
-
<xsl:call-template name="escape">
|
117
|
-
<xsl:with-param name="value" select="." />
|
118
|
-
</xsl:call-template>
|
119
|
-
</xsl:template>
|
120
|
-
|
121
113
|
<xsl:template match="a:blockList">
|
122
114
|
<xsl:if test="a:listIntroduction != ''">
|
123
115
|
<xsl:apply-templates select="a:listIntroduction" />
|
@@ -144,6 +136,13 @@
|
|
144
136
|
<xsl:apply-templates select="./*[not(self::a:intro)]" />
|
145
137
|
</xsl:template>
|
146
138
|
|
139
|
+
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
140
|
+
<xsl:template match="a:p/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
|
141
|
+
<xsl:call-template name="escape">
|
142
|
+
<xsl:with-param name="value" select="." />
|
143
|
+
</xsl:call-template>
|
144
|
+
</xsl:template>
|
145
|
+
|
147
146
|
<!-- components/schedules -->
|
148
147
|
<xsl:template match="a:doc">
|
149
148
|
<xsl:text>Schedule - </xsl:text>
|
@@ -231,6 +230,11 @@
|
|
231
230
|
<xsl:text>]</xsl:text>
|
232
231
|
</xsl:template>
|
233
232
|
|
233
|
+
<xsl:template match="a:eol">
|
234
|
+
<xsl:text>
|
235
|
+
</xsl:text>
|
236
|
+
</xsl:template>
|
237
|
+
|
234
238
|
|
235
239
|
<!-- for most nodes, just dump their text content -->
|
236
240
|
<xsl:template match="*">
|
data/slaw.gemspec
CHANGED
@@ -29,4 +29,7 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
|
30
30
|
spec.add_runtime_dependency 'yomu', '~> 0.2.2'
|
31
31
|
spec.add_runtime_dependency 'wikicloth', '~> 0.8.3'
|
32
|
+
# anchor twitter-text to avoid bug in 1.14.3
|
33
|
+
# https://github.com/twitter/twitter-text/issues/162
|
34
|
+
spec.add_runtime_dependency 'twitter-text', '~> 1.12.0'
|
32
35
|
end
|
data/spec/generator_spec.rb
CHANGED
@@ -118,7 +118,7 @@ PREFACE not escaped
|
|
118
118
|
<paragraph id="section-9.paragraph-0">
|
119
119
|
<content>
|
120
120
|
<blockList id="section-9.paragraph-0.list1">
|
121
|
-
<listIntroduction>(2) A special meeting:</listIntroduction>
|
121
|
+
<listIntroduction>(2) A special meeting <remark>[ foo ]</remark>:</listIntroduction>
|
122
122
|
<item id="section-9.paragraph-0.list1.a">
|
123
123
|
<num>(a)</num>
|
124
124
|
<p>the chairperson so directs; or</p>
|
@@ -136,12 +136,75 @@ XML
|
|
136
136
|
text = subject.text_from_act(doc)
|
137
137
|
text.should == '1. Section
|
138
138
|
|
139
|
-
\(2) A special meeting:
|
139
|
+
\(2) A special meeting [[ foo ]]:
|
140
140
|
|
141
141
|
(a) the chairperson so directs; or
|
142
142
|
|
143
143
|
(b) a majority of the members
|
144
144
|
|
145
|
+
'
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'should unparse remarks correctly' do
|
149
|
+
doc = xml2doc(section(<<XML
|
150
|
+
<num>1.</num>
|
151
|
+
<paragraph id="section-19.paragraph-0">
|
152
|
+
<content>
|
153
|
+
<p>
|
154
|
+
<remark status="editorial">[ foo ]</remark>
|
155
|
+
</p>
|
156
|
+
<p>Section 1 <remark status="editorial">[ foo ]</remark></p>
|
157
|
+
</content>
|
158
|
+
</paragraph>
|
159
|
+
XML
|
160
|
+
))
|
161
|
+
|
162
|
+
text = subject.text_from_act(doc)
|
163
|
+
text.should == '1.
|
164
|
+
|
165
|
+
[[ foo ]]
|
166
|
+
|
167
|
+
Section 1 [[ foo ]]
|
168
|
+
|
169
|
+
'
|
170
|
+
end
|
171
|
+
|
172
|
+
it 'should replace eol with newlines in tables' do
|
173
|
+
doc = xml2doc(section(<<XML
|
174
|
+
<num>1.</num>
|
175
|
+
<table id="section-21.paragraph-0.table1">
|
176
|
+
<tr>
|
177
|
+
<td>
|
178
|
+
<p>foo<eol/>bar<eol/>baz</p>
|
179
|
+
</td>
|
180
|
+
<td>
|
181
|
+
<p>
|
182
|
+
one<eol/>two<eol/>three
|
183
|
+
|
184
|
+
</p>
|
185
|
+
</td>
|
186
|
+
</tr>
|
187
|
+
</table>'
|
188
|
+
XML
|
189
|
+
))
|
190
|
+
|
191
|
+
text = subject.text_from_act(doc)
|
192
|
+
text.should == '1.
|
193
|
+
|
194
|
+
{|
|
195
|
+
|-
|
196
|
+
| foo
|
197
|
+
bar
|
198
|
+
baz
|
199
|
+
|
|
200
|
+
one
|
201
|
+
two
|
202
|
+
three
|
203
|
+
|
204
|
+
|
205
|
+
|-
|
206
|
+
|}
|
207
|
+
|
145
208
|
'
|
146
209
|
end
|
147
210
|
end
|
data/spec/za/act_spec.rb
CHANGED
@@ -1876,6 +1876,32 @@ EOS
|
|
1876
1876
|
<td><p>r2c2</p></td></tr></table>'
|
1877
1877
|
end
|
1878
1878
|
|
1879
|
+
it 'should allow newlines in table cells' do
|
1880
|
+
node = parse :table, <<EOS
|
1881
|
+
{|
|
1882
|
+
| foo
|
1883
|
+
bar
|
1884
|
+
|
1885
|
+
baz
|
1886
|
+
|
|
1887
|
+
one
|
1888
|
+
two
|
1889
|
+
|
1890
|
+
three
|
1891
|
+
|
|
1892
|
+
four
|
1893
|
+
|
1894
|
+
|-
|
1895
|
+
|}
|
1896
|
+
EOS
|
1897
|
+
|
1898
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
1899
|
+
<tr><td><p>foo<eol/>bar<eol/><eol/>baz</p></td>
|
1900
|
+
<td><p>one<eol/>two<eol/><eol/>three</p></td>
|
1901
|
+
<td><p>four</p></td></tr>
|
1902
|
+
</table>'
|
1903
|
+
end
|
1904
|
+
|
1879
1905
|
it 'should parse a table in a section' do
|
1880
1906
|
node = parse :section, <<EOS
|
1881
1907
|
10. A section title
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.8.3
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: twitter-text
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 1.12.0
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 1.12.0
|
153
167
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
154
168
|
acts from plain text and PDF documents.
|
155
169
|
email:
|