slaw 0.10.1 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -0
- data/lib/slaw/parse/cleanser.rb +2 -2
- data/lib/slaw/render/xsl/elements.xsl +4 -0
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act_nodes.rb +15 -2
- data/lib/slaw/za/act_text.xsl +13 -9
- data/slaw.gemspec +3 -0
- data/spec/generator_spec.rb +65 -2
- data/spec/za/act_spec.rb +26 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9bf7237f83956f343675d24a55a77043dd4eaed
|
4
|
+
data.tar.gz: 4dbde167eb2194d5f8786c2a02b97204085b031d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3e204d080a241d971c49fcf06e4ff17fdd22418fcfaca3238f985c59a710895ce0d5ab61c32fde518f9885806d9c5bbf031b461b940e82c5acff85bd8daa6f3a
|
7
|
+
data.tar.gz: ce9e7b1b28a9799ad4d8451a0225be98707fd0ab144f10cae7a61cfb16a20a3720c975d0db08b9b5bfa36802ba874cff73f1d4fa41f46048e3f70453df99e61f
|
data/README.md
CHANGED
@@ -218,6 +218,11 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
|
|
218
218
|
|
219
219
|
## Changelog
|
220
220
|
|
221
|
+
### 0.11.0
|
222
|
+
|
223
|
+
* Support newlines in table cells as EOL (or BR in HTML)
|
224
|
+
* FIX unparsing of remarks, introduced in 0.10.0
|
225
|
+
|
221
226
|
### 0.10.1
|
222
227
|
|
223
228
|
* Ensure backslash escaping handles listIntroductions and partial words correctly
|
data/lib/slaw/parse/cleanser.rb
CHANGED
@@ -18,13 +18,13 @@ module Slaw
|
|
18
18
|
s = expand_tabs(s)
|
19
19
|
s = chomp(s)
|
20
20
|
s = enforce_newline(s)
|
21
|
-
s = remove_boilerplate(s)
|
22
21
|
end
|
23
22
|
|
24
23
|
# Run deeper introspections and reformat the text, such as
|
25
24
|
# unwrapping/re-wrapping lines. These may not be safe to run
|
26
25
|
# multiple times.
|
27
26
|
def reformat(s)
|
27
|
+
s = remove_boilerplate(s)
|
28
28
|
s = unbreak_lines(s)
|
29
29
|
s = break_lines(s)
|
30
30
|
s = strip_toc(s)
|
@@ -71,7 +71,7 @@ module Slaw
|
|
71
71
|
.gsub(/^\s*\d+\s*$/, '')\
|
72
72
|
.gsub(/^.*This gazette is also available.*$/, '')\
|
73
73
|
# get rid of date lines
|
74
|
-
.gsub(/^\d
|
74
|
+
.gsub(/^\d{1,2}\s+\w+\s+\d{4}$/, '')\
|
75
75
|
# get rid of page number lines
|
76
76
|
.gsub(/^\s*page \d+( of \d+)?\s*\n/i, '')\
|
77
77
|
.gsub(/^\s*\d*\s*No\. \d+$/, '')\
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -342,7 +342,9 @@ module Slaw
|
|
342
342
|
class Table < Treetop::Runtime::SyntaxNode
|
343
343
|
def to_xml(b, idprefix, i=0)
|
344
344
|
# parse the table using wikicloth
|
345
|
-
|
345
|
+
# strip whitespace at the start of lines, to avoid wikicloth from treating it as PRE
|
346
|
+
text = self.text_value.strip.gsub(/^[ \t]+/, '')
|
347
|
+
html = WikiCloth::Parser.new({data: text}).to_html
|
346
348
|
|
347
349
|
# we need to strip any surrounding p tags and add
|
348
350
|
# an id to the table
|
@@ -355,12 +357,23 @@ module Slaw
|
|
355
357
|
p = Nokogiri::XML::Node.new("p", html)
|
356
358
|
p.children = cell.children
|
357
359
|
p.parent = cell
|
360
|
+
|
361
|
+
# replace newlines with <eol>
|
362
|
+
p.search("text()").each do |text|
|
363
|
+
lines = text.content.strip.split(/\n/)
|
364
|
+
text.content = lines.shift
|
365
|
+
|
366
|
+
for line in lines
|
367
|
+
eol = text.add_next_sibling(Nokogiri::XML::Node.new("eol", html))
|
368
|
+
text = eol.add_next_sibling(Nokogiri::XML::Text.new(line, html))
|
369
|
+
end
|
370
|
+
end
|
358
371
|
end
|
359
372
|
|
360
373
|
table.xpath('//text()[1]').each{ |t| t.content = t.content.lstrip }
|
361
374
|
table.xpath('//text()[last()]').each{ |t| t.content = t.content.rstrip }
|
362
375
|
|
363
|
-
b << table
|
376
|
+
b.parent << table
|
364
377
|
end
|
365
378
|
end
|
366
379
|
|
data/lib/slaw/za/act_text.xsl
CHANGED
@@ -103,21 +103,13 @@
|
|
103
103
|
</xsl:template>
|
104
104
|
|
105
105
|
<xsl:template match="a:p">
|
106
|
-
<xsl:call-template name="escape">
|
107
|
-
<xsl:with-param name="value" select="." />
|
108
|
-
</xsl:call-template>
|
106
|
+
<xsl:apply-templates/>
|
109
107
|
<!-- p tags must end with a newline -->
|
110
108
|
<xsl:text>
|
111
109
|
|
112
110
|
</xsl:text>
|
113
111
|
</xsl:template>
|
114
112
|
|
115
|
-
<xsl:template match="a:listIntroduction|a:intro">
|
116
|
-
<xsl:call-template name="escape">
|
117
|
-
<xsl:with-param name="value" select="." />
|
118
|
-
</xsl:call-template>
|
119
|
-
</xsl:template>
|
120
|
-
|
121
113
|
<xsl:template match="a:blockList">
|
122
114
|
<xsl:if test="a:listIntroduction != ''">
|
123
115
|
<xsl:apply-templates select="a:listIntroduction" />
|
@@ -144,6 +136,13 @@
|
|
144
136
|
<xsl:apply-templates select="./*[not(self::a:intro)]" />
|
145
137
|
</xsl:template>
|
146
138
|
|
139
|
+
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
140
|
+
<xsl:template match="a:p/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
|
141
|
+
<xsl:call-template name="escape">
|
142
|
+
<xsl:with-param name="value" select="." />
|
143
|
+
</xsl:call-template>
|
144
|
+
</xsl:template>
|
145
|
+
|
147
146
|
<!-- components/schedules -->
|
148
147
|
<xsl:template match="a:doc">
|
149
148
|
<xsl:text>Schedule - </xsl:text>
|
@@ -231,6 +230,11 @@
|
|
231
230
|
<xsl:text>]</xsl:text>
|
232
231
|
</xsl:template>
|
233
232
|
|
233
|
+
<xsl:template match="a:eol">
|
234
|
+
<xsl:text>
|
235
|
+
</xsl:text>
|
236
|
+
</xsl:template>
|
237
|
+
|
234
238
|
|
235
239
|
<!-- for most nodes, just dump their text content -->
|
236
240
|
<xsl:template match="*">
|
data/slaw.gemspec
CHANGED
@@ -29,4 +29,7 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
|
30
30
|
spec.add_runtime_dependency 'yomu', '~> 0.2.2'
|
31
31
|
spec.add_runtime_dependency 'wikicloth', '~> 0.8.3'
|
32
|
+
# anchor twitter-text to avoid bug in 1.14.3
|
33
|
+
# https://github.com/twitter/twitter-text/issues/162
|
34
|
+
spec.add_runtime_dependency 'twitter-text', '~> 1.12.0'
|
32
35
|
end
|
data/spec/generator_spec.rb
CHANGED
@@ -118,7 +118,7 @@ PREFACE not escaped
|
|
118
118
|
<paragraph id="section-9.paragraph-0">
|
119
119
|
<content>
|
120
120
|
<blockList id="section-9.paragraph-0.list1">
|
121
|
-
<listIntroduction>(2) A special meeting:</listIntroduction>
|
121
|
+
<listIntroduction>(2) A special meeting <remark>[ foo ]</remark>:</listIntroduction>
|
122
122
|
<item id="section-9.paragraph-0.list1.a">
|
123
123
|
<num>(a)</num>
|
124
124
|
<p>the chairperson so directs; or</p>
|
@@ -136,12 +136,75 @@ XML
|
|
136
136
|
text = subject.text_from_act(doc)
|
137
137
|
text.should == '1. Section
|
138
138
|
|
139
|
-
\(2) A special meeting:
|
139
|
+
\(2) A special meeting [[ foo ]]:
|
140
140
|
|
141
141
|
(a) the chairperson so directs; or
|
142
142
|
|
143
143
|
(b) a majority of the members
|
144
144
|
|
145
|
+
'
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'should unparse remarks correctly' do
|
149
|
+
doc = xml2doc(section(<<XML
|
150
|
+
<num>1.</num>
|
151
|
+
<paragraph id="section-19.paragraph-0">
|
152
|
+
<content>
|
153
|
+
<p>
|
154
|
+
<remark status="editorial">[ foo ]</remark>
|
155
|
+
</p>
|
156
|
+
<p>Section 1 <remark status="editorial">[ foo ]</remark></p>
|
157
|
+
</content>
|
158
|
+
</paragraph>
|
159
|
+
XML
|
160
|
+
))
|
161
|
+
|
162
|
+
text = subject.text_from_act(doc)
|
163
|
+
text.should == '1.
|
164
|
+
|
165
|
+
[[ foo ]]
|
166
|
+
|
167
|
+
Section 1 [[ foo ]]
|
168
|
+
|
169
|
+
'
|
170
|
+
end
|
171
|
+
|
172
|
+
it 'should replace eol with newlines in tables' do
|
173
|
+
doc = xml2doc(section(<<XML
|
174
|
+
<num>1.</num>
|
175
|
+
<table id="section-21.paragraph-0.table1">
|
176
|
+
<tr>
|
177
|
+
<td>
|
178
|
+
<p>foo<eol/>bar<eol/>baz</p>
|
179
|
+
</td>
|
180
|
+
<td>
|
181
|
+
<p>
|
182
|
+
one<eol/>two<eol/>three
|
183
|
+
|
184
|
+
</p>
|
185
|
+
</td>
|
186
|
+
</tr>
|
187
|
+
</table>'
|
188
|
+
XML
|
189
|
+
))
|
190
|
+
|
191
|
+
text = subject.text_from_act(doc)
|
192
|
+
text.should == '1.
|
193
|
+
|
194
|
+
{|
|
195
|
+
|-
|
196
|
+
| foo
|
197
|
+
bar
|
198
|
+
baz
|
199
|
+
|
|
200
|
+
one
|
201
|
+
two
|
202
|
+
three
|
203
|
+
|
204
|
+
|
205
|
+
|-
|
206
|
+
|}
|
207
|
+
|
145
208
|
'
|
146
209
|
end
|
147
210
|
end
|
data/spec/za/act_spec.rb
CHANGED
@@ -1876,6 +1876,32 @@ EOS
|
|
1876
1876
|
<td><p>r2c2</p></td></tr></table>'
|
1877
1877
|
end
|
1878
1878
|
|
1879
|
+
it 'should allow newlines in table cells' do
|
1880
|
+
node = parse :table, <<EOS
|
1881
|
+
{|
|
1882
|
+
| foo
|
1883
|
+
bar
|
1884
|
+
|
1885
|
+
baz
|
1886
|
+
|
|
1887
|
+
one
|
1888
|
+
two
|
1889
|
+
|
1890
|
+
three
|
1891
|
+
|
|
1892
|
+
four
|
1893
|
+
|
1894
|
+
|-
|
1895
|
+
|}
|
1896
|
+
EOS
|
1897
|
+
|
1898
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
1899
|
+
<tr><td><p>foo<eol/>bar<eol/><eol/>baz</p></td>
|
1900
|
+
<td><p>one<eol/>two<eol/><eol/>three</p></td>
|
1901
|
+
<td><p>four</p></td></tr>
|
1902
|
+
</table>'
|
1903
|
+
end
|
1904
|
+
|
1879
1905
|
it 'should parse a table in a section' do
|
1880
1906
|
node = parse :section, <<EOS
|
1881
1907
|
10. A section title
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.1
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.8.3
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: twitter-text
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 1.12.0
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 1.12.0
|
153
167
|
description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
|
154
168
|
acts from plain text and PDF documents.
|
155
169
|
email:
|