slaw 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/slaw/parse/builder.rb +16 -0
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act.treetop +39 -5
- data/lib/slaw/za/act_nodes.rb +59 -30
- data/lib/slaw/za/act_text.xsl +9 -1
- data/slaw.gemspec +0 -1
- data/spec/parse/builder_spec.rb +40 -0
- data/spec/za/act_block_spec.rb +0 -182
- data/spec/za/act_inline_spec.rb +0 -2
- data/spec/za/act_schedules_spec.rb +0 -2
- data/spec/za/act_table_spec.rb +340 -0
- metadata +4 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80c548b7ff92ff7d2b73d3bab81790c38f3b964f
|
4
|
+
data.tar.gz: 98aa8da8542794b9fe35b490ad8092a77880f692
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34b23775bab7704112c4b33723e10db00505008cdd7f025ab8d7de0d9fd0504a0a4aad3728644bebedb26a13067637b01a8f74d7560c62e1c79095df3876c87a
|
7
|
+
data.tar.gz: fe525892734b77635bf45f458c64a80fdf36318d56e7d56065dba3faa178593d0285c3d7014cd71f0ea5082630c7f4f82c8c8bf71fa4b72b0fd505f65d4bcb23
|
data/README.md
CHANGED
@@ -218,6 +218,10 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
|
|
218
218
|
|
219
219
|
## Changelog
|
220
220
|
|
221
|
+
### 0.17.0
|
222
|
+
|
223
|
+
* Support links and images inside tables, by parsing tables natively.
|
224
|
+
|
221
225
|
### 0.16.0
|
222
226
|
|
223
227
|
* Support --crop for PDFs. Requires [poppler](https://poppler.freedesktop.org/) pdftotext, not xpdf.
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -68,6 +68,21 @@ module Slaw
|
|
68
68
|
postprocess(parse_xml(parse_text(text, parse_options)))
|
69
69
|
end
|
70
70
|
|
71
|
+
# Pre-process text just before parsing it using the grammar.
|
72
|
+
#
|
73
|
+
# @param text [String] the text to preprocess
|
74
|
+
# @return [String] text ready to parse
|
75
|
+
def preprocess(text)
|
76
|
+
# our grammar doesn't handle inline table cells; instead, we break
|
77
|
+
# inline cells into block-style cells
|
78
|
+
|
79
|
+
# first, find all the tables
|
80
|
+
text.gsub(/{\|(?!\|}).*?\|}/m) do |table|
|
81
|
+
# on each table line, split inline cells into block cells
|
82
|
+
table.split("\n").map { |line| line.gsub(/(\|\||!!)/) { |m| "\n" + m[0]} }.join("\n")
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
71
86
|
# Parse text into XML. You should still run {#postprocess} on the
|
72
87
|
# resulting XML to normalise it.
|
73
88
|
#
|
@@ -76,6 +91,7 @@ module Slaw
|
|
76
91
|
#
|
77
92
|
# @return [String] an XML string
|
78
93
|
def parse_text(text, parse_options={})
|
94
|
+
text = preprocess(text)
|
79
95
|
tree = text_to_syntax_tree(text, parse_options)
|
80
96
|
xml_from_syntax_tree(tree)
|
81
97
|
end
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act.treetop
CHANGED
@@ -171,17 +171,51 @@ module Slaw
|
|
171
171
|
('(' letter_ordinal ')') / dotted_number_3
|
172
172
|
end
|
173
173
|
|
174
|
+
##########
|
175
|
+
# MediaWiki-style tables
|
176
|
+
#
|
177
|
+
# this grammar doesn't support inline table cells (eg: | col1 || col2 || col3)
|
178
|
+
# instead, the builder preprocesses tables to break inline cells onto their own
|
179
|
+
# lines, which we do support.
|
180
|
+
|
174
181
|
rule table
|
175
|
-
space?
|
182
|
+
space? '{|' eol
|
183
|
+
table_body
|
184
|
+
'|}' eol
|
176
185
|
<Table>
|
177
186
|
end
|
178
187
|
|
179
|
-
rule
|
180
|
-
|
188
|
+
rule table_body
|
189
|
+
(table_row / table_cell)*
|
190
|
+
end
|
191
|
+
|
192
|
+
rule table_row
|
193
|
+
'|-' space? eol
|
194
|
+
end
|
195
|
+
|
196
|
+
rule table_cell
|
197
|
+
# don't match end-of-table
|
198
|
+
!'|}'
|
199
|
+
[!|] attribs:table_attribs? space?
|
200
|
+
# first content line, then multiple lines
|
201
|
+
content:(line:table_line (![!|] space? line:table_line)*)
|
202
|
+
<TableCell>
|
203
|
+
end
|
204
|
+
|
205
|
+
rule table_line
|
206
|
+
clauses:clauses? eol
|
207
|
+
<TableLine>
|
208
|
+
end
|
209
|
+
|
210
|
+
rule table_attribs
|
211
|
+
space? attribs:(table_attrib+) '|'
|
181
212
|
end
|
182
213
|
|
183
|
-
rule
|
184
|
-
|
214
|
+
rule table_attrib
|
215
|
+
name:([a-z_-]+) '=' value:(
|
216
|
+
('"' (!'"' .)* '"') /
|
217
|
+
("'" (!"'" .)* "'"))
|
218
|
+
space?
|
185
219
|
end
|
186
220
|
|
187
221
|
##########
|
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'wikicloth'
|
2
|
-
|
3
1
|
module Slaw
|
4
2
|
module ZA
|
5
3
|
module Act
|
@@ -368,39 +366,70 @@ module Slaw
|
|
368
366
|
|
369
367
|
class Table < Treetop::Runtime::SyntaxNode
|
370
368
|
def to_xml(b, idprefix, i=0)
|
371
|
-
#
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
p = Nokogiri::XML::Node.new("p", html)
|
385
|
-
p.children = cell.children
|
386
|
-
p.parent = cell
|
387
|
-
|
388
|
-
# replace newlines with <eol>
|
389
|
-
p.search("text()").each do |text|
|
390
|
-
lines = text.content.strip.split(/\n/)
|
391
|
-
text.content = lines.shift
|
392
|
-
|
393
|
-
for line in lines
|
394
|
-
eol = text.add_next_sibling(Nokogiri::XML::Node.new("eol", html))
|
395
|
-
text = eol.add_next_sibling(Nokogiri::XML::Text.new(line, html))
|
369
|
+
b.table(id: "#{idprefix}table#{i}") { |b|
|
370
|
+
# we'll gather cells into this row list
|
371
|
+
rows = []
|
372
|
+
cells = []
|
373
|
+
|
374
|
+
for child in table_body.elements
|
375
|
+
if child.is_a? TableCell
|
376
|
+
# cell
|
377
|
+
cells << child
|
378
|
+
else
|
379
|
+
# new row marker
|
380
|
+
rows << cells unless cells.empty?
|
381
|
+
cells = []
|
396
382
|
end
|
397
383
|
end
|
384
|
+
rows << cells unless cells.empty?
|
385
|
+
|
386
|
+
for row in rows
|
387
|
+
b.tr { |tr|
|
388
|
+
for cell in row
|
389
|
+
cell.to_xml(tr, "")
|
390
|
+
end
|
391
|
+
}
|
392
|
+
end
|
393
|
+
}
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
class TableCell < Treetop::Runtime::SyntaxNode
|
398
|
+
def to_xml(b, idprefix)
|
399
|
+
tag = text_value[0] == '!' ? 'th' : 'td'
|
400
|
+
|
401
|
+
attrs = {}
|
402
|
+
if not attribs.empty?
|
403
|
+
for item in attribs.attribs.elements
|
404
|
+
# key=value (strip quotes around value)
|
405
|
+
attrs[item.name.text_value.strip] = item.value.text_value[1..-2]
|
406
|
+
end
|
398
407
|
end
|
399
408
|
|
400
|
-
|
401
|
-
|
409
|
+
b.send(tag.to_sym, attrs) { |b|
|
410
|
+
b.p { |b|
|
411
|
+
# first line, and the rest
|
412
|
+
lines = [content.line] + content.elements.last.elements.map(&:line)
|
402
413
|
|
403
|
-
|
414
|
+
lines.each_with_index do |line, i|
|
415
|
+
line.to_xml(b, i, i == lines.length-1)
|
416
|
+
end
|
417
|
+
}
|
418
|
+
}
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
class TableLine < Treetop::Runtime::SyntaxNode
|
423
|
+
# line of table content
|
424
|
+
def to_xml(b, i, tail)
|
425
|
+
clauses.to_xml(b) unless clauses.empty?
|
426
|
+
|
427
|
+
# add trailing newlines.
|
428
|
+
# for the first line, eat whitespace at the start
|
429
|
+
# for the last line, eat whitespace at the end
|
430
|
+
if not tail and (i > 0 or not clauses.empty?)
|
431
|
+
eol.text_value.count("\n").times { b.eol }
|
432
|
+
end
|
404
433
|
end
|
405
434
|
end
|
406
435
|
|
data/lib/slaw/za/act_text.xsl
CHANGED
@@ -137,7 +137,7 @@
|
|
137
137
|
</xsl:template>
|
138
138
|
|
139
139
|
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
140
|
-
<xsl:template match="a:p/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
|
140
|
+
<xsl:template match="a:p[not(ancestor::a:table)]/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
|
141
141
|
<xsl:call-template name="escape">
|
142
142
|
<xsl:with-param name="value" select="." />
|
143
143
|
</xsl:call-template>
|
@@ -238,6 +238,14 @@
|
|
238
238
|
<xsl:text>)</xsl:text>
|
239
239
|
</xsl:template>
|
240
240
|
|
241
|
+
<xsl:template match="a:img">
|
242
|
+
<xsl:text>![</xsl:text>
|
243
|
+
<xsl:value-of select="@alt" />
|
244
|
+
<xsl:text>](</xsl:text>
|
245
|
+
<xsl:value-of select="@src" />
|
246
|
+
<xsl:text>)</xsl:text>
|
247
|
+
</xsl:template>
|
248
|
+
|
241
249
|
<xsl:template match="a:eol">
|
242
250
|
<xsl:text>
|
243
251
|
</xsl:text>
|
data/slaw.gemspec
CHANGED
@@ -28,7 +28,6 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_runtime_dependency "thor", "~> 0.19.1"
|
29
29
|
spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
|
30
30
|
spec.add_runtime_dependency 'yomu', '~> 0.2.2'
|
31
|
-
spec.add_runtime_dependency 'wikicloth', '~> 0.8.3'
|
32
31
|
# anchor twitter-text to avoid bug in 1.14.3
|
33
32
|
# https://github.com/twitter/twitter-text/issues/162
|
34
33
|
spec.add_runtime_dependency 'twitter-text', '~> 1.12.0'
|
data/spec/parse/builder_spec.rb
CHANGED
@@ -800,4 +800,44 @@ XML
|
|
800
800
|
)
|
801
801
|
end
|
802
802
|
end
|
803
|
+
|
804
|
+
describe '#preprocess' do
|
805
|
+
it 'should split inline table cells into block table cells' do
|
806
|
+
text = <<EOS
|
807
|
+
foo
|
808
|
+
| bar || baz
|
809
|
+
|
810
|
+
{|
|
811
|
+
| boom || one !! two
|
812
|
+
|-
|
813
|
+
| three
|
814
|
+
|}
|
815
|
+
|
816
|
+
xxx
|
817
|
+
|
818
|
+
{|
|
819
|
+
| colspan="2" | bar || baz
|
820
|
+
|}
|
821
|
+
EOS
|
822
|
+
subject.preprocess(text).should == <<EOS
|
823
|
+
foo
|
824
|
+
| bar || baz
|
825
|
+
|
826
|
+
{|
|
827
|
+
| boom
|
828
|
+
| one
|
829
|
+
! two
|
830
|
+
|-
|
831
|
+
| three
|
832
|
+
|}
|
833
|
+
|
834
|
+
xxx
|
835
|
+
|
836
|
+
{|
|
837
|
+
| colspan="2" | bar
|
838
|
+
| baz
|
839
|
+
|}
|
840
|
+
EOS
|
841
|
+
end
|
842
|
+
end
|
803
843
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'builder'
|
4
|
-
|
5
3
|
require 'slaw'
|
6
4
|
|
7
5
|
describe Slaw::ActGenerator do
|
@@ -1887,186 +1885,6 @@ EOS
|
|
1887
1885
|
|
1888
1886
|
end
|
1889
1887
|
|
1890
|
-
#-------------------------------------------------------------------------------
|
1891
|
-
# tables
|
1892
|
-
|
1893
|
-
describe 'tables' do
|
1894
|
-
it 'should parse basic tables' do
|
1895
|
-
node = parse :table, <<EOS
|
1896
|
-
{|
|
1897
|
-
| r1c1
|
1898
|
-
| r1c2
|
1899
|
-
|-
|
1900
|
-
| r2c1
|
1901
|
-
| r2c2
|
1902
|
-
|}
|
1903
|
-
EOS
|
1904
|
-
|
1905
|
-
node.text_value.should == "{|\n| r1c1\n| r1c2\n|-\n| r2c1\n| r2c2\n|}\n"
|
1906
|
-
to_xml(node, "prefix.").should == '<table id="prefix.table0"><tr><td><p>r1c1</p></td>
|
1907
|
-
<td><p>r1c2</p></td></tr>
|
1908
|
-
<tr><td><p>r2c1</p></td>
|
1909
|
-
<td><p>r2c2</p></td></tr></table>'
|
1910
|
-
end
|
1911
|
-
|
1912
|
-
it 'should allow newlines in table cells' do
|
1913
|
-
node = parse :table, <<EOS
|
1914
|
-
{|
|
1915
|
-
| foo
|
1916
|
-
bar
|
1917
|
-
|
1918
|
-
baz
|
1919
|
-
|
|
1920
|
-
one
|
1921
|
-
two
|
1922
|
-
|
1923
|
-
three
|
1924
|
-
|
|
1925
|
-
four
|
1926
|
-
|
1927
|
-
|-
|
1928
|
-
|}
|
1929
|
-
EOS
|
1930
|
-
|
1931
|
-
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
1932
|
-
<tr><td><p>foo<eol/>bar<eol/><eol/>baz</p></td>
|
1933
|
-
<td><p>one<eol/>two<eol/><eol/>three</p></td>
|
1934
|
-
<td><p>four</p></td></tr>
|
1935
|
-
</table>'
|
1936
|
-
end
|
1937
|
-
|
1938
|
-
it 'should parse a table in a section' do
|
1939
|
-
node = parse :section, <<EOS
|
1940
|
-
10. A section title
|
1941
|
-
|
1942
|
-
Heres a table:
|
1943
|
-
|
1944
|
-
{|
|
1945
|
-
| r1c1
|
1946
|
-
| r1c2
|
1947
|
-
|-
|
1948
|
-
| r2c1
|
1949
|
-
| r2c2
|
1950
|
-
|}
|
1951
|
-
EOS
|
1952
|
-
|
1953
|
-
xml = to_xml(node)
|
1954
|
-
xml.should == '<section id="section-10">
|
1955
|
-
<num>10.</num>
|
1956
|
-
<heading>A section title</heading>
|
1957
|
-
<paragraph id="section-10.paragraph-0">
|
1958
|
-
<content>
|
1959
|
-
<p>Heres a table:</p>
|
1960
|
-
<table id="section-10.paragraph-0.table1"><tr><td><p>r1c1</p></td>
|
1961
|
-
<td><p>r1c2</p></td></tr>
|
1962
|
-
<tr><td><p>r2c1</p></td>
|
1963
|
-
<td><p>r2c2</p></td></tr></table>
|
1964
|
-
</content>
|
1965
|
-
</paragraph>
|
1966
|
-
</section>'
|
1967
|
-
end
|
1968
|
-
|
1969
|
-
it 'should parse a table in a schedule' do
|
1970
|
-
node = parse :schedule, <<EOS
|
1971
|
-
Schedule 1
|
1972
|
-
|
1973
|
-
Heres a table:
|
1974
|
-
|
1975
|
-
{|
|
1976
|
-
| r1c1
|
1977
|
-
| r1c2
|
1978
|
-
|-
|
1979
|
-
| r2c1
|
1980
|
-
| r2c2
|
1981
|
-
|}
|
1982
|
-
EOS
|
1983
|
-
|
1984
|
-
xml = to_xml(node, "")
|
1985
|
-
today = Time.now.strftime('%Y-%m-%d')
|
1986
|
-
xml.should == '<component id="component-schedule1">
|
1987
|
-
<doc name="schedule1">
|
1988
|
-
<meta>
|
1989
|
-
<identification source="#slaw">
|
1990
|
-
<FRBRWork>
|
1991
|
-
<FRBRthis value="/za/act/1980/01/schedule1"/>
|
1992
|
-
<FRBRuri value="/za/act/1980/01"/>
|
1993
|
-
<FRBRalias value="Schedule 1"/>
|
1994
|
-
<FRBRdate date="1980-01-01" name="Generation"/>
|
1995
|
-
<FRBRauthor href="#council"/>
|
1996
|
-
<FRBRcountry value="za"/>
|
1997
|
-
</FRBRWork>
|
1998
|
-
<FRBRExpression>
|
1999
|
-
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
2000
|
-
<FRBRuri value="/za/act/1980/01/eng@"/>
|
2001
|
-
<FRBRdate date="1980-01-01" name="Generation"/>
|
2002
|
-
<FRBRauthor href="#council"/>
|
2003
|
-
<FRBRlanguage language="eng"/>
|
2004
|
-
</FRBRExpression>
|
2005
|
-
<FRBRManifestation>
|
2006
|
-
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
2007
|
-
<FRBRuri value="/za/act/1980/01/eng@"/>
|
2008
|
-
<FRBRdate date="' + today + '" name="Generation"/>
|
2009
|
-
<FRBRauthor href="#slaw"/>
|
2010
|
-
</FRBRManifestation>
|
2011
|
-
</identification>
|
2012
|
-
</meta>
|
2013
|
-
<mainBody>
|
2014
|
-
<article id="schedule1">
|
2015
|
-
<paragraph id="schedule1.paragraph-0">
|
2016
|
-
<content>
|
2017
|
-
<p>Heres a table:</p>
|
2018
|
-
<table id="schedule1.paragraph-0.table1"><tr><td><p>r1c1</p></td>
|
2019
|
-
<td><p>r1c2</p></td></tr>
|
2020
|
-
<tr><td><p>r2c1</p></td>
|
2021
|
-
<td><p>r2c2</p></td></tr></table>
|
2022
|
-
</content>
|
2023
|
-
</paragraph>
|
2024
|
-
</article>
|
2025
|
-
</mainBody>
|
2026
|
-
</doc>
|
2027
|
-
</component>'
|
2028
|
-
end
|
2029
|
-
|
2030
|
-
it 'should ignore an escaped table' do
|
2031
|
-
node = parse :block_paragraphs, <<EOS
|
2032
|
-
\\{|
|
2033
|
-
| r1c1
|
2034
|
-
| r1c2
|
2035
|
-
|}
|
2036
|
-
EOS
|
2037
|
-
|
2038
|
-
to_xml(node).should == '<paragraph id="paragraph-0">
|
2039
|
-
<content>
|
2040
|
-
<p>{|</p>
|
2041
|
-
<p>| r1c1</p>
|
2042
|
-
<p>| r1c2</p>
|
2043
|
-
<p>|}</p>
|
2044
|
-
</content>
|
2045
|
-
</paragraph>'
|
2046
|
-
end
|
2047
|
-
|
2048
|
-
it 'should allow a table as part of a subsection' do
|
2049
|
-
node = parse :subsection, <<EOS
|
2050
|
-
(1) {|
|
2051
|
-
| foo
|
2052
|
-
|}
|
2053
|
-
EOS
|
2054
|
-
|
2055
|
-
to_xml(node, '', 0).should == '<subsection id="1">
|
2056
|
-
<num>(1)</num>
|
2057
|
-
<content>
|
2058
|
-
<table id="1.table0">
|
2059
|
-
<tr>
|
2060
|
-
<td>
|
2061
|
-
<p>foo</p>
|
2062
|
-
</td>
|
2063
|
-
</tr>
|
2064
|
-
</table>
|
2065
|
-
</content>
|
2066
|
-
</subsection>'
|
2067
|
-
end
|
2068
|
-
end
|
2069
|
-
|
2070
1888
|
#-------------------------------------------------------------------------------
|
2071
1889
|
# clauses
|
2072
1890
|
|
data/spec/za/act_inline_spec.rb
CHANGED
data/spec/za/act_schedules_spec.rb
CHANGED
data/spec/za/act_table_spec.rb
ADDED
@@ -0,0 +1,340 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'slaw'
|
4
|
+
|
5
|
+
describe Slaw::ActGenerator do
|
6
|
+
def parse(rule, s)
|
7
|
+
subject.builder.text_to_syntax_tree(s, {root: rule})
|
8
|
+
end
|
9
|
+
|
10
|
+
def should_parse(rule, s)
|
11
|
+
s << "\n" unless s.end_with?("\n")
|
12
|
+
tree = subject.builder.text_to_syntax_tree(s, {root: rule})
|
13
|
+
|
14
|
+
if not tree
|
15
|
+
raise Exception.new(subject.failure_reason || "Couldn't match to grammar") if tree.nil?
|
16
|
+
else
|
17
|
+
# count an assertion
|
18
|
+
tree.should_not be_nil
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_xml(node, *args)
|
23
|
+
b = ::Nokogiri::XML::Builder.new
|
24
|
+
node.to_xml(b, *args)
|
25
|
+
b.doc.root.to_xml(encoding: 'UTF-8')
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'tables' do
|
29
|
+
it 'should parse basic tables' do
|
30
|
+
node = parse :table, <<EOS
|
31
|
+
{|
|
32
|
+
! r1c1
|
33
|
+
| r1c2
|
34
|
+
|-
|
35
|
+
| r2c1
|
36
|
+
| r2c2
|
37
|
+
|}
|
38
|
+
EOS
|
39
|
+
|
40
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
41
|
+
<tr>
|
42
|
+
<th>
|
43
|
+
<p>r1c1</p>
|
44
|
+
</th>
|
45
|
+
<td>
|
46
|
+
<p>r1c2</p>
|
47
|
+
</td>
|
48
|
+
</tr>
|
49
|
+
<tr>
|
50
|
+
<td>
|
51
|
+
<p>r2c1</p>
|
52
|
+
</td>
|
53
|
+
<td>
|
54
|
+
<p>r2c2</p>
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
</table>'
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'should handle tables with empty cells' do
|
61
|
+
node = parse :table, <<EOS
|
62
|
+
{|
|
63
|
+
!
|
64
|
+
|
|
65
|
+
|-
|
66
|
+
|
|
67
|
+
|
68
|
+
|
|
69
|
+
|-
|
70
|
+
|-
|
71
|
+
|}
|
72
|
+
EOS
|
73
|
+
|
74
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
75
|
+
<tr>
|
76
|
+
<th>
|
77
|
+
<p/>
|
78
|
+
</th>
|
79
|
+
<td>
|
80
|
+
<p/>
|
81
|
+
</td>
|
82
|
+
</tr>
|
83
|
+
<tr>
|
84
|
+
<td>
|
85
|
+
<p/>
|
86
|
+
</td>
|
87
|
+
<td>
|
88
|
+
<p/>
|
89
|
+
</td>
|
90
|
+
</tr>
|
91
|
+
</table>'
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'should parse table attributes' do
|
95
|
+
node = parse :table, <<EOS
|
96
|
+
{|
|
97
|
+
| colspan="2" | r1c1
|
98
|
+
| rowspan="1" colspan='3' | r1c2
|
99
|
+
|-
|
100
|
+
|a="b"| r2c1
|
101
|
+
|a="b"c="d" | r2c2
|
102
|
+
|}
|
103
|
+
EOS
|
104
|
+
|
105
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
106
|
+
<tr>
|
107
|
+
<td colspan="2">
|
108
|
+
<p>r1c1</p>
|
109
|
+
</td>
|
110
|
+
<td rowspan="1" colspan="3">
|
111
|
+
<p>r1c2</p>
|
112
|
+
</td>
|
113
|
+
</tr>
|
114
|
+
<tr>
|
115
|
+
<td a="b">
|
116
|
+
<p>r2c1</p>
|
117
|
+
</td>
|
118
|
+
<td a="b" c="d">
|
119
|
+
<p>r2c2</p>
|
120
|
+
</td>
|
121
|
+
</tr>
|
122
|
+
</table>'
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'should allow newlines in table cells' do
|
126
|
+
node = parse :table, <<EOS
|
127
|
+
{|
|
128
|
+
| foo
|
129
|
+
bar
|
130
|
+
|
131
|
+
baz
|
132
|
+
|
|
133
|
+
one
|
134
|
+
two
|
135
|
+
|
136
|
+
three
|
137
|
+
|
|
138
|
+
four
|
139
|
+
|
140
|
+
|-
|
141
|
+
|}
|
142
|
+
EOS
|
143
|
+
|
144
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
145
|
+
<tr>
|
146
|
+
<td>
|
147
|
+
<p>foo<eol/>bar<eol/><eol/>baz</p>
|
148
|
+
</td>
|
149
|
+
<td>
|
150
|
+
<p>one<eol/>two<eol/><eol/>three</p>
|
151
|
+
</td>
|
152
|
+
<td>
|
153
|
+
<p>four</p>
|
154
|
+
</td>
|
155
|
+
</tr>
|
156
|
+
</table>'
|
157
|
+
end
|
158
|
+
|
159
|
+
it 'should parse a table in a section' do
|
160
|
+
node = parse :section, <<EOS
|
161
|
+
10. A section title
|
162
|
+
|
163
|
+
Heres a table:
|
164
|
+
|
165
|
+
{|
|
166
|
+
| r1c1
|
167
|
+
| r1c2
|
168
|
+
|-
|
169
|
+
| r2c1
|
170
|
+
| r2c2
|
171
|
+
|}
|
172
|
+
EOS
|
173
|
+
|
174
|
+
xml = to_xml(node)
|
175
|
+
xml.should == '<section id="section-10">
|
176
|
+
<num>10.</num>
|
177
|
+
<heading>A section title</heading>
|
178
|
+
<paragraph id="section-10.paragraph-0">
|
179
|
+
<content>
|
180
|
+
<p>Heres a table:</p>
|
181
|
+
<table id="section-10.paragraph-0.table1">
|
182
|
+
<tr>
|
183
|
+
<td>
|
184
|
+
<p>r1c1</p>
|
185
|
+
</td>
|
186
|
+
<td>
|
187
|
+
<p>r1c2</p>
|
188
|
+
</td>
|
189
|
+
</tr>
|
190
|
+
<tr>
|
191
|
+
<td>
|
192
|
+
<p>r2c1</p>
|
193
|
+
</td>
|
194
|
+
<td>
|
195
|
+
<p>r2c2</p>
|
196
|
+
</td>
|
197
|
+
</tr>
|
198
|
+
</table>
|
199
|
+
</content>
|
200
|
+
</paragraph>
|
201
|
+
</section>'
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'should parse a table in a schedule' do
|
205
|
+
node = parse :schedule, <<EOS
|
206
|
+
Schedule 1
|
207
|
+
|
208
|
+
Heres a table:
|
209
|
+
|
210
|
+
{|
|
211
|
+
| r1c1
|
212
|
+
| r1c2
|
213
|
+
|-
|
214
|
+
| r2c1
|
215
|
+
| r2c2
|
216
|
+
|}
|
217
|
+
EOS
|
218
|
+
|
219
|
+
xml = to_xml(node, "")
|
220
|
+
today = Time.now.strftime('%Y-%m-%d')
|
221
|
+
xml.should == '<component id="component-schedule1">
|
222
|
+
<doc name="schedule1">
|
223
|
+
<meta>
|
224
|
+
<identification source="#slaw">
|
225
|
+
<FRBRWork>
|
226
|
+
<FRBRthis value="/za/act/1980/01/schedule1"/>
|
227
|
+
<FRBRuri value="/za/act/1980/01"/>
|
228
|
+
<FRBRalias value="Schedule 1"/>
|
229
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
230
|
+
<FRBRauthor href="#council"/>
|
231
|
+
<FRBRcountry value="za"/>
|
232
|
+
</FRBRWork>
|
233
|
+
<FRBRExpression>
|
234
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
235
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
236
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
237
|
+
<FRBRauthor href="#council"/>
|
238
|
+
<FRBRlanguage language="eng"/>
|
239
|
+
</FRBRExpression>
|
240
|
+
<FRBRManifestation>
|
241
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
242
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
243
|
+
<FRBRdate date="' + today + '" name="Generation"/>
|
244
|
+
<FRBRauthor href="#slaw"/>
|
245
|
+
</FRBRManifestation>
|
246
|
+
</identification>
|
247
|
+
</meta>
|
248
|
+
<mainBody>
|
249
|
+
<article id="schedule1">
|
250
|
+
<paragraph id="schedule1.paragraph-0">
|
251
|
+
<content>
|
252
|
+
<p>Heres a table:</p>
|
253
|
+
<table id="schedule1.paragraph-0.table1">
|
254
|
+
<tr>
|
255
|
+
<td>
|
256
|
+
<p>r1c1</p>
|
257
|
+
</td>
|
258
|
+
<td>
|
259
|
+
<p>r1c2</p>
|
260
|
+
</td>
|
261
|
+
</tr>
|
262
|
+
<tr>
|
263
|
+
<td>
|
264
|
+
<p>r2c1</p>
|
265
|
+
</td>
|
266
|
+
<td>
|
267
|
+
<p>r2c2</p>
|
268
|
+
</td>
|
269
|
+
</tr>
|
270
|
+
</table>
|
271
|
+
</content>
|
272
|
+
</paragraph>
|
273
|
+
</article>
|
274
|
+
</mainBody>
|
275
|
+
</doc>
|
276
|
+
</component>'
|
277
|
+
end
|
278
|
+
|
279
|
+
it 'should ignore an escaped table' do
|
280
|
+
node = parse :block_paragraphs, <<EOS
|
281
|
+
\\{|
|
282
|
+
| r1c1
|
283
|
+
| r1c2
|
284
|
+
|}
|
285
|
+
EOS
|
286
|
+
|
287
|
+
to_xml(node).should == '<paragraph id="paragraph-0">
|
288
|
+
<content>
|
289
|
+
<p>{|</p>
|
290
|
+
<p>| r1c1</p>
|
291
|
+
<p>| r1c2</p>
|
292
|
+
<p>|}</p>
|
293
|
+
</content>
|
294
|
+
</paragraph>'
|
295
|
+
end
|
296
|
+
|
297
|
+
it 'should allow a table as part of a subsection' do
|
298
|
+
node = parse :subsection, <<EOS
|
299
|
+
(1) {|
|
300
|
+
| foo
|
301
|
+
|}
|
302
|
+
EOS
|
303
|
+
|
304
|
+
to_xml(node, '', 0).should == '<subsection id="1">
|
305
|
+
<num>(1)</num>
|
306
|
+
<content>
|
307
|
+
<table id="1.table0">
|
308
|
+
<tr>
|
309
|
+
<td>
|
310
|
+
<p>foo</p>
|
311
|
+
</td>
|
312
|
+
</tr>
|
313
|
+
</table>
|
314
|
+
</content>
|
315
|
+
</subsection>'
|
316
|
+
end
|
317
|
+
|
318
|
+
it 'should allow links in a table' do
|
319
|
+
node = parse :table, <<EOS
|
320
|
+
{|
|
321
|
+
| a [link](/a/b) in a table
|
322
|
+
| [link](/a/b) and
|
323
|
+
[[comment]]
|
324
|
+
|}
|
325
|
+
EOS
|
326
|
+
|
327
|
+
to_xml(node, '', 0).should == '<table id="table0">
|
328
|
+
<tr>
|
329
|
+
<td>
|
330
|
+
<p>a <ref href="/a/b">link</ref> in a table</p>
|
331
|
+
</td>
|
332
|
+
<td>
|
333
|
+
<p><ref href="/a/b">link</ref> and<eol/><remark status="editorial">[comment]</remark></p>
|
334
|
+
</td>
|
335
|
+
</tr>
|
336
|
+
</table>'
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.0
|
4
|
+
version: 0.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -136,20 +136,6 @@ dependencies:
|
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: 0.2.2
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: wikicloth
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - "~>"
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: 0.8.3
|
146
|
-
type: :runtime
|
147
|
-
prerelease: false
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
149
|
-
requirements:
|
150
|
-
- - "~>"
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
version: 0.8.3
|
153
139
|
- !ruby/object:Gem::Dependency
|
154
140
|
name: twitter-text
|
155
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,6 +206,7 @@ files:
|
|
220
206
|
- spec/za/act_block_spec.rb
|
221
207
|
- spec/za/act_inline_spec.rb
|
222
208
|
- spec/za/act_schedules_spec.rb
|
209
|
+
- spec/za/act_table_spec.rb
|
223
210
|
homepage: ''
|
224
211
|
licenses:
|
225
212
|
- MIT
|
@@ -257,3 +244,4 @@ test_files:
|
|
257
244
|
- spec/za/act_block_spec.rb
|
258
245
|
- spec/za/act_inline_spec.rb
|
259
246
|
- spec/za/act_schedules_spec.rb
|
247
|
+
- spec/za/act_table_spec.rb
|