slaw 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/lib/slaw/parse/builder.rb +16 -0
- data/lib/slaw/version.rb +1 -1
- data/lib/slaw/za/act.treetop +39 -5
- data/lib/slaw/za/act_nodes.rb +59 -30
- data/lib/slaw/za/act_text.xsl +9 -1
- data/slaw.gemspec +0 -1
- data/spec/parse/builder_spec.rb +40 -0
- data/spec/za/act_block_spec.rb +0 -182
- data/spec/za/act_inline_spec.rb +0 -2
- data/spec/za/act_schedules_spec.rb +0 -2
- data/spec/za/act_table_spec.rb +340 -0
- metadata +4 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80c548b7ff92ff7d2b73d3bab81790c38f3b964f
|
4
|
+
data.tar.gz: 98aa8da8542794b9fe35b490ad8092a77880f692
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34b23775bab7704112c4b33723e10db00505008cdd7f025ab8d7de0d9fd0504a0a4aad3728644bebedb26a13067637b01a8f74d7560c62e1c79095df3876c87a
|
7
|
+
data.tar.gz: fe525892734b77635bf45f458c64a80fdf36318d56e7d56065dba3faa178593d0285c3d7014cd71f0ea5082630c7f4f82c8c8bf71fa4b72b0fd505f65d4bcb23
|
data/README.md
CHANGED
@@ -218,6 +218,10 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
|
|
218
218
|
|
219
219
|
## Changelog
|
220
220
|
|
221
|
+
### 0.17.0
|
222
|
+
|
223
|
+
* Support links and images inside tables, by parsing tables natively.
|
224
|
+
|
221
225
|
### 0.16.0
|
222
226
|
|
223
227
|
* Support --crop for PDFs. Requires [poppler](https://poppler.freedesktop.org/) pdftotext, not xpdf.
|
data/lib/slaw/parse/builder.rb
CHANGED
@@ -68,6 +68,21 @@ module Slaw
|
|
68
68
|
postprocess(parse_xml(parse_text(text, parse_options)))
|
69
69
|
end
|
70
70
|
|
71
|
+
# Pre-process text just before parsing it using the grammar.
|
72
|
+
#
|
73
|
+
# @param text [String] the text to preprocess
|
74
|
+
# @return [String] text ready to parse
|
75
|
+
def preprocess(text)
|
76
|
+
# our grammar doesn't handle inline table cells; instead, we break
|
77
|
+
# inline cells into block-style cells
|
78
|
+
|
79
|
+
# first, find all the tables
|
80
|
+
text.gsub(/{\|(?!\|}).*?\|}/m) do |table|
|
81
|
+
# on each table line, split inline cells into block cells
|
82
|
+
table.split("\n").map { |line| line.gsub(/(\|\||!!)/) { |m| "\n" + m[0]} }.join("\n")
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
71
86
|
# Parse text into XML. You should still run {#postprocess} on the
|
72
87
|
# resulting XML to normalise it.
|
73
88
|
#
|
@@ -76,6 +91,7 @@ module Slaw
|
|
76
91
|
#
|
77
92
|
# @return [String] an XML string
|
78
93
|
def parse_text(text, parse_options={})
|
94
|
+
text = preprocess(text)
|
79
95
|
tree = text_to_syntax_tree(text, parse_options)
|
80
96
|
xml_from_syntax_tree(tree)
|
81
97
|
end
|
data/lib/slaw/version.rb
CHANGED
data/lib/slaw/za/act.treetop
CHANGED
@@ -171,17 +171,51 @@ module Slaw
|
|
171
171
|
('(' letter_ordinal ')') / dotted_number_3
|
172
172
|
end
|
173
173
|
|
174
|
+
##########
|
175
|
+
# wikimedia-style tables
|
176
|
+
#
|
177
|
+
# this grammar doesn't support inline table cells (eg: | col1 || col2 || col3)
|
178
|
+
# instead, the builder preprocesses tables to break inline cells onto their own
|
179
|
+
# lines, which we do support.
|
180
|
+
|
174
181
|
rule table
|
175
|
-
space?
|
182
|
+
space? '{|' eol
|
183
|
+
table_body
|
184
|
+
'|}' eol
|
176
185
|
<Table>
|
177
186
|
end
|
178
187
|
|
179
|
-
rule
|
180
|
-
|
188
|
+
rule table_body
|
189
|
+
(table_row / table_cell)*
|
190
|
+
end
|
191
|
+
|
192
|
+
rule table_row
|
193
|
+
'|-' space? eol
|
194
|
+
end
|
195
|
+
|
196
|
+
rule table_cell
|
197
|
+
# don't match end-of-table
|
198
|
+
!'|}'
|
199
|
+
[!|] attribs:table_attribs? space?
|
200
|
+
# first content line, then multiple lines
|
201
|
+
content:(line:table_line (![!|] space? line:table_line)*)
|
202
|
+
<TableCell>
|
203
|
+
end
|
204
|
+
|
205
|
+
rule table_line
|
206
|
+
clauses:clauses? eol
|
207
|
+
<TableLine>
|
208
|
+
end
|
209
|
+
|
210
|
+
rule table_attribs
|
211
|
+
space? attribs:(table_attrib+) '|'
|
181
212
|
end
|
182
213
|
|
183
|
-
rule
|
184
|
-
|
214
|
+
rule table_attrib
|
215
|
+
name:([a-z_-]+) '=' value:(
|
216
|
+
('"' (!'"' .)* '"') /
|
217
|
+
("'" (!"'" .)* "'"))
|
218
|
+
space?
|
185
219
|
end
|
186
220
|
|
187
221
|
##########
|
data/lib/slaw/za/act_nodes.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'wikicloth'
|
2
|
-
|
3
1
|
module Slaw
|
4
2
|
module ZA
|
5
3
|
module Act
|
@@ -368,39 +366,70 @@ module Slaw
|
|
368
366
|
|
369
367
|
class Table < Treetop::Runtime::SyntaxNode
|
370
368
|
def to_xml(b, idprefix, i=0)
|
371
|
-
#
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
p = Nokogiri::XML::Node.new("p", html)
|
385
|
-
p.children = cell.children
|
386
|
-
p.parent = cell
|
387
|
-
|
388
|
-
# replace newlines with <eol>
|
389
|
-
p.search("text()").each do |text|
|
390
|
-
lines = text.content.strip.split(/\n/)
|
391
|
-
text.content = lines.shift
|
392
|
-
|
393
|
-
for line in lines
|
394
|
-
eol = text.add_next_sibling(Nokogiri::XML::Node.new("eol", html))
|
395
|
-
text = eol.add_next_sibling(Nokogiri::XML::Text.new(line, html))
|
369
|
+
b.table(id: "#{idprefix}table#{i}") { |b|
|
370
|
+
# we'll gather cells into this row list
|
371
|
+
rows = []
|
372
|
+
cells = []
|
373
|
+
|
374
|
+
for child in table_body.elements
|
375
|
+
if child.is_a? TableCell
|
376
|
+
# cell
|
377
|
+
cells << child
|
378
|
+
else
|
379
|
+
# new row marker
|
380
|
+
rows << cells unless cells.empty?
|
381
|
+
cells = []
|
396
382
|
end
|
397
383
|
end
|
384
|
+
rows << cells unless cells.empty?
|
385
|
+
|
386
|
+
for row in rows
|
387
|
+
b.tr { |tr|
|
388
|
+
for cell in row
|
389
|
+
cell.to_xml(tr, "")
|
390
|
+
end
|
391
|
+
}
|
392
|
+
end
|
393
|
+
}
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
class TableCell < Treetop::Runtime::SyntaxNode
|
398
|
+
def to_xml(b, idprefix)
|
399
|
+
tag = text_value[0] == '!' ? 'th' : 'td'
|
400
|
+
|
401
|
+
attrs = {}
|
402
|
+
if not attribs.empty?
|
403
|
+
for item in attribs.attribs.elements
|
404
|
+
# key=value (strip quotes around value)
|
405
|
+
attrs[item.name.text_value.strip] = item.value.text_value[1..-2]
|
406
|
+
end
|
398
407
|
end
|
399
408
|
|
400
|
-
|
401
|
-
|
409
|
+
b.send(tag.to_sym, attrs) { |b|
|
410
|
+
b.p { |b|
|
411
|
+
# first line, and the rest
|
412
|
+
lines = [content.line] + content.elements.last.elements.map(&:line)
|
402
413
|
|
403
|
-
|
414
|
+
lines.each_with_index do |line, i|
|
415
|
+
line.to_xml(b, i, i == lines.length-1)
|
416
|
+
end
|
417
|
+
}
|
418
|
+
}
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
class TableLine < Treetop::Runtime::SyntaxNode
|
423
|
+
# line of table content
|
424
|
+
def to_xml(b, i, tail)
|
425
|
+
clauses.to_xml(b) unless clauses.empty?
|
426
|
+
|
427
|
+
# add trailing newlines.
|
428
|
+
# for the first line, eat whitespace at the start
|
429
|
+
# for the last line, eat whitespace at the end
|
430
|
+
if not tail and (i > 0 or not clauses.empty?)
|
431
|
+
eol.text_value.count("\n").times { b.eol }
|
432
|
+
end
|
404
433
|
end
|
405
434
|
end
|
406
435
|
|
data/lib/slaw/za/act_text.xsl
CHANGED
@@ -137,7 +137,7 @@
|
|
137
137
|
</xsl:template>
|
138
138
|
|
139
139
|
<!-- first text nodes of these elems must be escaped if they have special chars -->
|
140
|
-
<xsl:template match="a:p/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
|
140
|
+
<xsl:template match="a:p[not(ancestor::a:table)]/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
|
141
141
|
<xsl:call-template name="escape">
|
142
142
|
<xsl:with-param name="value" select="." />
|
143
143
|
</xsl:call-template>
|
@@ -238,6 +238,14 @@
|
|
238
238
|
<xsl:text>)</xsl:text>
|
239
239
|
</xsl:template>
|
240
240
|
|
241
|
+
<xsl:template match="a:img">
|
242
|
+
<xsl:text></xsl:text>
|
247
|
+
</xsl:template>
|
248
|
+
|
241
249
|
<xsl:template match="a:eol">
|
242
250
|
<xsl:text>
|
243
251
|
</xsl:text>
|
data/slaw.gemspec
CHANGED
@@ -28,7 +28,6 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_runtime_dependency "thor", "~> 0.19.1"
|
29
29
|
spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
|
30
30
|
spec.add_runtime_dependency 'yomu', '~> 0.2.2'
|
31
|
-
spec.add_runtime_dependency 'wikicloth', '~> 0.8.3'
|
32
31
|
# anchor twitter-text to avoid bug in 1.14.3
|
33
32
|
# https://github.com/twitter/twitter-text/issues/162
|
34
33
|
spec.add_runtime_dependency 'twitter-text', '~> 1.12.0'
|
data/spec/parse/builder_spec.rb
CHANGED
@@ -800,4 +800,44 @@ XML
|
|
800
800
|
)
|
801
801
|
end
|
802
802
|
end
|
803
|
+
|
804
|
+
describe '#preprocess' do
|
805
|
+
it 'should split inline table cells into block table cells' do
|
806
|
+
text = <<EOS
|
807
|
+
foo
|
808
|
+
| bar || baz
|
809
|
+
|
810
|
+
{|
|
811
|
+
| boom || one !! two
|
812
|
+
|-
|
813
|
+
| three
|
814
|
+
|}
|
815
|
+
|
816
|
+
xxx
|
817
|
+
|
818
|
+
{|
|
819
|
+
| colspan="2" | bar || baz
|
820
|
+
|}
|
821
|
+
EOS
|
822
|
+
subject.preprocess(text).should == <<EOS
|
823
|
+
foo
|
824
|
+
| bar || baz
|
825
|
+
|
826
|
+
{|
|
827
|
+
| boom
|
828
|
+
| one
|
829
|
+
! two
|
830
|
+
|-
|
831
|
+
| three
|
832
|
+
|}
|
833
|
+
|
834
|
+
xxx
|
835
|
+
|
836
|
+
{|
|
837
|
+
| colspan="2" | bar
|
838
|
+
| baz
|
839
|
+
|}
|
840
|
+
EOS
|
841
|
+
end
|
842
|
+
end
|
803
843
|
end
|
data/spec/za/act_block_spec.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'builder'
|
4
|
-
|
5
3
|
require 'slaw'
|
6
4
|
|
7
5
|
describe Slaw::ActGenerator do
|
@@ -1887,186 +1885,6 @@ EOS
|
|
1887
1885
|
|
1888
1886
|
end
|
1889
1887
|
|
1890
|
-
#-------------------------------------------------------------------------------
|
1891
|
-
# tables
|
1892
|
-
|
1893
|
-
describe 'tables' do
|
1894
|
-
it 'should parse basic tables' do
|
1895
|
-
node = parse :table, <<EOS
|
1896
|
-
{|
|
1897
|
-
| r1c1
|
1898
|
-
| r1c2
|
1899
|
-
|-
|
1900
|
-
| r2c1
|
1901
|
-
| r2c2
|
1902
|
-
|}
|
1903
|
-
EOS
|
1904
|
-
|
1905
|
-
node.text_value.should == "{|\n| r1c1\n| r1c2\n|-\n| r2c1\n| r2c2\n|}\n"
|
1906
|
-
to_xml(node, "prefix.").should == '<table id="prefix.table0"><tr><td><p>r1c1</p></td>
|
1907
|
-
<td><p>r1c2</p></td></tr>
|
1908
|
-
<tr><td><p>r2c1</p></td>
|
1909
|
-
<td><p>r2c2</p></td></tr></table>'
|
1910
|
-
end
|
1911
|
-
|
1912
|
-
it 'should allow newlines in table cells' do
|
1913
|
-
node = parse :table, <<EOS
|
1914
|
-
{|
|
1915
|
-
| foo
|
1916
|
-
bar
|
1917
|
-
|
1918
|
-
baz
|
1919
|
-
|
|
1920
|
-
one
|
1921
|
-
two
|
1922
|
-
|
1923
|
-
three
|
1924
|
-
|
|
1925
|
-
four
|
1926
|
-
|
1927
|
-
|-
|
1928
|
-
|}
|
1929
|
-
EOS
|
1930
|
-
|
1931
|
-
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
1932
|
-
<tr><td><p>foo<eol/>bar<eol/><eol/>baz</p></td>
|
1933
|
-
<td><p>one<eol/>two<eol/><eol/>three</p></td>
|
1934
|
-
<td><p>four</p></td></tr>
|
1935
|
-
</table>'
|
1936
|
-
end
|
1937
|
-
|
1938
|
-
it 'should parse a table in a section' do
|
1939
|
-
node = parse :section, <<EOS
|
1940
|
-
10. A section title
|
1941
|
-
|
1942
|
-
Heres a table:
|
1943
|
-
|
1944
|
-
{|
|
1945
|
-
| r1c1
|
1946
|
-
| r1c2
|
1947
|
-
|-
|
1948
|
-
| r2c1
|
1949
|
-
| r2c2
|
1950
|
-
|}
|
1951
|
-
EOS
|
1952
|
-
|
1953
|
-
xml = to_xml(node)
|
1954
|
-
xml.should == '<section id="section-10">
|
1955
|
-
<num>10.</num>
|
1956
|
-
<heading>A section title</heading>
|
1957
|
-
<paragraph id="section-10.paragraph-0">
|
1958
|
-
<content>
|
1959
|
-
<p>Heres a table:</p>
|
1960
|
-
<table id="section-10.paragraph-0.table1"><tr><td><p>r1c1</p></td>
|
1961
|
-
<td><p>r1c2</p></td></tr>
|
1962
|
-
<tr><td><p>r2c1</p></td>
|
1963
|
-
<td><p>r2c2</p></td></tr></table>
|
1964
|
-
</content>
|
1965
|
-
</paragraph>
|
1966
|
-
</section>'
|
1967
|
-
end
|
1968
|
-
|
1969
|
-
it 'should parse a table in a schedule' do
|
1970
|
-
node = parse :schedule, <<EOS
|
1971
|
-
Schedule 1
|
1972
|
-
|
1973
|
-
Heres a table:
|
1974
|
-
|
1975
|
-
{|
|
1976
|
-
| r1c1
|
1977
|
-
| r1c2
|
1978
|
-
|-
|
1979
|
-
| r2c1
|
1980
|
-
| r2c2
|
1981
|
-
|}
|
1982
|
-
EOS
|
1983
|
-
|
1984
|
-
xml = to_xml(node, "")
|
1985
|
-
today = Time.now.strftime('%Y-%m-%d')
|
1986
|
-
xml.should == '<component id="component-schedule1">
|
1987
|
-
<doc name="schedule1">
|
1988
|
-
<meta>
|
1989
|
-
<identification source="#slaw">
|
1990
|
-
<FRBRWork>
|
1991
|
-
<FRBRthis value="/za/act/1980/01/schedule1"/>
|
1992
|
-
<FRBRuri value="/za/act/1980/01"/>
|
1993
|
-
<FRBRalias value="Schedule 1"/>
|
1994
|
-
<FRBRdate date="1980-01-01" name="Generation"/>
|
1995
|
-
<FRBRauthor href="#council"/>
|
1996
|
-
<FRBRcountry value="za"/>
|
1997
|
-
</FRBRWork>
|
1998
|
-
<FRBRExpression>
|
1999
|
-
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
2000
|
-
<FRBRuri value="/za/act/1980/01/eng@"/>
|
2001
|
-
<FRBRdate date="1980-01-01" name="Generation"/>
|
2002
|
-
<FRBRauthor href="#council"/>
|
2003
|
-
<FRBRlanguage language="eng"/>
|
2004
|
-
</FRBRExpression>
|
2005
|
-
<FRBRManifestation>
|
2006
|
-
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
2007
|
-
<FRBRuri value="/za/act/1980/01/eng@"/>
|
2008
|
-
<FRBRdate date="' + today + '" name="Generation"/>
|
2009
|
-
<FRBRauthor href="#slaw"/>
|
2010
|
-
</FRBRManifestation>
|
2011
|
-
</identification>
|
2012
|
-
</meta>
|
2013
|
-
<mainBody>
|
2014
|
-
<article id="schedule1">
|
2015
|
-
<paragraph id="schedule1.paragraph-0">
|
2016
|
-
<content>
|
2017
|
-
<p>Heres a table:</p>
|
2018
|
-
<table id="schedule1.paragraph-0.table1"><tr><td><p>r1c1</p></td>
|
2019
|
-
<td><p>r1c2</p></td></tr>
|
2020
|
-
<tr><td><p>r2c1</p></td>
|
2021
|
-
<td><p>r2c2</p></td></tr></table>
|
2022
|
-
</content>
|
2023
|
-
</paragraph>
|
2024
|
-
</article>
|
2025
|
-
</mainBody>
|
2026
|
-
</doc>
|
2027
|
-
</component>'
|
2028
|
-
end
|
2029
|
-
|
2030
|
-
it 'should ignore an escaped table' do
|
2031
|
-
node = parse :block_paragraphs, <<EOS
|
2032
|
-
\\{|
|
2033
|
-
| r1c1
|
2034
|
-
| r1c2
|
2035
|
-
|}
|
2036
|
-
EOS
|
2037
|
-
|
2038
|
-
to_xml(node).should == '<paragraph id="paragraph-0">
|
2039
|
-
<content>
|
2040
|
-
<p>{|</p>
|
2041
|
-
<p>| r1c1</p>
|
2042
|
-
<p>| r1c2</p>
|
2043
|
-
<p>|}</p>
|
2044
|
-
</content>
|
2045
|
-
</paragraph>'
|
2046
|
-
end
|
2047
|
-
|
2048
|
-
it 'should allow a table as part of a subsection' do
|
2049
|
-
node = parse :subsection, <<EOS
|
2050
|
-
(1) {|
|
2051
|
-
| foo
|
2052
|
-
|}
|
2053
|
-
EOS
|
2054
|
-
|
2055
|
-
to_xml(node, '', 0).should == '<subsection id="1">
|
2056
|
-
<num>(1)</num>
|
2057
|
-
<content>
|
2058
|
-
<table id="1.table0">
|
2059
|
-
<tr>
|
2060
|
-
<td>
|
2061
|
-
<p>foo</p>
|
2062
|
-
</td>
|
2063
|
-
</tr>
|
2064
|
-
</table>
|
2065
|
-
</content>
|
2066
|
-
</subsection>'
|
2067
|
-
end
|
2068
|
-
end
|
2069
|
-
|
2070
1888
|
#-------------------------------------------------------------------------------
|
2071
1889
|
# clauses
|
2072
1890
|
|
data/spec/za/act_inline_spec.rb
CHANGED
data/spec/za/act_schedules_spec.rb
CHANGED
data/spec/za/act_table_spec.rb
ADDED
@@ -0,0 +1,340 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'slaw'
|
4
|
+
|
5
|
+
describe Slaw::ActGenerator do
|
6
|
+
def parse(rule, s)
|
7
|
+
subject.builder.text_to_syntax_tree(s, {root: rule})
|
8
|
+
end
|
9
|
+
|
10
|
+
def should_parse(rule, s)
|
11
|
+
s << "\n" unless s.end_with?("\n")
|
12
|
+
tree = subject.builder.text_to_syntax_tree(s, {root: rule})
|
13
|
+
|
14
|
+
if not tree
|
15
|
+
raise Exception.new(subject.failure_reason || "Couldn't match to grammar") if tree.nil?
|
16
|
+
else
|
17
|
+
# count an assertion
|
18
|
+
tree.should_not be_nil
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_xml(node, *args)
|
23
|
+
b = ::Nokogiri::XML::Builder.new
|
24
|
+
node.to_xml(b, *args)
|
25
|
+
b.doc.root.to_xml(encoding: 'UTF-8')
|
26
|
+
end
|
27
|
+
|
28
|
+
describe 'tables' do
|
29
|
+
it 'should parse basic tables' do
|
30
|
+
node = parse :table, <<EOS
|
31
|
+
{|
|
32
|
+
! r1c1
|
33
|
+
| r1c2
|
34
|
+
|-
|
35
|
+
| r2c1
|
36
|
+
| r2c2
|
37
|
+
|}
|
38
|
+
EOS
|
39
|
+
|
40
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
41
|
+
<tr>
|
42
|
+
<th>
|
43
|
+
<p>r1c1</p>
|
44
|
+
</th>
|
45
|
+
<td>
|
46
|
+
<p>r1c2</p>
|
47
|
+
</td>
|
48
|
+
</tr>
|
49
|
+
<tr>
|
50
|
+
<td>
|
51
|
+
<p>r2c1</p>
|
52
|
+
</td>
|
53
|
+
<td>
|
54
|
+
<p>r2c2</p>
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
</table>'
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'should handle tables with empty cells' do
|
61
|
+
node = parse :table, <<EOS
|
62
|
+
{|
|
63
|
+
!
|
64
|
+
|
|
65
|
+
|-
|
66
|
+
|
|
67
|
+
|
68
|
+
|
|
69
|
+
|-
|
70
|
+
|-
|
71
|
+
|}
|
72
|
+
EOS
|
73
|
+
|
74
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
75
|
+
<tr>
|
76
|
+
<th>
|
77
|
+
<p/>
|
78
|
+
</th>
|
79
|
+
<td>
|
80
|
+
<p/>
|
81
|
+
</td>
|
82
|
+
</tr>
|
83
|
+
<tr>
|
84
|
+
<td>
|
85
|
+
<p/>
|
86
|
+
</td>
|
87
|
+
<td>
|
88
|
+
<p/>
|
89
|
+
</td>
|
90
|
+
</tr>
|
91
|
+
</table>'
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'should parse table attributes' do
|
95
|
+
node = parse :table, <<EOS
|
96
|
+
{|
|
97
|
+
| colspan="2" | r1c1
|
98
|
+
| rowspan="1" colspan='3' | r1c2
|
99
|
+
|-
|
100
|
+
|a="b"| r2c1
|
101
|
+
|a="b"c="d" | r2c2
|
102
|
+
|}
|
103
|
+
EOS
|
104
|
+
|
105
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
106
|
+
<tr>
|
107
|
+
<td colspan="2">
|
108
|
+
<p>r1c1</p>
|
109
|
+
</td>
|
110
|
+
<td rowspan="1" colspan="3">
|
111
|
+
<p>r1c2</p>
|
112
|
+
</td>
|
113
|
+
</tr>
|
114
|
+
<tr>
|
115
|
+
<td a="b">
|
116
|
+
<p>r2c1</p>
|
117
|
+
</td>
|
118
|
+
<td a="b" c="d">
|
119
|
+
<p>r2c2</p>
|
120
|
+
</td>
|
121
|
+
</tr>
|
122
|
+
</table>'
|
123
|
+
end
|
124
|
+
|
125
|
+
it 'should allow newlines in table cells' do
|
126
|
+
node = parse :table, <<EOS
|
127
|
+
{|
|
128
|
+
| foo
|
129
|
+
bar
|
130
|
+
|
131
|
+
baz
|
132
|
+
|
|
133
|
+
one
|
134
|
+
two
|
135
|
+
|
136
|
+
three
|
137
|
+
|
|
138
|
+
four
|
139
|
+
|
140
|
+
|-
|
141
|
+
|}
|
142
|
+
EOS
|
143
|
+
|
144
|
+
to_xml(node, "prefix.").should == '<table id="prefix.table0">
|
145
|
+
<tr>
|
146
|
+
<td>
|
147
|
+
<p>foo<eol/>bar<eol/><eol/>baz</p>
|
148
|
+
</td>
|
149
|
+
<td>
|
150
|
+
<p>one<eol/>two<eol/><eol/>three</p>
|
151
|
+
</td>
|
152
|
+
<td>
|
153
|
+
<p>four</p>
|
154
|
+
</td>
|
155
|
+
</tr>
|
156
|
+
</table>'
|
157
|
+
end
|
158
|
+
|
159
|
+
it 'should parse a table in a section' do
|
160
|
+
node = parse :section, <<EOS
|
161
|
+
10. A section title
|
162
|
+
|
163
|
+
Heres a table:
|
164
|
+
|
165
|
+
{|
|
166
|
+
| r1c1
|
167
|
+
| r1c2
|
168
|
+
|-
|
169
|
+
| r2c1
|
170
|
+
| r2c2
|
171
|
+
|}
|
172
|
+
EOS
|
173
|
+
|
174
|
+
xml = to_xml(node)
|
175
|
+
xml.should == '<section id="section-10">
|
176
|
+
<num>10.</num>
|
177
|
+
<heading>A section title</heading>
|
178
|
+
<paragraph id="section-10.paragraph-0">
|
179
|
+
<content>
|
180
|
+
<p>Heres a table:</p>
|
181
|
+
<table id="section-10.paragraph-0.table1">
|
182
|
+
<tr>
|
183
|
+
<td>
|
184
|
+
<p>r1c1</p>
|
185
|
+
</td>
|
186
|
+
<td>
|
187
|
+
<p>r1c2</p>
|
188
|
+
</td>
|
189
|
+
</tr>
|
190
|
+
<tr>
|
191
|
+
<td>
|
192
|
+
<p>r2c1</p>
|
193
|
+
</td>
|
194
|
+
<td>
|
195
|
+
<p>r2c2</p>
|
196
|
+
</td>
|
197
|
+
</tr>
|
198
|
+
</table>
|
199
|
+
</content>
|
200
|
+
</paragraph>
|
201
|
+
</section>'
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'should parse a table in a schedule' do
|
205
|
+
node = parse :schedule, <<EOS
|
206
|
+
Schedule 1
|
207
|
+
|
208
|
+
Heres a table:
|
209
|
+
|
210
|
+
{|
|
211
|
+
| r1c1
|
212
|
+
| r1c2
|
213
|
+
|-
|
214
|
+
| r2c1
|
215
|
+
| r2c2
|
216
|
+
|}
|
217
|
+
EOS
|
218
|
+
|
219
|
+
xml = to_xml(node, "")
|
220
|
+
today = Time.now.strftime('%Y-%m-%d')
|
221
|
+
xml.should == '<component id="component-schedule1">
|
222
|
+
<doc name="schedule1">
|
223
|
+
<meta>
|
224
|
+
<identification source="#slaw">
|
225
|
+
<FRBRWork>
|
226
|
+
<FRBRthis value="/za/act/1980/01/schedule1"/>
|
227
|
+
<FRBRuri value="/za/act/1980/01"/>
|
228
|
+
<FRBRalias value="Schedule 1"/>
|
229
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
230
|
+
<FRBRauthor href="#council"/>
|
231
|
+
<FRBRcountry value="za"/>
|
232
|
+
</FRBRWork>
|
233
|
+
<FRBRExpression>
|
234
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
235
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
236
|
+
<FRBRdate date="1980-01-01" name="Generation"/>
|
237
|
+
<FRBRauthor href="#council"/>
|
238
|
+
<FRBRlanguage language="eng"/>
|
239
|
+
</FRBRExpression>
|
240
|
+
<FRBRManifestation>
|
241
|
+
<FRBRthis value="/za/act/1980/01/eng@/schedule1"/>
|
242
|
+
<FRBRuri value="/za/act/1980/01/eng@"/>
|
243
|
+
<FRBRdate date="' + today + '" name="Generation"/>
|
244
|
+
<FRBRauthor href="#slaw"/>
|
245
|
+
</FRBRManifestation>
|
246
|
+
</identification>
|
247
|
+
</meta>
|
248
|
+
<mainBody>
|
249
|
+
<article id="schedule1">
|
250
|
+
<paragraph id="schedule1.paragraph-0">
|
251
|
+
<content>
|
252
|
+
<p>Heres a table:</p>
|
253
|
+
<table id="schedule1.paragraph-0.table1">
|
254
|
+
<tr>
|
255
|
+
<td>
|
256
|
+
<p>r1c1</p>
|
257
|
+
</td>
|
258
|
+
<td>
|
259
|
+
<p>r1c2</p>
|
260
|
+
</td>
|
261
|
+
</tr>
|
262
|
+
<tr>
|
263
|
+
<td>
|
264
|
+
<p>r2c1</p>
|
265
|
+
</td>
|
266
|
+
<td>
|
267
|
+
<p>r2c2</p>
|
268
|
+
</td>
|
269
|
+
</tr>
|
270
|
+
</table>
|
271
|
+
</content>
|
272
|
+
</paragraph>
|
273
|
+
</article>
|
274
|
+
</mainBody>
|
275
|
+
</doc>
|
276
|
+
</component>'
|
277
|
+
end
|
278
|
+
|
279
|
+
it 'should ignore an escaped table' do
|
280
|
+
node = parse :block_paragraphs, <<EOS
|
281
|
+
\\{|
|
282
|
+
| r1c1
|
283
|
+
| r1c2
|
284
|
+
|}
|
285
|
+
EOS
|
286
|
+
|
287
|
+
to_xml(node).should == '<paragraph id="paragraph-0">
|
288
|
+
<content>
|
289
|
+
<p>{|</p>
|
290
|
+
<p>| r1c1</p>
|
291
|
+
<p>| r1c2</p>
|
292
|
+
<p>|}</p>
|
293
|
+
</content>
|
294
|
+
</paragraph>'
|
295
|
+
end
|
296
|
+
|
297
|
+
it 'should allow a table as part of a subsection' do
|
298
|
+
node = parse :subsection, <<EOS
|
299
|
+
(1) {|
|
300
|
+
| foo
|
301
|
+
|}
|
302
|
+
EOS
|
303
|
+
|
304
|
+
to_xml(node, '', 0).should == '<subsection id="1">
|
305
|
+
<num>(1)</num>
|
306
|
+
<content>
|
307
|
+
<table id="1.table0">
|
308
|
+
<tr>
|
309
|
+
<td>
|
310
|
+
<p>foo</p>
|
311
|
+
</td>
|
312
|
+
</tr>
|
313
|
+
</table>
|
314
|
+
</content>
|
315
|
+
</subsection>'
|
316
|
+
end
|
317
|
+
|
318
|
+
it 'should allow links in a table' do
|
319
|
+
node = parse :table, <<EOS
|
320
|
+
{|
|
321
|
+
| a [link](/a/b) in a table
|
322
|
+
| [link](/a/b) and
|
323
|
+
[[comment]]
|
324
|
+
|}
|
325
|
+
EOS
|
326
|
+
|
327
|
+
to_xml(node, '', 0).should == '<table id="table0">
|
328
|
+
<tr>
|
329
|
+
<td>
|
330
|
+
<p>a <ref href="/a/b">link</ref> in a table</p>
|
331
|
+
</td>
|
332
|
+
<td>
|
333
|
+
<p><ref href="/a/b">link</ref> and<eol/><remark status="editorial">[comment]</remark></p>
|
334
|
+
</td>
|
335
|
+
</tr>
|
336
|
+
</table>'
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slaw
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.16.0
|
4
|
+
version: 0.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Greg Kempe
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -136,20 +136,6 @@ dependencies:
|
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: 0.2.2
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: wikicloth
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - "~>"
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: 0.8.3
|
146
|
-
type: :runtime
|
147
|
-
prerelease: false
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
149
|
-
requirements:
|
150
|
-
- - "~>"
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
version: 0.8.3
|
153
139
|
- !ruby/object:Gem::Dependency
|
154
140
|
name: twitter-text
|
155
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,6 +206,7 @@ files:
|
|
220
206
|
- spec/za/act_block_spec.rb
|
221
207
|
- spec/za/act_inline_spec.rb
|
222
208
|
- spec/za/act_schedules_spec.rb
|
209
|
+
- spec/za/act_table_spec.rb
|
223
210
|
homepage: ''
|
224
211
|
licenses:
|
225
212
|
- MIT
|
@@ -257,3 +244,4 @@ test_files:
|
|
257
244
|
- spec/za/act_block_spec.rb
|
258
245
|
- spec/za/act_inline_spec.rb
|
259
246
|
- spec/za/act_schedules_spec.rb
|
247
|
+
- spec/za/act_table_spec.rb
|