slaw 0.10.1 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e97aa37515b9c1ca126afd8a7ede5e1e614ffea
4
- data.tar.gz: c2c85c2ac10c4da253edcde157f8a8b9ae3ad27e
3
+ metadata.gz: d9bf7237f83956f343675d24a55a77043dd4eaed
4
+ data.tar.gz: 4dbde167eb2194d5f8786c2a02b97204085b031d
5
5
  SHA512:
6
- metadata.gz: 34332cfccb221e26870c8e1dfc29238b1471b8d8e7babc6fb7b66cf80faddf10a00b6d2d8b0dd1c001abf26e1940e7d15fafda2e90929630d9901d81562864e4
7
- data.tar.gz: 57c9065e4f9031295ad7082ceed7d69201510939952b072f1dfa8baeadaa32432bc69777b389a651c58aea2b80865a785cea54d29ab3e41d02faf825ec08333b
6
+ metadata.gz: 3e204d080a241d971c49fcf06e4ff17fdd22418fcfaca3238f985c59a710895ce0d5ab61c32fde518f9885806d9c5bbf031b461b940e82c5acff85bd8daa6f3a
7
+ data.tar.gz: ce9e7b1b28a9799ad4d8451a0225be98707fd0ab144f10cae7a61cfb16a20a3720c975d0db08b9b5bfa36802ba874cff73f1d4fa41f46048e3f70453df99e61f
data/README.md CHANGED
@@ -218,6 +218,11 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
218
218
 
219
219
  ## Changelog
220
220
 
221
+ ### 0.11.0
222
+
223
+ * Support newlines in table cells as EOL (or BR in HTML)
224
+ * FIX unparsing of remarks (regression introduced in 0.10.0)
225
+
221
226
  ### 0.10.1
222
227
 
223
228
  * Ensure backslash escaping handles listIntroductions and partial words correctly
@@ -18,13 +18,13 @@ module Slaw
18
18
  s = expand_tabs(s)
19
19
  s = chomp(s)
20
20
  s = enforce_newline(s)
21
- s = remove_boilerplate(s)
22
21
  end
23
22
 
24
23
  # Run deeper introspections and reformat the text, such as
25
24
  # unwrapping/re-wrapping lines. These may not be safe to run
26
25
  # multiple times.
27
26
  def reformat(s)
27
+ s = remove_boilerplate(s)
28
28
  s = unbreak_lines(s)
29
29
  s = break_lines(s)
30
30
  s = strip_toc(s)
@@ -71,7 +71,7 @@ module Slaw
71
71
  .gsub(/^\s*\d+\s*$/, '')\
72
72
  .gsub(/^.*This gazette is also available.*$/, '')\
73
73
  # get rid of date lines
74
- .gsub(/^\d+\s+\w+\s+\d+$/, '')\
74
+ .gsub(/^\d{1,2}\s+\w+\s+\d{4}$/, '')\
75
75
  # get rid of page number lines
76
76
  .gsub(/^\s*page \d+( of \d+)?\s*\n/i, '')\
77
77
  .gsub(/^\s*\d*\s*No\. \d+$/, '')\
@@ -113,4 +113,8 @@
113
113
  </xsl:element>
114
114
  </xsl:template>
115
115
 
116
+ <xsl:template match="a:eol">
117
+ <xsl:element name="br" />
118
+ </xsl:template>
119
+
116
120
  </xsl:stylesheet>
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.10.1"
2
+ VERSION = "0.11.0"
3
3
  end
@@ -342,7 +342,9 @@ module Slaw
342
342
  class Table < Treetop::Runtime::SyntaxNode
343
343
  def to_xml(b, idprefix, i=0)
344
344
  # parse the table using wikicloth
345
- html = WikiCloth::Parser.new({data: self.text_value}).to_html
345
+ # strip whitespace at the start of lines, to prevent wikicloth from treating it as PRE
346
+ text = self.text_value.strip.gsub(/^[ \t]+/, '')
347
+ html = WikiCloth::Parser.new({data: text}).to_html
346
348
 
347
349
  # we need to strip any surrounding p tags and add
348
350
  # an id to the table
@@ -355,12 +357,23 @@ module Slaw
355
357
  p = Nokogiri::XML::Node.new("p", html)
356
358
  p.children = cell.children
357
359
  p.parent = cell
360
+
361
+ # replace newlines with <eol>
362
+ p.search("text()").each do |text|
363
+ lines = text.content.strip.split(/\n/)
364
+ text.content = lines.shift
365
+
366
+ for line in lines
367
+ eol = text.add_next_sibling(Nokogiri::XML::Node.new("eol", html))
368
+ text = eol.add_next_sibling(Nokogiri::XML::Text.new(line, html))
369
+ end
370
+ end
358
371
  end
359
372
 
360
373
  table.xpath('//text()[1]').each{ |t| t.content = t.content.lstrip }
361
374
  table.xpath('//text()[last()]').each{ |t| t.content = t.content.rstrip }
362
375
 
363
- b << table.to_html
376
+ b.parent << table
364
377
  end
365
378
  end
366
379
 
@@ -103,21 +103,13 @@
103
103
  </xsl:template>
104
104
 
105
105
  <xsl:template match="a:p">
106
- <xsl:call-template name="escape">
107
- <xsl:with-param name="value" select="." />
108
- </xsl:call-template>
106
+ <xsl:apply-templates/>
109
107
  <!-- p tags must end with a newline -->
110
108
  <xsl:text>
111
109
 
112
110
  </xsl:text>
113
111
  </xsl:template>
114
112
 
115
- <xsl:template match="a:listIntroduction|a:intro">
116
- <xsl:call-template name="escape">
117
- <xsl:with-param name="value" select="." />
118
- </xsl:call-template>
119
- </xsl:template>
120
-
121
113
  <xsl:template match="a:blockList">
122
114
  <xsl:if test="a:listIntroduction != ''">
123
115
  <xsl:apply-templates select="a:listIntroduction" />
@@ -144,6 +136,13 @@
144
136
  <xsl:apply-templates select="./*[not(self::a:intro)]" />
145
137
  </xsl:template>
146
138
 
139
+ <!-- first text nodes of these elems must be escaped if they have special chars -->
140
+ <xsl:template match="a:p/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
141
+ <xsl:call-template name="escape">
142
+ <xsl:with-param name="value" select="." />
143
+ </xsl:call-template>
144
+ </xsl:template>
145
+
147
146
  <!-- components/schedules -->
148
147
  <xsl:template match="a:doc">
149
148
  <xsl:text>Schedule - </xsl:text>
@@ -231,6 +230,11 @@
231
230
  <xsl:text>]</xsl:text>
232
231
  </xsl:template>
233
232
 
233
+ <xsl:template match="a:eol">
234
+ <xsl:text>
235
+ </xsl:text>
236
+ </xsl:template>
237
+
234
238
 
235
239
  <!-- for most nodes, just dump their text content -->
236
240
  <xsl:template match="*">
data/slaw.gemspec CHANGED
@@ -29,4 +29,7 @@ Gem::Specification.new do |spec|
29
29
  spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
30
30
  spec.add_runtime_dependency 'yomu', '~> 0.2.2'
31
31
  spec.add_runtime_dependency 'wikicloth', '~> 0.8.3'
32
+ # anchor twitter-text to avoid bug in 1.14.3
33
+ # https://github.com/twitter/twitter-text/issues/162
34
+ spec.add_runtime_dependency 'twitter-text', '~> 1.12.0'
32
35
  end
@@ -118,7 +118,7 @@ PREFACE not escaped
118
118
  <paragraph id="section-9.paragraph-0">
119
119
  <content>
120
120
  <blockList id="section-9.paragraph-0.list1">
121
- <listIntroduction>(2) A special meeting:</listIntroduction>
121
+ <listIntroduction>(2) A special meeting <remark>[ foo ]</remark>:</listIntroduction>
122
122
  <item id="section-9.paragraph-0.list1.a">
123
123
  <num>(a)</num>
124
124
  <p>the chairperson so directs; or</p>
@@ -136,12 +136,75 @@ XML
136
136
  text = subject.text_from_act(doc)
137
137
  text.should == '1. Section
138
138
 
139
- \(2) A special meeting:
139
+ \(2) A special meeting [[ foo ]]:
140
140
 
141
141
  (a) the chairperson so directs; or
142
142
 
143
143
  (b) a majority of the members
144
144
 
145
+ '
146
+ end
147
+
148
+ it 'should unparse remarks correctly' do
149
+ doc = xml2doc(section(<<XML
150
+ <num>1.</num>
151
+ <paragraph id="section-19.paragraph-0">
152
+ <content>
153
+ <p>
154
+ <remark status="editorial">[ foo ]</remark>
155
+ </p>
156
+ <p>Section 1 <remark status="editorial">[ foo ]</remark></p>
157
+ </content>
158
+ </paragraph>
159
+ XML
160
+ ))
161
+
162
+ text = subject.text_from_act(doc)
163
+ text.should == '1.
164
+
165
+ [[ foo ]]
166
+
167
+ Section 1 [[ foo ]]
168
+
169
+ '
170
+ end
171
+
172
+ it 'should replace eol with newlines in tables' do
173
+ doc = xml2doc(section(<<XML
174
+ <num>1.</num>
175
+ <table id="section-21.paragraph-0.table1">
176
+ <tr>
177
+ <td>
178
+ <p>foo<eol/>bar<eol/>baz</p>
179
+ </td>
180
+ <td>
181
+ <p>
182
+ one<eol/>two<eol/>three
183
+
184
+ </p>
185
+ </td>
186
+ </tr>
187
+ </table>'
188
+ XML
189
+ ))
190
+
191
+ text = subject.text_from_act(doc)
192
+ text.should == '1.
193
+
194
+ {|
195
+ |-
196
+ | foo
197
+ bar
198
+ baz
199
+ |
200
+ one
201
+ two
202
+ three
203
+
204
+
205
+ |-
206
+ |}
207
+
145
208
  '
146
209
  end
147
210
  end
data/spec/za/act_spec.rb CHANGED
@@ -1876,6 +1876,32 @@ EOS
1876
1876
  <td><p>r2c2</p></td></tr></table>'
1877
1877
  end
1878
1878
 
1879
+ it 'should allow newlines in table cells' do
1880
+ node = parse :table, <<EOS
1881
+ {|
1882
+ | foo
1883
+ bar
1884
+
1885
+ baz
1886
+ |
1887
+ one
1888
+ two
1889
+
1890
+ three
1891
+ |
1892
+ four
1893
+
1894
+ |-
1895
+ |}
1896
+ EOS
1897
+
1898
+ to_xml(node, "prefix.").should == '<table id="prefix.table0">
1899
+ <tr><td><p>foo<eol/>bar<eol/><eol/>baz</p></td>
1900
+ <td><p>one<eol/>two<eol/><eol/>three</p></td>
1901
+ <td><p>four</p></td></tr>
1902
+ </table>'
1903
+ end
1904
+
1879
1905
  it 'should parse a table in a section' do
1880
1906
  node = parse :section, <<EOS
1881
1907
  10. A section title
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.1
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-18 00:00:00.000000000 Z
11
+ date: 2017-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: 0.8.3
153
+ - !ruby/object:Gem::Dependency
154
+ name: twitter-text
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 1.12.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 1.12.0
153
167
  description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
154
168
  acts from plain text and PDF documents.
155
169
  email: