slaw 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e97aa37515b9c1ca126afd8a7ede5e1e614ffea
4
- data.tar.gz: c2c85c2ac10c4da253edcde157f8a8b9ae3ad27e
3
+ metadata.gz: d9bf7237f83956f343675d24a55a77043dd4eaed
4
+ data.tar.gz: 4dbde167eb2194d5f8786c2a02b97204085b031d
5
5
  SHA512:
6
- metadata.gz: 34332cfccb221e26870c8e1dfc29238b1471b8d8e7babc6fb7b66cf80faddf10a00b6d2d8b0dd1c001abf26e1940e7d15fafda2e90929630d9901d81562864e4
7
- data.tar.gz: 57c9065e4f9031295ad7082ceed7d69201510939952b072f1dfa8baeadaa32432bc69777b389a651c58aea2b80865a785cea54d29ab3e41d02faf825ec08333b
6
+ metadata.gz: 3e204d080a241d971c49fcf06e4ff17fdd22418fcfaca3238f985c59a710895ce0d5ab61c32fde518f9885806d9c5bbf031b461b940e82c5acff85bd8daa6f3a
7
+ data.tar.gz: ce9e7b1b28a9799ad4d8451a0225be98707fd0ab144f10cae7a61cfb16a20a3720c975d0db08b9b5bfa36802ba874cff73f1d4fa41f46048e3f70453df99e61f
data/README.md CHANGED
@@ -218,6 +218,11 @@ Akoma Ntoso `component` elements at the end of the XML document, with a name of
218
218
 
219
219
  ## Changelog
220
220
 
221
+ ### 0.11.0
222
+
223
+ * Support newlines in table cells as EOL (or BR in HTML)
224
+ * FIX unparsing of remarks (regression introduced in 0.10.0)
225
+
221
226
  ### 0.10.1
222
227
 
223
228
  * Ensure backslash escaping handles listIntroductions and partial words correctly
@@ -18,13 +18,13 @@ module Slaw
18
18
  s = expand_tabs(s)
19
19
  s = chomp(s)
20
20
  s = enforce_newline(s)
21
- s = remove_boilerplate(s)
22
21
  end
23
22
 
24
23
  # Run deeper introspections and reformat the text, such as
25
24
  # unwrapping/re-wrapping lines. These may not be safe to run
26
25
  # multiple times.
27
26
  def reformat(s)
27
+ s = remove_boilerplate(s)
28
28
  s = unbreak_lines(s)
29
29
  s = break_lines(s)
30
30
  s = strip_toc(s)
@@ -71,7 +71,7 @@ module Slaw
71
71
  .gsub(/^\s*\d+\s*$/, '')\
72
72
  .gsub(/^.*This gazette is also available.*$/, '')\
73
73
  # get rid of date lines
74
- .gsub(/^\d+\s+\w+\s+\d+$/, '')\
74
+ .gsub(/^\d{1,2}\s+\w+\s+\d{4}$/, '')\
75
75
  # get rid of page number lines
76
76
  .gsub(/^\s*page \d+( of \d+)?\s*\n/i, '')\
77
77
  .gsub(/^\s*\d*\s*No\. \d+$/, '')\
@@ -113,4 +113,8 @@
113
113
  </xsl:element>
114
114
  </xsl:template>
115
115
 
116
+ <xsl:template match="a:eol">
117
+ <xsl:element name="br" />
118
+ </xsl:template>
119
+
116
120
  </xsl:stylesheet>
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.10.1"
2
+ VERSION = "0.11.0"
3
3
  end
@@ -342,7 +342,9 @@ module Slaw
342
342
  class Table < Treetop::Runtime::SyntaxNode
343
343
  def to_xml(b, idprefix, i=0)
344
344
  # parse the table using wikicloth
345
- html = WikiCloth::Parser.new({data: self.text_value}).to_html
345
+ # strip whitespace at the start of lines, to prevent wikicloth from treating it as PRE
346
+ text = self.text_value.strip.gsub(/^[ \t]+/, '')
347
+ html = WikiCloth::Parser.new({data: text}).to_html
346
348
 
347
349
  # we need to strip any surrounding p tags and add
348
350
  # an id to the table
@@ -355,12 +357,23 @@ module Slaw
355
357
  p = Nokogiri::XML::Node.new("p", html)
356
358
  p.children = cell.children
357
359
  p.parent = cell
360
+
361
+ # replace newlines with <eol>
362
+ p.search("text()").each do |text|
363
+ lines = text.content.strip.split(/\n/)
364
+ text.content = lines.shift
365
+
366
+ for line in lines
367
+ eol = text.add_next_sibling(Nokogiri::XML::Node.new("eol", html))
368
+ text = eol.add_next_sibling(Nokogiri::XML::Text.new(line, html))
369
+ end
370
+ end
358
371
  end
359
372
 
360
373
  table.xpath('//text()[1]').each{ |t| t.content = t.content.lstrip }
361
374
  table.xpath('//text()[last()]').each{ |t| t.content = t.content.rstrip }
362
375
 
363
- b << table.to_html
376
+ b.parent << table
364
377
  end
365
378
  end
366
379
 
@@ -103,21 +103,13 @@
103
103
  </xsl:template>
104
104
 
105
105
  <xsl:template match="a:p">
106
- <xsl:call-template name="escape">
107
- <xsl:with-param name="value" select="." />
108
- </xsl:call-template>
106
+ <xsl:apply-templates/>
109
107
  <!-- p tags must end with a newline -->
110
108
  <xsl:text>
111
109
 
112
110
  </xsl:text>
113
111
  </xsl:template>
114
112
 
115
- <xsl:template match="a:listIntroduction|a:intro">
116
- <xsl:call-template name="escape">
117
- <xsl:with-param name="value" select="." />
118
- </xsl:call-template>
119
- </xsl:template>
120
-
121
113
  <xsl:template match="a:blockList">
122
114
  <xsl:if test="a:listIntroduction != ''">
123
115
  <xsl:apply-templates select="a:listIntroduction" />
@@ -144,6 +136,13 @@
144
136
  <xsl:apply-templates select="./*[not(self::a:intro)]" />
145
137
  </xsl:template>
146
138
 
139
+ <!-- first text nodes of these elems must be escaped if they have special chars -->
140
+ <xsl:template match="a:p/text()[1] | a:listIntroduction/text()[1] | a:intro/text()[1]">
141
+ <xsl:call-template name="escape">
142
+ <xsl:with-param name="value" select="." />
143
+ </xsl:call-template>
144
+ </xsl:template>
145
+
147
146
  <!-- components/schedules -->
148
147
  <xsl:template match="a:doc">
149
148
  <xsl:text>Schedule - </xsl:text>
@@ -231,6 +230,11 @@
231
230
  <xsl:text>]</xsl:text>
232
231
  </xsl:template>
233
232
 
233
+ <xsl:template match="a:eol">
234
+ <xsl:text>
235
+ </xsl:text>
236
+ </xsl:template>
237
+
234
238
 
235
239
  <!-- for most nodes, just dump their text content -->
236
240
  <xsl:template match="*">
data/slaw.gemspec CHANGED
@@ -29,4 +29,7 @@ Gem::Specification.new do |spec|
29
29
  spec.add_runtime_dependency "mimemagic", "~> 0.2.1"
30
30
  spec.add_runtime_dependency 'yomu', '~> 0.2.2'
31
31
  spec.add_runtime_dependency 'wikicloth', '~> 0.8.3'
32
+ # anchor twitter-text to avoid bug in 1.14.3
33
+ # https://github.com/twitter/twitter-text/issues/162
34
+ spec.add_runtime_dependency 'twitter-text', '~> 1.12.0'
32
35
  end
@@ -118,7 +118,7 @@ PREFACE not escaped
118
118
  <paragraph id="section-9.paragraph-0">
119
119
  <content>
120
120
  <blockList id="section-9.paragraph-0.list1">
121
- <listIntroduction>(2) A special meeting:</listIntroduction>
121
+ <listIntroduction>(2) A special meeting <remark>[ foo ]</remark>:</listIntroduction>
122
122
  <item id="section-9.paragraph-0.list1.a">
123
123
  <num>(a)</num>
124
124
  <p>the chairperson so directs; or</p>
@@ -136,12 +136,75 @@ XML
136
136
  text = subject.text_from_act(doc)
137
137
  text.should == '1. Section
138
138
 
139
- \(2) A special meeting:
139
+ \(2) A special meeting [[ foo ]]:
140
140
 
141
141
  (a) the chairperson so directs; or
142
142
 
143
143
  (b) a majority of the members
144
144
 
145
+ '
146
+ end
147
+
148
+ it 'should unparse remarks correctly' do
149
+ doc = xml2doc(section(<<XML
150
+ <num>1.</num>
151
+ <paragraph id="section-19.paragraph-0">
152
+ <content>
153
+ <p>
154
+ <remark status="editorial">[ foo ]</remark>
155
+ </p>
156
+ <p>Section 1 <remark status="editorial">[ foo ]</remark></p>
157
+ </content>
158
+ </paragraph>
159
+ XML
160
+ ))
161
+
162
+ text = subject.text_from_act(doc)
163
+ text.should == '1.
164
+
165
+ [[ foo ]]
166
+
167
+ Section 1 [[ foo ]]
168
+
169
+ '
170
+ end
171
+
172
+ it 'should replace eol with newlines in tables' do
173
+ doc = xml2doc(section(<<XML
174
+ <num>1.</num>
175
+ <table id="section-21.paragraph-0.table1">
176
+ <tr>
177
+ <td>
178
+ <p>foo<eol/>bar<eol/>baz</p>
179
+ </td>
180
+ <td>
181
+ <p>
182
+ one<eol/>two<eol/>three
183
+
184
+ </p>
185
+ </td>
186
+ </tr>
187
+ </table>'
188
+ XML
189
+ ))
190
+
191
+ text = subject.text_from_act(doc)
192
+ text.should == '1.
193
+
194
+ {|
195
+ |-
196
+ | foo
197
+ bar
198
+ baz
199
+ |
200
+ one
201
+ two
202
+ three
203
+
204
+
205
+ |-
206
+ |}
207
+
145
208
  '
146
209
  end
147
210
  end
data/spec/za/act_spec.rb CHANGED
@@ -1876,6 +1876,32 @@ EOS
1876
1876
  <td><p>r2c2</p></td></tr></table>'
1877
1877
  end
1878
1878
 
1879
+ it 'should allow newlines in table cells' do
1880
+ node = parse :table, <<EOS
1881
+ {|
1882
+ | foo
1883
+ bar
1884
+
1885
+ baz
1886
+ |
1887
+ one
1888
+ two
1889
+
1890
+ three
1891
+ |
1892
+ four
1893
+
1894
+ |-
1895
+ |}
1896
+ EOS
1897
+
1898
+ to_xml(node, "prefix.").should == '<table id="prefix.table0">
1899
+ <tr><td><p>foo<eol/>bar<eol/><eol/>baz</p></td>
1900
+ <td><p>one<eol/>two<eol/><eol/>three</p></td>
1901
+ <td><p>four</p></td></tr>
1902
+ </table>'
1903
+ end
1904
+
1879
1905
  it 'should parse a table in a section' do
1880
1906
  node = parse :section, <<EOS
1881
1907
  10. A section title
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.1
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-18 00:00:00.000000000 Z
11
+ date: 2017-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
152
  version: 0.8.3
153
+ - !ruby/object:Gem::Dependency
154
+ name: twitter-text
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 1.12.0
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 1.12.0
153
167
  description: Slaw is a lightweight library for rendering and generating Akoma Ntoso
154
168
  acts from plain text and PDF documents.
155
169
  email: