sablon 0.0.18 → 0.0.19.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ # -*- coding: utf-8 -*-
2
+ module Sablon
3
+ module Processor
4
+ class Document
5
# Processes +xml_node+ in place and returns it.
#
# A new processor is built around the shared parser and evaluates the
# template against Sablon::Context.transform(context). #write_properties
# is only invoked when +properties+ is non-empty.
#
# @param xml_node   the XML node to manipulate
# @param context    raw template context, passed through Sablon::Context.transform
# @param properties [Hash] optional document properties (defaults to {})
# @return the same +xml_node+ that was passed in
def self.process(xml_node, context, properties = {})
  new(parser).tap do |document|
    document.manipulate(xml_node, Sablon::Context.transform(context))
    document.write_properties(xml_node, properties) if properties.any?
  end
  xml_node
end
11
+
12
# Returns the Sablon::Parser::MailMerge instance shared by all processors.
# It is created on the first call and cached in a class-level instance
# variable afterwards.
def self.parser
  return @parser if @parser

  @parser = Sablon::Parser::MailMerge.new
end
15
+
16
# Stores the parser whose +parse_fields+ method #manipulate later uses
# to locate the fields of a document.
#
# @param parser object responding to +parse_fields(xml_node)+
def initialize(parser)
  @parser = parser
end
19
+
20
# Evaluates every template operation found in +xml_node+ against
# +context+, then runs #cleanup on the node.
#
# The fields come from the parser given to the constructor; they are turned
# into operations by #build_operations and evaluated in order.
#
# @return the same +xml_node+ that was passed in
def manipulate(xml_node, context)
  fields = @parser.parse_fields(xml_node)
  build_operations(fields).each { |operation| operation.evaluate(context) }
  cleanup(xml_node)
  xml_node
end
28
+
29
# Applies supported document properties to +xml_node+.
#
# Only +start_page_number+ is handled; it may be given under a Symbol or a
# String key (the Symbol key wins when both are truthy). When present, it is
# assigned to the SectionProperties obtained from the document.
#
# @return the start page number that was written, or nil when none was given
def write_properties(xml_node, properties)
  start_page_number = properties[:start_page_number] || properties["start_page_number"]
  return unless start_page_number

  SectionProperties.from_document(xml_node).start_page_number = start_page_number
end
35
+
36
+ private
37
# Turns the parsed +fields+ into the list of operations to evaluate,
# delegating to OperationConstruction.
def build_operations(fields)
  construction = OperationConstruction.new(fields)
  construction.operations
end
40
+
41
# Post-processing hook run after all operations were evaluated.
# Currently this only fills table cells that were left without a paragraph.
def cleanup(xml_node)
  fill_empty_table_cells(xml_node)
end
44
+
45
# Appends an empty <w:p> to every <w:tc> that has no <w:p> child (or no
# child elements at all). The XPath starts with "//", so cells are matched
# across the whole document of +xml_node+.
#
# @return the collection of cells returned by the XPath query
def fill_empty_table_cells(xml_node)
  cells_without_paragraph = xml_node.xpath("//w:tc[count(*[name() = 'w:p'])=0 or not(*)]")
  cells_without_paragraph.each do |cell|
    cell.add_child(Nokogiri::XML::Node.new("w:p", xml_node.document))
  end
end
51
+
52
# A stretch of template nodes delimited by a start field and an end field.
#
# Subclasses must implement +self.parent(field)+, returning the collection
# of candidate container nodes for a field. Its first element is used as the
# start / end node of the block, and +encloses?+ requires it to be non-empty
# for both fields.
class Block < Struct.new(:start_field, :end_field)
  # Instantiates the first block type (row, paragraph, inline paragraph)
  # whose +encloses?+ accepts both fields.
  #
  # @raise [TemplateError] when no block type matches the two fields
  def self.enclosed_by(start_field, end_field)
    @blocks ||= [RowBlock, ParagraphBlock, InlineParagraphBlock]
    block_class = @blocks.detect { |klass| klass.encloses?(start_field, end_field) }
    unless block_class
      # Previously this fell through to `nil.new`, i.e. an opaque NoMethodError.
      raise TemplateError, "Could not determine the block enclosed by «#{start_field.expression}» and «#{end_field.expression}»"
    end
    block_class.new start_field, end_field
  end

  # Renders the block body for +context+.
  #
  # Duplicates of the body nodes are placed inside a temporary <tmp> element,
  # which is run through Processor::Document.process; the original nodes are
  # left untouched.
  #
  # @return the children of the temporary element after processing
  def process(context)
    replaced_node = Nokogiri::XML::Node.new("tmp", start_node.document)
    replaced_node.children = Nokogiri::XML::NodeSet.new(start_node.document, body.map(&:dup))
    Processor::Document.process replaced_node, context
    replaced_node.children
  end

  # Replaces the whole block (control elements and body) with +content+.
  #
  # Every node is inserted directly after the start node, so the nodes end
  # up in the document in the REVERSE of their order in +content+.
  def replace(content)
    # Memoize the original body first: if #body were computed only after the
    # insertion below, it would also contain the new nodes and they would be
    # removed again by #remove_control_elements.
    body
    content.each { |n| start_node.add_next_sibling n }
    remove_control_elements
  end

  # Removes the body nodes together with the start and end nodes.
  def remove_control_elements
    body.each(&:remove)
    start_node.remove
    end_node.remove
  end

  # Element siblings strictly between the start node and the end node,
  # memoized on first access. If the end node is never reached, collection
  # stops after the last following sibling of the start node.
  def body
    return @body if defined?(@body)
    @body = []
    node = start_node
    while (node = node.next_element) && node != end_node
      @body << node
    end
    @body
  end

  # First node returned by +self.class.parent+ for the start field.
  def start_node
    @start_node ||= self.class.parent(start_field).first
  end

  # First node returned by +self.class.parent+ for the end field.
  def end_node
    @end_node ||= self.class.parent(end_field).first
  end

  # True when +parent+ yields at least one node for each of the two fields.
  # Relies on the subclass-provided +parent+.
  def self.encloses?(start_field, end_field)
    parent(start_field).any? && parent(end_field).any?
  end
end
99
+
100
# Block whose start and end fields sit in different table rows (<w:tr>).
class RowBlock < Block
  class << self
    # <w:tr> ancestors of +node+.
    def parent(node)
      node.ancestors(".//w:tr")
    end

    # Both fields need a <w:tr> ancestor, and those ancestors must differ.
    def encloses?(start_field, end_field)
      return false unless super

      parent(start_field) != parent(end_field)
    end
  end
end
109
+
110
# Block whose start and end fields sit in different paragraphs (<w:p>).
class ParagraphBlock < Block
  class << self
    # <w:p> ancestors of +node+.
    def parent(node)
      node.ancestors(".//w:p")
    end

    # Both fields need a <w:p> ancestor, and those ancestors must differ.
    def encloses?(start_field, end_field)
      return false unless super

      parent(start_field) != parent(end_field)
    end
  end
end
119
+
120
# Block whose start and end fields share the same paragraph (<w:p>).
#
# Unlike the other block types, the block is bounded by the fields' own
# nodes rather than by their enclosing paragraph.
class InlineParagraphBlock < Block
  class << self
    # <w:p> ancestors of +node+.
    def parent(node)
      node.ancestors(".//w:p")
    end

    # Both fields need a <w:p> ancestor, and those ancestors must be equal.
    def encloses?(start_field, end_field)
      return false unless super

      parent(start_field) == parent(end_field)
    end
  end

  # The block starts right after the end node of the start field.
  def start_node
    @start_node ||= start_field.end_node
  end

  # The block ends right before the start node of the end field.
  def end_node
    @end_node ||= end_field.start_node
  end

  # Removes the body nodes and asks both fields to remove themselves.
  def remove_control_elements
    body.each(&:remove)
    start_field.remove
    end_field.remove
  end
end
143
+
144
# Converts a flat list of parsed fields into statements.
#
# Recognised field expressions:
#   =expr                -> Statement::Insertion
#   list:each(item)      -> Statement::Loop, closed by "list:endEach"
#   value:if(predicate)  -> Statement::Condition, closed by "value:endIf"
#   value:if             -> Statement::Condition, closed by "value:endIf"
# Fields with any other expression produce no statement.
class OperationConstruction
  # @param fields [Array] parsed fields; the array is consumed (shifted)
  #   while the operations are built
  def initialize(fields)
    @fields = fields
    @operations = []
  end

  # Consumes all remaining fields and returns the resulting statements,
  # without the nil entries produced by unrecognised fields.
  def operations
    @operations << consume(true) while @fields.any?
    @operations.compact
  end

  # Takes the next field and builds its statement.
  #
  # @param allow_insertion when falsy, "=expr" fields are consumed but yield
  #   nil; this is how fields inside a block are skipped while the matching
  #   end field is being searched
  # @return the statement, or nil when the field list is exhausted or the
  #   field does not produce a statement
  def consume(allow_insertion)
    @field = @fields.shift
    return unless @field

    case @field.expression
    when /^=/
      Statement::Insertion.new(Expression.parse(@field.expression[1..-1]), @field) if allow_insertion
    when /([^ ]+):each\(([^ ]+)\)/
      list, iterator = Regexp.last_match.captures
      enclosed = consume_block("#{list}:endEach")
      Statement::Loop.new(Expression.parse(list), iterator, enclosed)
    when /([^ ]+):if\(([^)]+)\)/
      subject, predicate = Regexp.last_match.captures
      enclosed = consume_block("#{subject}:endIf")
      Statement::Condition.new(Expression.parse(subject), enclosed, predicate)
    when /([^ ]+):if/
      subject = Regexp.last_match(1)
      enclosed = consume_block("#{subject}:endIf")
      Statement::Condition.new(Expression.parse(subject), enclosed)
    end
  end

  # Consumes fields up to and including the one whose expression equals
  # +end_expression+ and returns the Block spanning from the current field
  # to that end field. Nested blocks are consumed recursively via #consume.
  #
  # @raise [TemplateError] when the fields run out before the end field
  def consume_block(end_expression)
    start_field = @field
    closing_field = start_field
    until !closing_field || closing_field.expression == end_expression
      consume(false)
      closing_field = @field
    end

    unless closing_field
      raise TemplateError, "Could not find end field for «#{start_field.expression}». Was looking for «#{end_expression}»"
    end

    Block.enclosed_by start_field, closing_field
  end
end
191
+ end
192
+ end
193
+ end
@@ -0,0 +1,47 @@
1
+ module Sablon
2
+ module Processor
3
# Adds the list definitions collected in Sablon::Numbering to a
# numbering document (word/numbering.xml).
class Numbering
  # <w:num> template; the placeholders are the numId and the abstractNumId.
  LIST_DEFINITION = <<-XML.gsub(/^\s+/, '').tr("\n", '')
    <w:num w:numId="%s">
      <w:abstractNumId w:val="%s" />
    </w:num>
  XML

  # Runs the processor on +doc+ and returns +doc+.
  def self.process(doc)
    new(doc).manipulate
    doc
  end

  # @param doc the parsed numbering document
  def initialize(doc)
    @doc = doc
  end

  # Registers every definition held by Sablon::Numbering.instance.
  #
  # @return the document
  # @raise [ArgumentError] when no <w:abstractNum> exists for a definition's style
  def manipulate
    Sablon::Numbering.instance.definitions.each { |definition| register(definition) }
    @doc
  end

  private

  # Copies the <w:abstractNum> matching the definition's style, gives the
  # copy the definition's numid as abstractNumId, strips its <w:nsid>
  # children, prepends it to the container and appends a <w:num> that
  # points at it.
  def register(definition)
    abstract_num = find_definition(definition.style).dup
    abstract_num['w:abstractNumId'] = definition.numid
    abstract_num.xpath('./w:nsid').each(&:remove)
    container.prepend_child(abstract_num)
    container.add_child(format(LIST_DEFINITION, definition.numid, abstract_num['w:abstractNumId']))
  end

  # The first <w:numbering> element of the document (memoized).
  def container
    @container ||= @doc.xpath('//w:numbering').first
  end

  # First <w:abstractNum> that references the paragraph style +style+.
  def find_definition(style)
    query = "//w:abstractNum[descendant-or-self::*[w:pStyle[@w:val='#{style}']]]"
    @doc.xpath(query).first ||
      raise(ArgumentError, "Could not find w:abstractNum definition for style: #{style.inspect}")
  end
end
46
+ end
47
+ end
@@ -1,5 +1,5 @@
1
1
  module Sablon
2
- class Processor
2
+ module Processor
3
3
  class SectionProperties
4
4
  def self.from_document(document_xml)
5
5
  new document_xml.at_xpath(".//w:sectPr")
@@ -18,15 +18,19 @@ module Sablon
18
18
 
19
19
  private
20
20
  def render(context, properties = {})
21
+ Sablon::Numbering.instance.reset!
22
+ Zip.sort_entries = true # required to process document.xml before numbering.xml
21
23
  Zip::OutputStream.write_buffer(StringIO.new) do |out|
22
24
  Zip::File.open(@path).each do |entry|
23
25
  entry_name = entry.name
24
26
  out.put_next_entry(entry_name)
25
27
  content = entry.get_input_stream.read
26
28
  if entry_name == 'word/document.xml'
27
- out.write(process(content, context, properties))
29
+ out.write(process(Processor::Document, content, context, properties))
28
30
  elsif entry_name =~ /word\/header\d*\.xml/ || entry_name =~ /word\/footer\d*\.xml/
29
- out.write(process(content, context))
31
+ out.write(process(Processor::Document, content, context))
32
+ elsif entry_name == 'word/numbering.xml'
33
+ out.write(process(Processor::Numbering, content))
30
34
  else
31
35
  out.write(content)
32
36
  end
@@ -38,9 +42,9 @@ module Sablon
38
42
  #
39
43
  # IMPORTANT: Open Office does not ignore whitespace around tags.
40
44
  # We need to render the xml without indent and whitespace.
41
- def process(content, context, *args)
45
+ def process(processor, content, *args)
42
46
  document = Nokogiri::XML(content)
43
- Processor.process(document, context, *args).to_xml(indent: 0, save_with: 0)
47
+ processor.process(document, *args).to_xml(indent: 0, save_with: 0)
44
48
  end
45
49
  end
46
50
  end
@@ -1,3 +1,3 @@
1
1
  module Sablon
2
- VERSION = "0.0.18"
2
+ VERSION = "0.0.19.beta1"
3
3
  end
Binary file
@@ -0,0 +1,303 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "test_helper"
3
+
4
# Checks the WordML produced by Sablon::HTMLConverter#process for HTML input.
#
# Expected output is written as indented WordML and flattened with
# #normalize_wordml before it is compared. The list tests build their
# expectation from #list_item so the paragraph markup is spelled out once.
class HTMLConverterTest < Sablon::TestCase
  def setup
    super
    @converter = Sablon::HTMLConverter.new
  end

  def test_convert_text_inside_div
    input = '<div>Lorem ipsum dolor sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_text_inside_p
    input = '<p>Lorem ipsum dolor sit amet</p>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_text_inside_multiple_divs
    input = '<div>Lorem ipsum</div><div>dolor sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum</w:t></w:r>
      </w:p>
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_newline_inside_div
    input = '<div>Lorem ipsum<br>dolor sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum</w:t></w:r>
        <w:r><w:br/></w:r>
        <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_strong_tags_inside_div
    input = '<div>Lorem&nbsp;<strong>ipsum dolor</strong>&nbsp;sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
        <w:r><w:rPr><w:b /></w:rPr><w:t xml:space="preserve">ipsum dolor</w:t></w:r>
        <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_em_tags_inside_div
    input = '<div>Lorem&nbsp;<em>ipsum dolor</em>&nbsp;sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
        <w:r><w:rPr><w:i /></w:rPr><w:t xml:space="preserve">ipsum dolor</w:t></w:r>
        <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_unordered_lists
    input = '<ul><li>Lorem</li><li>ipsum</li><li>dolor</li></ul>'
    expected_output = [
      list_item('ListBullet', 0, 1001, 'Lorem'),
      list_item('ListBullet', 0, 1001, 'ipsum'),
      list_item('ListBullet', 0, 1001, 'dolor')
    ].join
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
  end

  def test_ordered_lists
    input = '<ol><li>Lorem</li><li>ipsum</li><li>dolor</li></ol>'
    expected_output = [
      list_item('ListNumber', 0, 1001, 'Lorem'),
      list_item('ListNumber', 0, 1001, 'ipsum'),
      list_item('ListNumber', 0, 1001, 'dolor')
    ].join
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber')], Sablon::Numbering.instance.definitions
  end

  # Every separate list gets its own numid, even when the style repeats.
  def test_mixed_lists
    input = '<ol><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li></ol>'
    expected_output = [
      list_item('ListNumber', 0, 1001, 'Lorem'),
      list_item('ListBullet', 0, 1002, 'ipsum'),
      list_item('ListNumber', 0, 1003, 'dolor')
    ].join
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber'),
                  Sablon::Numbering::Definition.new(1002, 'ListBullet'),
                  Sablon::Numbering::Definition.new(1003, 'ListNumber')], Sablon::Numbering.instance.definitions
  end

  # Nested lists keep the numid of the outer list and only raise the level.
  def test_nested_unordered_lists
    input = '<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>'
    expected_output = [
      list_item('ListBullet', 0, 1001, 'Lorem'),
      list_item('ListBullet', 1, 1001, 'ipsum'),
      list_item('ListBullet', 2, 1001, 'dolor')
    ].join
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
  end

  private

  # Flattens indented WordML: drops leading whitespace on every line
  # (blank lines included) and removes all newlines.
  def normalize_wordml(wordml)
    wordml.gsub(/^\s+/, '').tr("\n", '')
  end

  # Indented WordML for one list paragraph; pass the result (or several
  # joined results) through #normalize_wordml before comparing.
  def list_item(style, ilvl, numid, text)
    <<-DOCX
      <w:p>
        <w:pPr>
          <w:pStyle w:val="#{style}" />
          <w:numPr>
            <w:ilvl w:val="#{ilvl}" />
            <w:numId w:val="#{numid}" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">#{text}</w:t></w:r>
      </w:p>
    DOCX
  end
end
257
+
258
# Checks the intermediate AST built by Sablon::HTMLConverter#processed_ast:
# node classes, paragraph styles, and the numid / ilvl given to list items.
class HTMLConverterASTTest < Sablon::TestCase
  def setup
    super
    @converter = Sablon::HTMLConverter.new
  end

  def test_div
    nodes = ast_nodes('<div>Lorem ipsum dolor sit amet</div>')
    assert_equal [Sablon::HTMLConverter::Paragraph], nodes.map(&:class)
    assert_equal ['Paragraph'], nodes.map(&:style)
  end

  def test_ul
    nodes = ast_nodes('<ul><li>Lorem</li><li>ipsum</li></ul>')
    assert_equal [Sablon::HTMLConverter::ListParagraph] * 2, nodes.map(&:class)
    assert_equal ["ListBullet", "ListBullet"], nodes.map(&:style)
  end

  def test_ol
    nodes = ast_nodes('<ol><li>Lorem</li><li>ipsum</li></ol>')
    assert_equal [Sablon::HTMLConverter::ListParagraph] * 2, nodes.map(&:class)
    assert_equal ["ListNumber", "ListNumber"], nodes.map(&:style)
  end

  # Each top-level list receives a new numid; items of one list share it.
  def test_num_id
    items = list_paragraphs('<ol><li>Some</li><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li><li>sit</li></ol>')
    assert_equal [1001, 1001, 1002, 1003, 1003], items.map(&:numid)
  end

  def test_nested_lists_have_the_same_numid
    items = list_paragraphs('<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>')
    assert_equal [1001, 1001, 1001], items.map(&:numid)
  end

  # List paragraphs must come out in document order with the right level.
  def test_keep_nested_list_order
    html = '<ul><li>1<ul><li>1.1<ul><li>1.1.1</li></ul></li><li>1.2</li></ul></li><li>2<ul><li>1.3<ul><li>1.3.1</li></ul></li></ul></li></ul>'
    items = list_paragraphs(html)
    assert_equal [1001], items.map(&:numid).uniq
    assert_equal [0, 1, 2, 1, 0, 1, 2], items.map(&:ilvl)
  end

  private

  # AST for +html+ as a plain array.
  def ast_nodes(html)
    @converter.processed_ast(html).to_a
  end

  # Only the ListParagraph nodes of the AST for +html+.
  def list_paragraphs(html)
    @converter.processed_ast(html).grep(Sablon::HTMLConverter::ListParagraph)
  end
end