sablon 0.0.18 → 0.0.19.beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
module Sablon
  module Processor
    # Evaluates the merge fields found in an XML node against a context and
    # optionally writes document properties.
    class Document
      # Builds a processor around the shared parser, evaluates all field
      # operations on +xml_node+ and, when +properties+ is not empty, writes
      # them to the node.
      #
      # @param xml_node the XML node to modify in place
      # @param context the template data; passed through Sablon::Context.transform
      # @param properties [Hash] optional settings, see #write_properties
      # @return the same +xml_node+ that was passed in
      def self.process(xml_node, context, properties = {})
        processor = new(parser)
        processor.manipulate xml_node, Sablon::Context.transform(context)
        processor.write_properties xml_node, properties if properties.any?
        xml_node
      end

      # @return the memoized Sablon::Parser::MailMerge instance shared by all
      #   processors created through .process
      def self.parser
        @parser ||= Sablon::Parser::MailMerge.new
      end

      # @param parser an object responding to +parse_fields(xml_node)+
      def initialize(parser)
        @parser = parser
      end

      # Parses the fields of +xml_node+, turns them into operations, evaluates
      # each operation with +context+ and finally runs the cleanup step.
      #
      # @return the same +xml_node+
      def manipulate(xml_node, context)
        operations = build_operations(@parser.parse_fields(xml_node))
        operations.each do |step|
          step.evaluate context
        end
        cleanup(xml_node)
        xml_node
      end

      # Applies the supported document properties. Only +start_page_number+
      # is read; it may be given under a Symbol or a String key.
      #
      # @return the start page number when one was given, otherwise nil
      def write_properties(xml_node, properties)
        start_page_number = properties[:start_page_number] || properties["start_page_number"]
        return unless start_page_number

        section_properties = SectionProperties.from_document(xml_node)
        section_properties.start_page_number = start_page_number
      end

      private

      def build_operations(fields)
        OperationConstruction.new(fields).operations
      end

      def cleanup(xml_node)
        fill_empty_table_cells xml_node
      end

      # Adds an empty w:p to every table cell that has no w:p child.
      def fill_empty_table_cells(xml_node)
        xml_node.xpath("//w:tc[count(*[name() = 'w:p'])=0 or not(*)]").each do |blank_cell|
          filler = Nokogiri::XML::Node.new("w:p", xml_node.document)
          blank_cell.add_child filler
        end
      end

      # The region of the document delimited by a start and an end field.
      # Subclasses define +self.parent(field)+, which returns the candidate
      # container nodes of a field.
      class Block < Struct.new(:start_field, :end_field)
        # Picks the first block class (row, paragraph, inline) that encloses
        # both fields and instantiates it.
        #
        # @raise [TemplateError] when no block class encloses the two fields
        def self.enclosed_by(start_field, end_field)
          @blocks ||= [RowBlock, ParagraphBlock, InlineParagraphBlock]
          block_class = @blocks.detect { |klass| klass.encloses?(start_field, end_field) }
          unless block_class
            # Without this guard the failure surfaced as NoMethodError on nil.
            raise TemplateError, "Could not find a block enclosing «#{start_field.expression}» and «#{end_field.expression}»"
          end
          block_class.new start_field, end_field
        end

        # Copies the body nodes into a temporary node, processes that node
        # with +context+ and returns its resulting children.
        def process(context)
          replaced_node = Nokogiri::XML::Node.new("tmp", start_node.document)
          replaced_node.children = Nokogiri::XML::NodeSet.new(start_node.document, body.map(&:dup))
          Processor::Document.process replaced_node, context
          replaced_node.children
        end

        # Inserts every node of +content+ directly after the start node and
        # then removes the block's own nodes. Each node is added right after
        # the start node, so later nodes land before earlier ones.
        # NOTE(review): callers presumably pass +content+ reversed - confirm.
        def replace(content)
          content.each { |n| start_node.add_next_sibling n }
          remove_control_elements
        end

        # Removes the body nodes together with the start and end nodes.
        def remove_control_elements
          body.each(&:remove)
          start_node.remove
          end_node.remove
        end

        # @return [Array] the elements strictly between start node and end
        #   node, collected once and memoized
        def body
          return @body if defined?(@body)
          @body = []
          node = start_node
          while (node = node.next_element) && node != end_node
            @body << node
          end
          @body
        end

        def start_node
          @start_node ||= self.class.parent(start_field).first
        end

        def end_node
          @end_node ||= self.class.parent(end_field).first
        end

        # @return [Boolean] true when both fields have a container of this
        #   block's kind
        def self.encloses?(start_field, end_field)
          parent(start_field).any? && parent(end_field).any?
        end
      end

      # Block whose fields sit in different w:tr ancestors.
      class RowBlock < Block
        def self.parent(node)
          node.ancestors ".//w:tr"
        end

        def self.encloses?(start_field, end_field)
          super && parent(start_field) != parent(end_field)
        end
      end

      # Block whose fields sit in different w:p ancestors.
      class ParagraphBlock < Block
        def self.parent(node)
          node.ancestors ".//w:p"
        end

        def self.encloses?(start_field, end_field)
          super && parent(start_field) != parent(end_field)
        end
      end

      # Block whose fields share the same w:p ancestors. The block is bounded
      # by the fields themselves instead of by their paragraph.
      class InlineParagraphBlock < Block
        def self.parent(node)
          node.ancestors ".//w:p"
        end

        # Removes the body and the two fields; the paragraph itself stays.
        def remove_control_elements
          body.each(&:remove)
          start_field.remove
          end_field.remove
        end

        def start_node
          @start_node ||= start_field.end_node
        end

        def end_node
          @end_node ||= end_field.start_node
        end

        def self.encloses?(start_field, end_field)
          super && parent(start_field) == parent(end_field)
        end
      end

      # Turns a flat list of fields into statements (insertions, loops and
      # conditions), pairing each start field with its end field.
      class OperationConstruction
        # @param fields [Array] fields responding to +expression+; the array
        #   is consumed (shifted) while building the operations
        def initialize(fields)
          @fields = fields
          @operations = []
        end

        # @return [Array] the top-level statements, without nil entries
        def operations
          while @fields.any?
            @operations << consume(true)
          end
          @operations.compact
        end

        # Takes the next field and builds the statement it starts.
        #
        # @param allow_insertion [Boolean] when false, "=" fields are
        #   consumed but produce no statement (used while skipping over the
        #   content of a block)
        # @return the statement, or nil when the field starts none or no
        #   field is left
        def consume(allow_insertion)
          @field = @fields.shift
          return unless @field
          case @field.expression
          when /^=/
            if allow_insertion
              Statement::Insertion.new(Expression.parse(@field.expression[1..-1]), @field)
            end
          when /([^ ]+):each\(([^ ]+)\)/
            list_expr, iterator_name = $1, $2
            block = consume_block("#{list_expr}:endEach")
            Statement::Loop.new(Expression.parse(list_expr), iterator_name, block)
          when /([^ ]+):if\(([^)]+)\)/
            condition_expr, predicate = $1, $2
            block = consume_block("#{condition_expr}:endIf")
            Statement::Condition.new(Expression.parse(condition_expr), block, predicate)
          when /([^ ]+):if/
            condition_expr = $1
            block = consume_block("#{condition_expr}:endIf")
            Statement::Condition.new(Expression.parse(condition_expr), block)
          end
        end

        # Consumes fields until one with +end_expression+ is found and
        # returns the block enclosed by the current field and that end field.
        #
        # @raise [TemplateError] when the fields run out before the end
        #   field is found
        def consume_block(end_expression)
          start_field = end_field = @field
          while end_field && end_field.expression != end_expression
            # Nested statements are consumed here; @field then points at the
            # last field they used.
            consume(false)
            end_field = @field
          end

          if end_field
            Block.enclosed_by start_field, end_field
          else
            raise TemplateError, "Could not find end field for «#{start_field.expression}». Was looking for «#{end_expression}»"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
module Sablon
  module Processor
    # Adds the list definitions registered in Sablon::Numbering to a
    # numbering XML document.
    class Numbering
      # Format template for a w:num element; takes the numId and the
      # abstractNumId. Leading whitespace and newlines are stripped so the
      # element is emitted on a single line. Frozen because it is only ever
      # used as a read-only template.
      LIST_DEFINITION = <<-XML.gsub(/^\s+/, '').tr("\n", '').freeze
        <w:num w:numId="%s">
          <w:abstractNumId w:val="%s" />
        </w:num>
      XML

      # Convenience entry point: processes +doc+ in place.
      #
      # @return the same +doc+
      def self.process(doc)
        processor = new(doc)
        processor.manipulate
        doc
      end

      # @param doc the parsed numbering XML document
      def initialize(doc)
        @doc = doc
      end

      # For every registered definition: duplicates the w:abstractNum that
      # matches the definition's style, gives the copy the definition's
      # numid as its w:abstractNumId, drops the copy's w:nsid children,
      # prepends it to the w:numbering container and appends a w:num element
      # referring to it.
      #
      # @raise [ArgumentError] when no w:abstractNum exists for a style
      # @return the document
      def manipulate
        Sablon::Numbering.instance.definitions.each do |definition|
          abstract_num_ref = find_definition(definition.style)
          abstract_num_copy = abstract_num_ref.dup
          abstract_num_copy['w:abstractNumId'] = definition.numid
          abstract_num_copy.xpath('./w:nsid').each(&:remove)
          container.prepend_child abstract_num_copy
          container.add_child(LIST_DEFINITION % [definition.numid, abstract_num_copy['w:abstractNumId']])
        end
        @doc
      end

      private

      # @return the first w:numbering element of the document (memoized)
      def container
        @container ||= @doc.xpath('//w:numbering').first
      end

      # Looks up the first w:abstractNum that contains a w:pStyle whose
      # w:val equals +style+.
      #
      # @raise [ArgumentError] when there is none
      def find_definition(style)
        abstract_num = @doc.xpath("//w:abstractNum[descendant-or-self::*[w:pStyle[@w:val='#{style}']]]").first
        if abstract_num
          abstract_num
        else
          raise ArgumentError, "Could not find w:abstractNum definition for style: #{style.inspect}"
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module Sablon
2
- class Processor
2
+ module Processor
3
3
  class SectionProperties
4
4
  def self.from_document(document_xml)
5
5
  new document_xml.at_xpath(".//w:sectPr")
@@ -18,15 +18,19 @@ module Sablon
18
18
 
19
19
  private
20
20
  def render(context, properties = {})
21
+ Sablon::Numbering.instance.reset!
22
+ Zip.sort_entries = true # required to process document.xml before numbering.xml
21
23
  Zip::OutputStream.write_buffer(StringIO.new) do |out|
22
24
  Zip::File.open(@path).each do |entry|
23
25
  entry_name = entry.name
24
26
  out.put_next_entry(entry_name)
25
27
  content = entry.get_input_stream.read
26
28
  if entry_name == 'word/document.xml'
27
- out.write(process(content, context, properties))
29
+ out.write(process(Processor::Document, content, context, properties))
28
30
  elsif entry_name =~ /word\/header\d*\.xml/ || entry_name =~ /word\/footer\d*\.xml/
29
- out.write(process(content, context))
31
+ out.write(process(Processor::Document, content, context))
32
+ elsif entry_name == 'word/numbering.xml'
33
+ out.write(process(Processor::Numbering, content))
30
34
  else
31
35
  out.write(content)
32
36
  end
@@ -38,9 +42,9 @@ module Sablon
38
42
  #
39
43
  # IMPORTANT: Open Office does not ignore whitespace around tags.
40
44
  # We need to render the xml without indent and whitespace.
41
- def process(content, context, *args)
45
+ def process(processor, content, *args)
42
46
  document = Nokogiri::XML(content)
43
- Processor.process(document, context, *args).to_xml(indent: 0, save_with: 0)
47
+ processor.process(document, *args).to_xml(indent: 0, save_with: 0)
44
48
  end
45
49
  end
46
50
  end
@@ -1,3 +1,3 @@
1
1
  module Sablon
2
- VERSION = "0.0.18"
2
+ VERSION = "0.0.19.beta1"
3
3
  end
Binary file
@@ -0,0 +1,303 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "test_helper"
3
+
# Checks the WordML produced by Sablon::HTMLConverter#process for a set of
# HTML snippets. Expected output is written as an indented heredoc and
# flattened with #normalize_wordml before comparison.
class HTMLConverterTest < Sablon::TestCase
  def setup
    super
    @converter = Sablon::HTMLConverter.new
  end

  def test_convert_text_inside_div
    input = '<div>Lorem ipsum dolor sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_text_inside_p
    input = '<p>Lorem ipsum dolor sit amet</p>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_text_inside_multiple_divs
    input = '<div>Lorem ipsum</div><div>dolor sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum</w:t></w:r>
      </w:p>
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_newline_inside_div
    input = '<div>Lorem ipsum<br>dolor sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem ipsum</w:t></w:r>
        <w:r><w:br/></w:r>
        <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_strong_tags_inside_div
    input = '<div>Lorem&nbsp;<strong>ipsum dolor</strong>&nbsp;sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
        <w:r><w:rPr><w:b /></w:rPr><w:t xml:space="preserve">ipsum dolor</w:t></w:r>
        <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_convert_em_tags_inside_div
    input = '<div>Lorem&nbsp;<em>ipsum dolor</em>&nbsp;sit amet</div>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
        <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
        <w:r><w:rPr><w:i /></w:rPr><w:t xml:space="preserve">ipsum dolor</w:t></w:r>
        <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)
  end

  def test_unordered_lists
    input = '<ul><li>Lorem</li><li>ipsum</li><li>dolor</li></ul>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListBullet" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">Lorem</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListBullet" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">ipsum</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListBullet" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
  end

  def test_ordered_lists
    input = '<ol><li>Lorem</li><li>ipsum</li><li>dolor</li></ol>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListNumber" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">Lorem</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListNumber" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">ipsum</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListNumber" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber')], Sablon::Numbering.instance.definitions
  end

  # Every top-level list gets its own numId, even when the list type repeats.
  def test_mixed_lists
    input = '<ol><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li></ol>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListNumber" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">Lorem</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListBullet" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1002" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">ipsum</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListNumber" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1003" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber'),
                  Sablon::Numbering::Definition.new(1002, 'ListBullet'),
                  Sablon::Numbering::Definition.new(1003, 'ListNumber')], Sablon::Numbering.instance.definitions
  end

  # Nested lists share one numId and differ only in their ilvl.
  def test_nested_unordered_lists
    input = '<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListBullet" />
          <w:numPr>
            <w:ilvl w:val="0" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">Lorem</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListBullet" />
          <w:numPr>
            <w:ilvl w:val="1" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">ipsum</w:t></w:r>
      </w:p>

      <w:p>
        <w:pPr>
          <w:pStyle w:val="ListBullet" />
          <w:numPr>
            <w:ilvl w:val="2" />
            <w:numId w:val="1001" />
          </w:numPr>
        </w:pPr>
        <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), @converter.process(input)

    assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
  end

  private

  # Strips the leading whitespace of every line and removes all newlines, so
  # the indented fixture can be compared with the converter's output.
  def normalize_wordml(wordml)
    wordml.gsub(/^\s+/, '').tr("\n", '')
  end
end
257
+
# Checks the AST that Sablon::HTMLConverter#processed_ast builds: node
# classes, paragraph styles, and the numid / ilvl given to list paragraphs.
class HTMLConverterASTTest < Sablon::TestCase
  def setup
    super
    @converter = Sablon::HTMLConverter.new
  end

  def test_div
    nodes = @converter.processed_ast('<div>Lorem ipsum dolor sit amet</div>').to_a
    assert_equal [Sablon::HTMLConverter::Paragraph], nodes.map(&:class)
    assert_equal ['Paragraph'], nodes.map(&:style)
  end

  def test_ul
    nodes = @converter.processed_ast('<ul><li>Lorem</li><li>ipsum</li></ul>').to_a
    assert_equal [Sablon::HTMLConverter::ListParagraph, Sablon::HTMLConverter::ListParagraph], nodes.map(&:class)
    assert_equal ["ListBullet", "ListBullet"], nodes.map(&:style)
  end

  def test_ol
    nodes = @converter.processed_ast('<ol><li>Lorem</li><li>ipsum</li></ol>').to_a
    assert_equal [Sablon::HTMLConverter::ListParagraph, Sablon::HTMLConverter::ListParagraph], nodes.map(&:class)
    assert_equal ["ListNumber", "ListNumber"], nodes.map(&:style)
  end

  # Each top-level list gets the next numid; items of one list share it.
  def test_num_id
    html = '<ol><li>Some</li><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li><li>sit</li></ol>'
    assert_equal [1001, 1001, 1002, 1003, 1003], list_paragraphs(html).map(&:numid)
  end

  def test_nested_lists_have_the_same_numid
    html = '<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>'
    assert_equal [1001, 1001, 1001], list_paragraphs(html).map(&:numid)
  end

  # Paragraphs must come out in document order with their nesting depth.
  def test_keep_nested_list_order
    html = '<ul><li>1<ul><li>1.1<ul><li>1.1.1</li></ul></li><li>1.2</li></ul></li><li>2<ul><li>1.3<ul><li>1.3.1</li></ul></li></ul></li></ul>'
    paragraphs = list_paragraphs(html)
    assert_equal [1001], paragraphs.map(&:numid).uniq
    assert_equal [0, 1, 2, 1, 0, 1, 2], paragraphs.map(&:ilvl)
  end

  private

  # Converts +html+ and returns only the ListParagraph nodes of its AST.
  def list_paragraphs(html)
    @converter.processed_ast(html).grep(Sablon::HTMLConverter::ListParagraph)
  end
end