sablon 0.0.21 → 0.0.22

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/Gemfile.lock +9 -9
  4. data/README.md +120 -11
  5. data/lib/sablon.rb +7 -1
  6. data/lib/sablon/configuration/configuration.rb +165 -0
  7. data/lib/sablon/configuration/html_tag.rb +99 -0
  8. data/lib/sablon/content.rb +12 -9
  9. data/lib/sablon/context.rb +27 -20
  10. data/lib/sablon/environment.rb +31 -0
  11. data/lib/sablon/html/ast.rb +290 -75
  12. data/lib/sablon/html/ast_builder.rb +90 -0
  13. data/lib/sablon/html/converter.rb +3 -123
  14. data/lib/sablon/numbering.rb +0 -5
  15. data/lib/sablon/operations.rb +11 -11
  16. data/lib/sablon/parser/mail_merge.rb +7 -6
  17. data/lib/sablon/processor/document.rb +9 -9
  18. data/lib/sablon/processor/numbering.rb +4 -4
  19. data/lib/sablon/template.rb +5 -4
  20. data/lib/sablon/version.rb +1 -1
  21. data/sablon.gemspec +3 -3
  22. data/test/configuration_test.rb +122 -0
  23. data/test/content_test.rb +7 -6
  24. data/test/context_test.rb +11 -11
  25. data/test/environment_test.rb +27 -0
  26. data/test/expression_test.rb +2 -2
  27. data/test/fixtures/html/html_test_content.html +174 -0
  28. data/test/fixtures/html_sample.docx +0 -0
  29. data/test/fixtures/xml/comment_block_and_comment_as_key.xml +31 -0
  30. data/test/html/ast_builder_test.rb +65 -0
  31. data/test/html/ast_test.rb +117 -0
  32. data/test/html/converter_test.rb +386 -87
  33. data/test/html/node_properties_test.rb +113 -0
  34. data/test/html_test.rb +10 -10
  35. data/test/mail_merge_parser_test.rb +3 -2
  36. data/test/processor/document_test.rb +20 -2
  37. data/test/section_properties_test.rb +1 -1
  38. data/test/support/html_snippets.rb +9 -0
  39. data/test/test_helper.rb +0 -1
  40. metadata +27 -7
@@ -40,6 +40,7 @@ module Sablon
40
40
  end
41
41
  end
42
42
 
43
+ # Handles simple text replacement of fields in the template
43
44
  class String < Struct.new(:string)
44
45
  include Sablon::Content
45
46
  def self.id; :string end
@@ -51,7 +52,7 @@ module Sablon
51
52
  super value.to_s
52
53
  end
53
54
 
54
- def append_to(paragraph, display_node)
55
+ def append_to(paragraph, display_node, env)
55
56
  string.scan(/[^\n]+|\n/).reverse.each do |part|
56
57
  if part == "\n"
57
58
  display_node.add_next_sibling Nokogiri::XML::Node.new "w:br", display_node.document
@@ -64,12 +65,13 @@ module Sablon
64
65
  end
65
66
  end
66
67
 
68
+ # handles direct addition of WordML to the document template
67
69
  class WordML < Struct.new(:xml)
68
70
  include Sablon::Content
69
71
  def self.id; :word_ml end
70
72
  def self.wraps?(value) false end
71
73
 
72
- def append_to(paragraph, display_node)
74
+ def append_to(paragraph, display_node, env)
73
75
  Nokogiri::XML.fragment(xml).children.reverse.each do |child|
74
76
  paragraph.add_next_sibling child
75
77
  end
@@ -77,19 +79,20 @@ module Sablon
77
79
  end
78
80
  end
79
81
 
80
- class HTML < Struct.new(:word_ml)
82
+ # Handles conversion of HTML -> WordML and addition into template
83
+ class HTML < Struct.new(:html_content)
81
84
  include Sablon::Content
82
85
  def self.id; :html end
83
86
  def self.wraps?(value) false end
84
87
 
85
- def initialize(html)
86
- converter = HTMLConverter.new
87
- word_ml = Sablon.content(:word_ml, converter.process(html))
88
- super word_ml
88
+ def initialize(value)
89
+ super value
89
90
  end
90
91
 
91
- def append_to(*args)
92
- word_ml.append_to(*args)
92
+ def append_to(paragraph, display_node, env)
93
+ converter = HTMLConverter.new
94
+ word_ml = WordML.new(converter.process(html_content, env))
95
+ word_ml.append_to(paragraph, display_node, env)
93
96
  end
94
97
  end
95
98
 
@@ -1,31 +1,38 @@
1
1
  module Sablon
2
+ # A context represents the user supplied arguments to render a
3
+ # template.
4
+ #
5
+ # This module contains transformation functions to turn a
6
+ # user supplied hash into a data structure suitable for rendering the
7
+ # docx template.
2
8
  module Context
3
- def self.transform(hash)
4
- transform_hash(hash)
5
- end
9
+ class << self
10
+ def transform_hash(hash)
11
+ Hash[hash.map { |k, v| transform_pair(k.to_s, v) }]
12
+ end
6
13
 
7
- def self.transform_hash(hash)
8
- Hash[hash.map{|k,v| transform_pair(k.to_s, v) }]
9
- end
14
+ private
10
15
 
11
- def self.transform_pair(key, value)
12
- if key =~ /\A([^:]+):(.+)\z/
13
- if value.nil?
14
- [$2, value]
16
+ def transform_standard_key(key, value)
17
+ case value
18
+ when Hash
19
+ [key, transform_hash(value)]
15
20
  else
16
- [$2, Sablon.content($1.to_sym, value)]
21
+ [key, value]
17
22
  end
18
- else
19
- transform_standard_key(key, value)
20
23
  end
21
- end
22
24
 
23
- def self.transform_standard_key(key, value)
24
- case value
25
- when Hash
26
- [key, transform_hash(value)]
27
- else
28
- [key, value]
25
+ def transform_pair(key, value)
26
+ if key =~ /\A([^:]+):(.+)\z/
27
+ if value.nil?
28
+ [Regexp.last_match[2], value]
29
+ else
30
+ key_sym = Regexp.last_match[1].to_sym
31
+ [Regexp.last_match[2], Content.make(key_sym, value)]
32
+ end
33
+ else
34
+ transform_standard_key(key, value)
35
+ end
29
36
  end
30
37
  end
31
38
  end
@@ -0,0 +1,31 @@
1
+ module Sablon
2
+ # Combines the user supplied context and template into a single object
3
+ # to manage data during template processing.
4
+ class Environment
5
+ attr_reader :template
6
+ attr_reader :numbering
7
+ attr_reader :context
8
+
9
+ # returns a new environment with merged contexts
10
+ def alter_context(context = {})
11
+ new_context = @context.merge(context)
12
+ Environment.new(nil, new_context, self)
13
+ end
14
+
15
+ private
16
+
17
+ def initialize(template, context = {}, parent_env = nil)
18
+ # pass attributes of the supplied environment to the new one or
19
+ # create new references
20
+ if parent_env
21
+ @template = parent_env.template
22
+ @numbering = parent_env.numbering
23
+ else
24
+ @template = template
25
+ @numbering = Numbering.new
26
+ end
27
+ #
28
+ @context = Context.transform_hash(context)
29
+ end
30
+ end
31
+ end
@@ -1,18 +1,177 @@
1
+ require "sablon/html/ast_builder"
2
+
1
3
  module Sablon
2
4
  class HTMLConverter
5
+ # A top level abstract class to handle common logic for all AST nodes
3
6
  class Node
7
+ PROPERTIES = [].freeze
8
+
9
+ def self.node_name
10
+ @node_name ||= name.split('::').last
11
+ end
12
+
13
+ # Returns a hash defined on the configuration object by default. However,
14
+ # this method can be overridden by subclasses to return a different
15
+ # node's style conversion config (i.e. :run) or a hash unrelated to the
16
+ # config itself. The config object is used for all built-in classes to
17
+ # allow for end-user customization via the configuration object
18
+ def self.style_conversion
19
+ # converts camelcase to underscored
20
+ key = node_name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase.to_sym
21
+ Sablon::Configuration.instance.defined_style_conversions.fetch(key, {})
22
+ end
23
+
24
+ # maps the CSS style property to its OpenXML equivalent. Not all CSS
25
+ # properties have an equivalent, nor share the same behavior when
26
+ # defined on different node types (Paragraph, Table and Run).
27
+ def self.process_properties(properties)
28
+ # process the styles as a hash and store values
29
+ style_attrs = {}
30
+ properties.each do |key, value|
31
+ unless key.is_a? Symbol
32
+ key, value = *convert_style_property(key.strip, value.strip)
33
+ end
34
+ style_attrs[key] = value if key
35
+ end
36
+ style_attrs
37
+ end
38
+
39
+ # handles conversion of a single attribute allowing recursion through
40
+ # super classes. If the key exists and conversion is successful a
41
+ # symbol is returned to avoid conflicts with a CSS prop sharing the
42
+ # same name. Keys without a conversion class are returned as is
43
+ def self.convert_style_property(key, value)
44
+ if style_conversion.key?(key)
45
+ key, value = style_conversion[key].call(value)
46
+ key = key.to_sym if key
47
+ [key, value]
48
+ elsif self == Node
49
+ [key, value]
50
+ else
51
+ superclass.convert_style_property(key, value)
52
+ end
53
+ end
54
+
55
+ def initialize(_env, _node, _properties)
56
+ @properties ||= nil
57
+ @attributes ||= {}
58
+ end
59
+
4
60
  def accept(visitor)
5
61
  visitor.visit(self)
6
62
  end
7
63
 
8
- def self.node_name
9
- @node_name ||= name.split('::').last
64
+ # Simplifies usage at call sites by only requiring them to supply
65
+ # the tag name to use and any child AST nodes to render
66
+ def to_docx(tag)
67
+ prop_str = @properties.to_docx if @properties
68
+ #
69
+ "<#{tag}#{attributes_to_docx}>#{prop_str}#{children_to_docx}</#{tag}>"
70
+ end
71
+
72
+ private
73
+
74
+ # Simplifies usage at call sites
75
+ def transferred_properties
76
+ @properties.transferred_properties
77
+ end
78
+
79
+ # Gracefully handles conversion of an attributes hash into a
80
+ # string
81
+ def attributes_to_docx
82
+ return '' if @attributes.nil? || @attributes.empty?
83
+ ' ' + @attributes.map { |k, v| %(#{k}="#{v}") }.join(' ')
84
+ end
85
+
86
+ # Acts like an abstract method allowing subclasses full flexibility to
87
+ # define any content inside the tags.
88
+ def children_to_docx
89
+ ''
90
+ end
91
+ end
92
+
93
+ # Manages the properties for an AST node
94
+ class NodeProperties
95
+ attr_reader :transferred_properties
96
+
97
+ def self.paragraph(properties)
98
+ new('w:pPr', properties, Paragraph::PROPERTIES)
99
+ end
100
+
101
+ def self.run(properties)
102
+ new('w:rPr', properties, Run::PROPERTIES)
103
+ end
104
+
105
+ def initialize(tagname, properties, whitelist)
106
+ @tagname = tagname
107
+ filter_properties(properties, whitelist)
108
+ end
109
+
110
+ def inspect
111
+ @properties.map { |k, v| v ? "#{k}=#{v}" : k }.join(';')
112
+ end
113
+
114
+ def [](key)
115
+ @properties[key]
116
+ end
117
+
118
+ def []=(key, value)
119
+ @properties[key] = value
120
+ end
121
+
122
+ def to_docx
123
+ "<#{@tagname}>#{properties_word_ml}</#{@tagname}>" unless @properties.empty?
124
+ end
125
+
126
+ private
127
+
128
+ # processes properties adding those on the whitelist to the
129
+ # properties instance variable and those not to the transferred_properties
130
+ # instance variable
131
+ def filter_properties(properties, whitelist)
132
+ @transferred_properties = {}
133
+ @properties = {}
134
+ #
135
+ properties.each do |key, value|
136
+ if whitelist.include? key.to_s
137
+ @properties[key] = value
138
+ else
139
+ @transferred_properties[key] = value
140
+ end
141
+ end
142
+ end
143
+
144
+ # processes attributes defined on the node into wordML property syntax
145
+ def properties_word_ml
146
+ @properties.map { |k, v| transform_attr(k, v) }.join
147
+ end
148
+
149
+ # properties that have a list as the value get nested in tags and
150
+ # each entry in the list is transformed. When a value is a hash the
151
+ # keys in the hash are used to explicitly build the XML tag attributes.
152
+ def transform_attr(key, value)
153
+ if value.is_a? Array
154
+ sub_attrs = value.map do |sub_prop|
155
+ sub_prop.map { |k, v| transform_attr(k, v) }
156
+ end
157
+ "<w:#{key}>#{sub_attrs.join}</w:#{key}>"
158
+ elsif value.is_a? Hash
159
+ props = value.map { |k, v| format('w:%s="%s"', k, v) if v }
160
+ "<w:#{key} #{props.compact.join(' ')} />"
161
+ else
162
+ value = format('w:val="%s" ', value) if value
163
+ "<w:#{key} #{value}/>"
164
+ end
10
165
  end
11
166
  end
12
167
 
168
+ # A container for an array of AST nodes with convenience methods to
169
+ # work with the internal array as if it were a regular node
13
170
  class Collection < Node
14
171
  attr_reader :nodes
15
172
  def initialize(nodes)
173
+ @properties ||= nil
174
+ @attributes ||= {}
16
175
  @nodes = nodes
17
176
  end
18
177
 
@@ -32,7 +191,18 @@ module Sablon
32
191
  end
33
192
  end
34
193
 
194
+ # Stores all of the AST nodes from the current fragment of HTML being
195
+ # parsed
35
196
  class Root < Collection
197
+ def initialize(env, node)
198
+ # strip text nodes from the root level element, these are typically
199
+ # extra whitespace from indenting the markup
200
+ node.search('./text()').remove
201
+
202
+ # convert children from HTML to AST nodes
203
+ super(ASTBuilder.html_to_ast(env, node.children, {}))
204
+ end
205
+
36
206
  def grep(pattern)
37
207
  visitor = GrepVisitor.new(pattern)
38
208
  accept(visitor)
@@ -44,24 +214,26 @@ module Sablon
44
214
  end
45
215
  end
46
216
 
217
+ # An AST node representing the top level content container for a word
218
+ # document. These cannot be nested within other paragraph elements
47
219
  class Paragraph < Node
48
- attr_accessor :style, :runs
49
- def initialize(style, runs)
50
- @style, @runs = style, runs
51
- end
220
+ PROPERTIES = %w[framePr ind jc keepLines keepNext numPr
221
+ outlineLvl pBdr pStyle rPr sectPr shd spacing
222
+ tabs textAlignment].freeze
223
+ attr_accessor :runs
52
224
 
53
- PATTERN = <<-XML.gsub("\n", "")
54
- <w:p>
55
- <w:pPr>
56
- <w:pStyle w:val="%s" />
57
- %s
58
- </w:pPr>
59
- %s
60
- </w:p>
61
- XML
225
+ def initialize(env, node, properties)
226
+ super
227
+ properties = self.class.process_properties(properties)
228
+ @properties = NodeProperties.paragraph(properties)
229
+ #
230
+ trans_props = transferred_properties
231
+ @runs = ASTBuilder.html_to_ast(env, node.children, trans_props)
232
+ @runs = Collection.new(@runs)
233
+ end
62
234
 
63
235
  def to_docx
64
- PATTERN % [style, ppr_docx, runs.to_docx]
236
+ super('w:p')
65
237
  end
66
238
 
67
239
  def accept(visitor)
@@ -70,107 +242,150 @@ XML
70
242
  end
71
243
 
72
244
  def inspect
73
- "<Paragraph{#{style}}: #{runs.inspect}>"
245
+ "<Paragraph{#{@properties[:pStyle]}}: #{runs.inspect}>"
74
246
  end
75
247
 
76
248
  private
77
- def ppr_docx
249
+
250
+ def children_to_docx
251
+ runs.to_docx
78
252
  end
79
253
  end
80
254
 
81
- class ListParagraph < Paragraph
82
- LIST_STYLE = <<-XML.gsub("\n", "")
83
- <w:numPr>
84
- <w:ilvl w:val="%s" />
85
- <w:numId w:val="%s" />
86
- </w:numPr>
87
- XML
88
- attr_accessor :numid, :ilvl
89
- def initialize(style, runs, numid, ilvl)
90
- super style, runs
91
- @numid = numid
92
- @ilvl = ilvl
93
- end
255
+ # Manages the child nodes of a list type tag
256
+ class List < Collection
257
+ def initialize(env, node, properties)
258
+ # initialize values
259
+ @list_tag = node.name
260
+ #
261
+ @definition = nil
262
+ if node.ancestors(".//#{@list_tag}").length.zero?
263
+ # Only register a definition upon the first list tag encountered
264
+ @definition = env.numbering.register(properties[:pStyle])
265
+ end
94
266
 
95
- private
96
- def ppr_docx
97
- LIST_STYLE % [@ilvl, numid]
98
- end
99
- end
267
+ # update attributes of all child nodes
268
+ transfer_node_attributes(node.children, node.attributes)
100
269
 
101
- class TextFormat
102
- def initialize(bold, italic, underline)
103
- @bold = bold
104
- @italic = italic
105
- @underline = underline
270
+ # Move any list tags that are a child of a list item up one level
271
+ process_child_nodes(node)
272
+
273
+ # strip text nodes from the list level element, this is typically
274
+ # extra whitespace from indenting the markup
275
+ node.search('./text()').remove
276
+
277
+ # convert children from HTML to AST nodes
278
+ super(ASTBuilder.html_to_ast(env, node.children, properties))
106
279
  end
107
280
 
108
281
  def inspect
109
- parts = []
110
- parts << 'bold' if @bold
111
- parts << 'italic' if @italic
112
- parts << 'underline' if @underline
113
- parts.join('|')
282
+ "<List: #{super}>"
114
283
  end
115
284
 
116
- def to_docx
117
- styles = []
118
- styles << '<w:b />' if @bold
119
- styles << '<w:i />' if @italic
120
- styles << '<w:u w:val="single"/>' if @underline
121
- if styles.any?
122
- "<w:rPr>#{styles.join}</w:rPr>"
123
- else
124
- ''
285
+ private
286
+
287
+ # handles passing all attributes on the parent down to children
288
+ def transfer_node_attributes(nodes, attributes)
289
+ nodes.each do |child|
290
+ # update all attributes
291
+ merge_attributes(child, attributes)
292
+
293
+ # set attributes specific to list items
294
+ if @definition
295
+ child['pStyle'] = @definition.style
296
+ child['numId'] = @definition.numid
297
+ end
298
+ child['ilvl'] = child.ancestors(".//#{@list_tag}").length - 1
125
299
  end
126
300
  end
127
301
 
128
- def self.default
129
- @default ||= new(false, false, false)
302
+ # merges parent and child attributes together, prepending the parent's
303
+ # values to allow the child node to override it if the value is already
304
+ # defined on the child node.
305
+ def merge_attributes(child, parent_attributes)
306
+ parent_attributes.each do |name, par_attr|
307
+ child_attr = child[name] ? child[name].split(';') : []
308
+ child[name] = par_attr.value.split(';').concat(child_attr).join('; ')
309
+ end
130
310
  end
131
311
 
132
- def with_bold
133
- TextFormat.new(true, @italic, @underline)
312
+ # moves any list tags that are a child of a list item tag up one level
313
+ # so they become a sibling instead of a child
314
+ def process_child_nodes(node)
315
+ node.xpath("./li/#{@list_tag}").each do |list|
316
+ # transfer attributes from parent now because the list tag will
317
+ # no longer be a child and won't inherit them as usual
318
+ transfer_node_attributes(list.children, list.parent.attributes)
319
+ list.parent.add_next_sibling(list)
320
+ end
134
321
  end
322
+ end
135
323
 
136
- def with_italic
137
- TextFormat.new(@bold, true, @underline)
324
+ # Sets list item specific attributes registered on the node to properly
325
+ # generate a list paragraph
326
+ class ListParagraph < Paragraph
327
+ def initialize(env, node, properties)
328
+ list_props = {
329
+ pStyle: node['pStyle'],
330
+ numPr: [{ ilvl: node['ilvl'] }, { numId: node['numId'] }]
331
+ }
332
+ properties = properties.merge(list_props)
333
+ super
138
334
  end
139
335
 
140
- def with_underline
141
- TextFormat.new(@bold, @italic, true)
336
+ private
337
+
338
+ def transferred_properties
339
+ super
142
340
  end
143
341
  end
144
342
 
145
- class Text < Node
146
- attr_reader :string
147
- def initialize(string, format)
148
- @string = string
149
- @format = format
343
+ # Create a run of text in the document, runs cannot be nested within
344
+ # each other
345
+ class Run < Node
346
+ PROPERTIES = %w[b i caps color dstrike emboss imprint highlight outline
347
+ rStyle shadow shd smallCaps strike sz u vanish
348
+ vertAlign].freeze
349
+
350
+ def initialize(_env, node, properties)
351
+ super
352
+ properties = self.class.process_properties(properties)
353
+ @properties = NodeProperties.run(properties)
354
+ @string = node.to_s # using `text` doesn't reconvert HTML entities
150
355
  end
151
356
 
152
357
  def to_docx
153
- "<w:r>#{@format.to_docx}<w:t xml:space=\"preserve\">#{normalized_string}</w:t></w:r>"
358
+ super('w:r')
154
359
  end
155
360
 
156
361
  def inspect
157
- "<Text{#{@format.inspect}}: #{string}>"
362
+ "<Run{#{@properties.inspect}}: #{@string}>"
158
363
  end
159
364
 
160
365
  private
161
- def normalized_string
162
- string.tr("\u00A0", ' ')
366
+
367
+ def children_to_docx
368
+ content = @string.tr("\u00A0", ' ')
369
+ "<w:t xml:space=\"preserve\">#{content}</w:t>"
163
370
  end
164
371
  end
165
372
 
166
- class Newline < Node
167
- def to_docx
168
- "<w:r><w:br/></w:r>"
373
+ # Creates a blank line in the word document
374
+ class Newline < Run
375
+ def initialize(*)
376
+ @properties = nil
377
+ @attributes = {}
169
378
  end
170
379
 
171
380
  def inspect
172
381
  "<Newline>"
173
382
  end
383
+
384
+ private
385
+
386
+ def children_to_docx
387
+ "<w:br/>"
388
+ end
174
389
  end
175
390
  end
176
391
  end