sablon 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/Gemfile.lock +9 -9
  4. data/README.md +120 -11
  5. data/lib/sablon.rb +7 -1
  6. data/lib/sablon/configuration/configuration.rb +165 -0
  7. data/lib/sablon/configuration/html_tag.rb +99 -0
  8. data/lib/sablon/content.rb +12 -9
  9. data/lib/sablon/context.rb +27 -20
  10. data/lib/sablon/environment.rb +31 -0
  11. data/lib/sablon/html/ast.rb +290 -75
  12. data/lib/sablon/html/ast_builder.rb +90 -0
  13. data/lib/sablon/html/converter.rb +3 -123
  14. data/lib/sablon/numbering.rb +0 -5
  15. data/lib/sablon/operations.rb +11 -11
  16. data/lib/sablon/parser/mail_merge.rb +7 -6
  17. data/lib/sablon/processor/document.rb +9 -9
  18. data/lib/sablon/processor/numbering.rb +4 -4
  19. data/lib/sablon/template.rb +5 -4
  20. data/lib/sablon/version.rb +1 -1
  21. data/sablon.gemspec +3 -3
  22. data/test/configuration_test.rb +122 -0
  23. data/test/content_test.rb +7 -6
  24. data/test/context_test.rb +11 -11
  25. data/test/environment_test.rb +27 -0
  26. data/test/expression_test.rb +2 -2
  27. data/test/fixtures/html/html_test_content.html +174 -0
  28. data/test/fixtures/html_sample.docx +0 -0
  29. data/test/fixtures/xml/comment_block_and_comment_as_key.xml +31 -0
  30. data/test/html/ast_builder_test.rb +65 -0
  31. data/test/html/ast_test.rb +117 -0
  32. data/test/html/converter_test.rb +386 -87
  33. data/test/html/node_properties_test.rb +113 -0
  34. data/test/html_test.rb +10 -10
  35. data/test/mail_merge_parser_test.rb +3 -2
  36. data/test/processor/document_test.rb +20 -2
  37. data/test/section_properties_test.rb +1 -1
  38. data/test/support/html_snippets.rb +9 -0
  39. data/test/test_helper.rb +0 -1
  40. metadata +27 -7
@@ -40,6 +40,7 @@ module Sablon
40
40
  end
41
41
  end
42
42
 
43
+ # Handles simple text replacement of fields in the template
43
44
  class String < Struct.new(:string)
44
45
  include Sablon::Content
45
46
  def self.id; :string end
@@ -51,7 +52,7 @@ module Sablon
51
52
  super value.to_s
52
53
  end
53
54
 
54
- def append_to(paragraph, display_node)
55
+ def append_to(paragraph, display_node, env)
55
56
  string.scan(/[^\n]+|\n/).reverse.each do |part|
56
57
  if part == "\n"
57
58
  display_node.add_next_sibling Nokogiri::XML::Node.new "w:br", display_node.document
@@ -64,12 +65,13 @@ module Sablon
64
65
  end
65
66
  end
66
67
 
68
+ # handles direct addition of WordML to the document template
67
69
  class WordML < Struct.new(:xml)
68
70
  include Sablon::Content
69
71
  def self.id; :word_ml end
70
72
  def self.wraps?(value) false end
71
73
 
72
- def append_to(paragraph, display_node)
74
+ def append_to(paragraph, display_node, env)
73
75
  Nokogiri::XML.fragment(xml).children.reverse.each do |child|
74
76
  paragraph.add_next_sibling child
75
77
  end
@@ -77,19 +79,20 @@ module Sablon
77
79
  end
78
80
  end
79
81
 
80
- class HTML < Struct.new(:word_ml)
82
+ # Handles conversion of HTML -> WordML and addition into template
83
+ class HTML < Struct.new(:html_content)
81
84
  include Sablon::Content
82
85
  def self.id; :html end
83
86
  def self.wraps?(value) false end
84
87
 
85
- def initialize(html)
86
- converter = HTMLConverter.new
87
- word_ml = Sablon.content(:word_ml, converter.process(html))
88
- super word_ml
88
+ def initialize(value)
89
+ super value
89
90
  end
90
91
 
91
- def append_to(*args)
92
- word_ml.append_to(*args)
92
+ def append_to(paragraph, display_node, env)
93
+ converter = HTMLConverter.new
94
+ word_ml = WordML.new(converter.process(html_content, env))
95
+ word_ml.append_to(paragraph, display_node, env)
93
96
  end
94
97
  end
95
98
 
@@ -1,31 +1,38 @@
1
1
  module Sablon
2
+ # A context represents the user supplied arguments to render a
3
+ # template.
4
+ #
5
+ # This module contains transformation functions to turn a
6
+ # user supplied hash into a data structure suitable for rendering the
7
+ # docx template.
2
8
  module Context
3
- def self.transform(hash)
4
- transform_hash(hash)
5
- end
9
+ class << self
10
+ def transform_hash(hash)
11
+ Hash[hash.map { |k, v| transform_pair(k.to_s, v) }]
12
+ end
6
13
 
7
- def self.transform_hash(hash)
8
- Hash[hash.map{|k,v| transform_pair(k.to_s, v) }]
9
- end
14
+ private
10
15
 
11
- def self.transform_pair(key, value)
12
- if key =~ /\A([^:]+):(.+)\z/
13
- if value.nil?
14
- [$2, value]
16
+ def transform_standard_key(key, value)
17
+ case value
18
+ when Hash
19
+ [key, transform_hash(value)]
15
20
  else
16
- [$2, Sablon.content($1.to_sym, value)]
21
+ [key, value]
17
22
  end
18
- else
19
- transform_standard_key(key, value)
20
23
  end
21
- end
22
24
 
23
- def self.transform_standard_key(key, value)
24
- case value
25
- when Hash
26
- [key, transform_hash(value)]
27
- else
28
- [key, value]
25
+ def transform_pair(key, value)
26
+ if key =~ /\A([^:]+):(.+)\z/
27
+ if value.nil?
28
+ [Regexp.last_match[2], value]
29
+ else
30
+ key_sym = Regexp.last_match[1].to_sym
31
+ [Regexp.last_match[2], Content.make(key_sym, value)]
32
+ end
33
+ else
34
+ transform_standard_key(key, value)
35
+ end
29
36
  end
30
37
  end
31
38
  end
@@ -0,0 +1,31 @@
1
+ module Sablon
2
+ # Combines the user supplied context and template into a single object
3
+ # to manage data during template processing.
4
+ class Environment
5
+ attr_reader :template
6
+ attr_reader :numbering
7
+ attr_reader :context
8
+
9
+ # returns a new environment with merged contexts
10
+ def alter_context(context = {})
11
+ new_context = @context.merge(context)
12
+ Environment.new(nil, new_context, self)
13
+ end
14
+
15
+ private
16
+
17
+ def initialize(template, context = {}, parent_env = nil)
18
+ # pass attributes of the supplied environment to the new one or
19
+ # create new references
20
+ if parent_env
21
+ @template = parent_env.template
22
+ @numbering = parent_env.numbering
23
+ else
24
+ @template = template
25
+ @numbering = Numbering.new
26
+ end
27
+ #
28
+ @context = Context.transform_hash(context)
29
+ end
30
+ end
31
+ end
@@ -1,18 +1,177 @@
1
+ require "sablon/html/ast_builder"
2
+
1
3
  module Sablon
2
4
  class HTMLConverter
5
+ # A top level abstract class to handle common logic for all AST nodes
3
6
  class Node
7
+ PROPERTIES = [].freeze
8
+
9
+ def self.node_name
10
+ @node_name ||= name.split('::').last
11
+ end
12
+
13
+ # Returns a hash defined on the configuration object by default. However,
14
+ # this method can be overridden by subclasses to return a different
15
+ # node's style conversion config (i.e. :run) or a hash unrelated to the
16
+ # config itself. The config object is used for all built-in classes to
17
+ # allow for end-user customization via the configuration object
18
+ def self.style_conversion
19
+ # converts camelcase to underscored
20
+ key = node_name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase.to_sym
21
+ Sablon::Configuration.instance.defined_style_conversions.fetch(key, {})
22
+ end
23
+
24
+ # maps the CSS style property to its OpenXML equivalent. Not all CSS
25
+ # properties have an equivalent, nor share the same behavior when
26
+ # defined on different node types (Paragraph, Table and Run).
27
+ def self.process_properties(properties)
28
+ # process the styles as a hash and store values
29
+ style_attrs = {}
30
+ properties.each do |key, value|
31
+ unless key.is_a? Symbol
32
+ key, value = *convert_style_property(key.strip, value.strip)
33
+ end
34
+ style_attrs[key] = value if key
35
+ end
36
+ style_attrs
37
+ end
38
+
39
+ # handles conversion of a single attribute allowing recursion through
40
+ # super classes. If the key exists and conversion is successful a
41
+ # symbol is returned to avoid conflicts with a CSS prop sharing the
42
+ # same name. Keys without a conversion class are returned as is
43
+ def self.convert_style_property(key, value)
44
+ if style_conversion.key?(key)
45
+ key, value = style_conversion[key].call(value)
46
+ key = key.to_sym if key
47
+ [key, value]
48
+ elsif self == Node
49
+ [key, value]
50
+ else
51
+ superclass.convert_style_property(key, value)
52
+ end
53
+ end
54
+
55
+ def initialize(_env, _node, _properties)
56
+ @properties ||= nil
57
+ @attributes ||= {}
58
+ end
59
+
4
60
  def accept(visitor)
5
61
  visitor.visit(self)
6
62
  end
7
63
 
8
- def self.node_name
9
- @node_name ||= name.split('::').last
64
+ # Simplifies usage at call sites by only requiring them to supply
65
+ # the tag name to use and any child AST nodes to render
66
+ def to_docx(tag)
67
+ prop_str = @properties.to_docx if @properties
68
+ #
69
+ "<#{tag}#{attributes_to_docx}>#{prop_str}#{children_to_docx}</#{tag}>"
70
+ end
71
+
72
+ private
73
+
74
+ # Simplifies usage at call sites
75
+ def transferred_properties
76
+ @properties.transferred_properties
77
+ end
78
+
79
+ # Gracefully handles conversion of an attributes hash into a
80
+ # string
81
+ def attributes_to_docx
82
+ return '' if @attributes.nil? || @attributes.empty?
83
+ ' ' + @attributes.map { |k, v| %(#{k}="#{v}") }.join(' ')
84
+ end
85
+
86
+ # Acts like an abstract method allowing subclasses full flexibility to
87
+ # define any content inside the tags.
88
+ def children_to_docx
89
+ ''
90
+ end
91
+ end
92
+
93
+ # Manages the properties for an AST node
94
+ class NodeProperties
95
+ attr_reader :transferred_properties
96
+
97
+ def self.paragraph(properties)
98
+ new('w:pPr', properties, Paragraph::PROPERTIES)
99
+ end
100
+
101
+ def self.run(properties)
102
+ new('w:rPr', properties, Run::PROPERTIES)
103
+ end
104
+
105
+ def initialize(tagname, properties, whitelist)
106
+ @tagname = tagname
107
+ filter_properties(properties, whitelist)
108
+ end
109
+
110
+ def inspect
111
+ @properties.map { |k, v| v ? "#{k}=#{v}" : k }.join(';')
112
+ end
113
+
114
+ def [](key)
115
+ @properties[key]
116
+ end
117
+
118
+ def []=(key, value)
119
+ @properties[key] = value
120
+ end
121
+
122
+ def to_docx
123
+ "<#{@tagname}>#{properties_word_ml}</#{@tagname}>" unless @properties.empty?
124
+ end
125
+
126
+ private
127
+
128
+ # processes properties adding those on the whitelist to the
129
+ # properties instance variable and those not to the transferred_properties
130
+ # instance variable
131
+ def filter_properties(properties, whitelist)
132
+ @transferred_properties = {}
133
+ @properties = {}
134
+ #
135
+ properties.each do |key, value|
136
+ if whitelist.include? key.to_s
137
+ @properties[key] = value
138
+ else
139
+ @transferred_properties[key] = value
140
+ end
141
+ end
142
+ end
143
+
144
+ # processes attributes defined on the node into wordML property syntax
145
+ def properties_word_ml
146
+ @properties.map { |k, v| transform_attr(k, v) }.join
147
+ end
148
+
149
+ # properties that have a list as the value get nested in tags and
150
+ # each entry in the list is transformed. When a value is a hash the
151
+ # keys in the hash are used to explicitly build the XML tag attributes.
152
+ def transform_attr(key, value)
153
+ if value.is_a? Array
154
+ sub_attrs = value.map do |sub_prop|
155
+ sub_prop.map { |k, v| transform_attr(k, v) }
156
+ end
157
+ "<w:#{key}>#{sub_attrs.join}</w:#{key}>"
158
+ elsif value.is_a? Hash
159
+ props = value.map { |k, v| format('w:%s="%s"', k, v) if v }
160
+ "<w:#{key} #{props.compact.join(' ')} />"
161
+ else
162
+ value = format('w:val="%s" ', value) if value
163
+ "<w:#{key} #{value}/>"
164
+ end
10
165
  end
11
166
  end
12
167
 
168
+ # A container for an array of AST nodes with convenience methods to
169
+ # work with the internal array as if it were a regular node
13
170
  class Collection < Node
14
171
  attr_reader :nodes
15
172
  def initialize(nodes)
173
+ @properties ||= nil
174
+ @attributes ||= {}
16
175
  @nodes = nodes
17
176
  end
18
177
 
@@ -32,7 +191,18 @@ module Sablon
32
191
  end
33
192
  end
34
193
 
194
+ # Stores all of the AST nodes from the current fragment of HTML being
195
+ # parsed
35
196
  class Root < Collection
197
+ def initialize(env, node)
198
+ # strip text nodes from the root level element, these are typically
199
+ # extra whitespace from indenting the markup
200
+ node.search('./text()').remove
201
+
202
+ # convert children from HTML to AST nodes
203
+ super(ASTBuilder.html_to_ast(env, node.children, {}))
204
+ end
205
+
36
206
  def grep(pattern)
37
207
  visitor = GrepVisitor.new(pattern)
38
208
  accept(visitor)
@@ -44,24 +214,26 @@ module Sablon
44
214
  end
45
215
  end
46
216
 
217
+ # An AST node representing the top level content container for a word
218
+ # document. These cannot be nested within other paragraph elements
47
219
  class Paragraph < Node
48
- attr_accessor :style, :runs
49
- def initialize(style, runs)
50
- @style, @runs = style, runs
51
- end
220
+ PROPERTIES = %w[framePr ind jc keepLines keepNext numPr
221
+ outlineLvl pBdr pStyle rPr sectPr shd spacing
222
+ tabs textAlignment].freeze
223
+ attr_accessor :runs
52
224
 
53
- PATTERN = <<-XML.gsub("\n", "")
54
- <w:p>
55
- <w:pPr>
56
- <w:pStyle w:val="%s" />
57
- %s
58
- </w:pPr>
59
- %s
60
- </w:p>
61
- XML
225
+ def initialize(env, node, properties)
226
+ super
227
+ properties = self.class.process_properties(properties)
228
+ @properties = NodeProperties.paragraph(properties)
229
+ #
230
+ trans_props = transferred_properties
231
+ @runs = ASTBuilder.html_to_ast(env, node.children, trans_props)
232
+ @runs = Collection.new(@runs)
233
+ end
62
234
 
63
235
  def to_docx
64
- PATTERN % [style, ppr_docx, runs.to_docx]
236
+ super('w:p')
65
237
  end
66
238
 
67
239
  def accept(visitor)
@@ -70,107 +242,150 @@ XML
70
242
  end
71
243
 
72
244
  def inspect
73
- "<Paragraph{#{style}}: #{runs.inspect}>"
245
+ "<Paragraph{#{@properties[:pStyle]}}: #{runs.inspect}>"
74
246
  end
75
247
 
76
248
  private
77
- def ppr_docx
249
+
250
+ def children_to_docx
251
+ runs.to_docx
78
252
  end
79
253
  end
80
254
 
81
- class ListParagraph < Paragraph
82
- LIST_STYLE = <<-XML.gsub("\n", "")
83
- <w:numPr>
84
- <w:ilvl w:val="%s" />
85
- <w:numId w:val="%s" />
86
- </w:numPr>
87
- XML
88
- attr_accessor :numid, :ilvl
89
- def initialize(style, runs, numid, ilvl)
90
- super style, runs
91
- @numid = numid
92
- @ilvl = ilvl
93
- end
255
+ # Manages the child nodes of a list type tag
256
+ class List < Collection
257
+ def initialize(env, node, properties)
258
+ # initialize values
259
+ @list_tag = node.name
260
+ #
261
+ @definition = nil
262
+ if node.ancestors(".//#{@list_tag}").length.zero?
263
+ # Only register a definition upon the first list tag encountered
264
+ @definition = env.numbering.register(properties[:pStyle])
265
+ end
94
266
 
95
- private
96
- def ppr_docx
97
- LIST_STYLE % [@ilvl, numid]
98
- end
99
- end
267
+ # update attributes of all child nodes
268
+ transfer_node_attributes(node.children, node.attributes)
100
269
 
101
- class TextFormat
102
- def initialize(bold, italic, underline)
103
- @bold = bold
104
- @italic = italic
105
- @underline = underline
270
+ # Move any list tags that are a child of a list item up one level
271
+ process_child_nodes(node)
272
+
273
+ # strip text nodes from the list level element, this is typically
274
+ # extra whitespace from indenting the markup
275
+ node.search('./text()').remove
276
+
277
+ # convert children from HTML to AST nodes
278
+ super(ASTBuilder.html_to_ast(env, node.children, properties))
106
279
  end
107
280
 
108
281
  def inspect
109
- parts = []
110
- parts << 'bold' if @bold
111
- parts << 'italic' if @italic
112
- parts << 'underline' if @underline
113
- parts.join('|')
282
+ "<List: #{super}>"
114
283
  end
115
284
 
116
- def to_docx
117
- styles = []
118
- styles << '<w:b />' if @bold
119
- styles << '<w:i />' if @italic
120
- styles << '<w:u w:val="single"/>' if @underline
121
- if styles.any?
122
- "<w:rPr>#{styles.join}</w:rPr>"
123
- else
124
- ''
285
+ private
286
+
287
+ # handles passing all attributes on the parent down to children
288
+ def transfer_node_attributes(nodes, attributes)
289
+ nodes.each do |child|
290
+ # update all attributes
291
+ merge_attributes(child, attributes)
292
+
293
+ # set attributes specific to list items
294
+ if @definition
295
+ child['pStyle'] = @definition.style
296
+ child['numId'] = @definition.numid
297
+ end
298
+ child['ilvl'] = child.ancestors(".//#{@list_tag}").length - 1
125
299
  end
126
300
  end
127
301
 
128
- def self.default
129
- @default ||= new(false, false, false)
302
+ # merges parent and child attributes together, prepending the parent's
303
+ # values to allow the child node to override it if the value is already
304
+ # defined on the child node.
305
+ def merge_attributes(child, parent_attributes)
306
+ parent_attributes.each do |name, par_attr|
307
+ child_attr = child[name] ? child[name].split(';') : []
308
+ child[name] = par_attr.value.split(';').concat(child_attr).join('; ')
309
+ end
130
310
  end
131
311
 
132
- def with_bold
133
- TextFormat.new(true, @italic, @underline)
312
+ # moves any list tags that are a child of a list item tag up one level
313
+ # so they become a sibling instead of a child
314
+ def process_child_nodes(node)
315
+ node.xpath("./li/#{@list_tag}").each do |list|
316
+ # transfer attributes from parent now because the list tag will
317
+ # no longer be a child and won't inherit them as usual
318
+ transfer_node_attributes(list.children, list.parent.attributes)
319
+ list.parent.add_next_sibling(list)
320
+ end
134
321
  end
322
+ end
135
323
 
136
- def with_italic
137
- TextFormat.new(@bold, true, @underline)
324
+ # Sets list item specific attributes registered on the node to properly
325
+ # generate a list paragraph
326
+ class ListParagraph < Paragraph
327
+ def initialize(env, node, properties)
328
+ list_props = {
329
+ pStyle: node['pStyle'],
330
+ numPr: [{ ilvl: node['ilvl'] }, { numId: node['numId'] }]
331
+ }
332
+ properties = properties.merge(list_props)
333
+ super
138
334
  end
139
335
 
140
- def with_underline
141
- TextFormat.new(@bold, @italic, true)
336
+ private
337
+
338
+ def transferred_properties
339
+ super
142
340
  end
143
341
  end
144
342
 
145
- class Text < Node
146
- attr_reader :string
147
- def initialize(string, format)
148
- @string = string
149
- @format = format
343
+ # Create a run of text in the document, runs cannot be nested within
344
+ # each other
345
+ class Run < Node
346
+ PROPERTIES = %w[b i caps color dstrike emboss imprint highlight outline
347
+ rStyle shadow shd smallCaps strike sz u vanish
348
+ vertAlign].freeze
349
+
350
+ def initialize(_env, node, properties)
351
+ super
352
+ properties = self.class.process_properties(properties)
353
+ @properties = NodeProperties.run(properties)
354
+ @string = node.to_s # using `text` doesn't reconvert HTML entities
150
355
  end
151
356
 
152
357
  def to_docx
153
- "<w:r>#{@format.to_docx}<w:t xml:space=\"preserve\">#{normalized_string}</w:t></w:r>"
358
+ super('w:r')
154
359
  end
155
360
 
156
361
  def inspect
157
- "<Text{#{@format.inspect}}: #{string}>"
362
+ "<Run{#{@properties.inspect}}: #{@string}>"
158
363
  end
159
364
 
160
365
  private
161
- def normalized_string
162
- string.tr("\u00A0", ' ')
366
+
367
+ def children_to_docx
368
+ content = @string.tr("\u00A0", ' ')
369
+ "<w:t xml:space=\"preserve\">#{content}</w:t>"
163
370
  end
164
371
  end
165
372
 
166
- class Newline < Node
167
- def to_docx
168
- "<w:r><w:br/></w:r>"
373
+ # Creates a blank line in the word document
374
+ class Newline < Run
375
+ def initialize(*)
376
+ @properties = nil
377
+ @attributes = {}
169
378
  end
170
379
 
171
380
  def inspect
172
381
  "<Newline>"
173
382
  end
383
+
384
+ private
385
+
386
+ def children_to_docx
387
+ "<w:br/>"
388
+ end
174
389
  end
175
390
  end
176
391
  end