sablon 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +36 -5
  4. data/lib/sablon.rb +0 -3
  5. data/lib/sablon/configuration/html_tag.rb +1 -1
  6. data/lib/sablon/content.rb +56 -0
  7. data/lib/sablon/context.rb +2 -0
  8. data/lib/sablon/document_object_model/content_types.rb +35 -0
  9. data/lib/sablon/document_object_model/file_handler.rb +26 -0
  10. data/lib/sablon/document_object_model/model.rb +94 -0
  11. data/lib/sablon/document_object_model/numbering.rb +94 -0
  12. data/lib/sablon/document_object_model/relationships.rb +111 -0
  13. data/lib/sablon/environment.rb +13 -16
  14. data/lib/sablon/html/ast.rb +14 -13
  15. data/lib/sablon/html/ast_builder.rb +18 -5
  16. data/lib/sablon/html/node_properties.rb +3 -3
  17. data/lib/sablon/operations.rb +59 -0
  18. data/lib/sablon/processor/document.rb +48 -11
  19. data/lib/sablon/processor/section_properties.rb +11 -4
  20. data/lib/sablon/template.rb +88 -47
  21. data/lib/sablon/version.rb +1 -1
  22. data/misc/image-example.png +0 -0
  23. data/test/configuration_test.rb +22 -22
  24. data/test/content_test.rb +50 -0
  25. data/test/context_test.rb +37 -1
  26. data/test/environment_test.rb +4 -1
  27. data/test/executable_test.rb +0 -2
  28. data/test/fixtures/cv_sample.docx +0 -0
  29. data/test/fixtures/html_sample.docx +0 -0
  30. data/test/fixtures/images/c3po.jpg +0 -0
  31. data/test/fixtures/images/clone.jpg +0 -0
  32. data/test/fixtures/images/darth_vader.jpg +0 -0
  33. data/test/fixtures/images/r2d2.jpg +0 -0
  34. data/test/fixtures/images_sample.docx +0 -0
  35. data/test/fixtures/images_template.docx +0 -0
  36. data/test/fixtures/loops_sample.docx +0 -0
  37. data/test/fixtures/loops_template.docx +0 -0
  38. data/test/fixtures/recipe_sample.docx +0 -0
  39. data/test/fixtures/xml/image.xml +91 -0
  40. data/test/fixtures/xml/loop_with_unique_ids.xml +152 -0
  41. data/test/fixtures/xml/mock_document/word/document.xml +12 -0
  42. data/test/html/ast_test.rb +10 -5
  43. data/test/html/converter_style_test.rb +9 -9
  44. data/test/html/converter_test.rb +66 -81
  45. data/test/html/node_properties_test.rb +2 -2
  46. data/test/html_test.rb +2 -6
  47. data/test/processor/document_test.rb +80 -3
  48. data/test/processor/section_properties_test.rb +68 -0
  49. data/test/sablon_test.rb +77 -5
  50. data/test/test_helper.rb +109 -9
  51. metadata +33 -9
  52. data/lib/sablon/numbering.rb +0 -23
  53. data/lib/sablon/processor/numbering.rb +0 -47
  54. data/lib/sablon/relationship.rb +0 -47
  55. data/lib/sablon/test/assertions.rb +0 -22
  56. data/test/section_properties_test.rb +0 -41
@@ -0,0 +1,111 @@
1
+ require 'pathname'
2
+ require 'sablon/document_object_model/file_handler'
3
+
4
+ module Sablon
5
+ module DOM
6
+ # Adds new relationships to the entry's corresponding relationships file
7
+ class Relationships < FileHandler
8
+ #
9
+ # extends the Model class so it now has an "add_relationship" method
10
+ def self.extend_model(model_klass)
11
+ super do
12
+ #
13
+ # adds a relationship to the rels file for the current entry
14
+ define_method(:add_relationship) do |rel_attr|
15
+ # determine name of rels file to augment
16
+ rels_name = Relationships.rels_entry_name_for(@current_entry)
17
+
18
+ # create the file if needed and update DOM
19
+ create_entry_if_not_exist(rels_name, Relationships.file_template)
20
+ @dom[rels_name].add_relationship(rel_attr)
21
+ end
22
+ #
23
+ # adds file to the /word/media folder without overwriting an
24
+ # existing file
25
+ define_method(:add_media) do |name, data, rel_attr|
26
+ rel_attr[:Target] = "media/#{name}"
27
+ extension = name.match(/\.(\w+?)$/).to_a[1]
28
+ type = rel_attr[:Type].match(%r{/(\w+?)$}).to_a[1] + "/#{extension}"
29
+ #
30
+ if @zip_contents["word/#{rel_attr[:Target]}"]
31
+ names = @zip_contents.keys.map { |n| File.basename(n) }
32
+ pattern = "^(\\d+)-#{name}"
33
+ max_val = names.collect { |n| n.match(pattern).to_a[1].to_i }.max
34
+ rel_attr[:Target] = "media/#{max_val + 1}-#{name}"
35
+ end
36
+ #
37
+ # add the content to the zip and create the relationship
38
+ @zip_contents["word/#{rel_attr[:Target]}"] = data
39
+ add_content_type(extension, type)
40
+ add_relationship(rel_attr)
41
+ end
42
+ #
43
+ # locates an existing rId in the appropriate rels file
44
+ define_method(:find_relationship_by) do |attribute, value, entry = nil|
45
+ entry = @current_entry if entry.nil?
46
+ # find the rels file and search it if it exists
47
+ rels_name = Relationships.rels_entry_name_for(entry)
48
+ return unless @dom[rels_name]
49
+ #
50
+ @dom[rels_name].find_relationship_by(attribute, value)
51
+ end
52
+ end
53
+ end
54
+
55
+ def self.file_template
56
+ <<-XML.gsub(/^\s+|\n/, '')
57
+ <?xml version="1.0" encoding="UTF-8"?>
58
+ <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
59
+ </Relationships>
60
+ XML
61
+ end
62
+
63
+ def self.rels_entry_name_for(entry_name)
64
+ par_dir = Pathname.new(File.dirname(entry_name))
65
+ par_dir.join('_rels', "#{File.basename(entry_name)}.rels").to_s
66
+ end
67
+
68
+ # Sets up the class instance to handle new relationships for a document.
69
+ # I only care about tags that have an integer component
70
+ def initialize(xml_node)
71
+ super
72
+ #
73
+ @relationships = xml_node.root
74
+ @max_rid = max_attribute_value('Relationship', 'Id')
75
+ end
76
+
77
+ # Finds the maximum value of an attribute by converting it to an
78
+ # integer. Non numeric portions of values are ignored.
79
+ def max_attribute_value(selector, attr_name)
80
+ super(@relationships, selector, attr_name, query_method: :css)
81
+ end
82
+
83
+ # adds a new relationship and returns the corresponding rId for it
84
+ def add_relationship(rel_attr)
85
+ rel_attr['Id'] = "rId#{next_rid}"
86
+ @relationships << relationship_tag(rel_attr)
87
+ #
88
+ rel_attr['Id']
89
+ end
90
+
91
+ # Returns an XML node based on the attribute value or nil if one does
92
+ # not exist
93
+ def find_relationship_by(attribute, value)
94
+ @relationships.css(%(Relationship[#{attribute}="#{value}"])).first
95
+ end
96
+
97
+ private
98
+
99
+ # increments the max rid and returns it
100
+ def next_rid
101
+ @max_rid += 1
102
+ end
103
+
104
+ # Builds the relationship WordML tag and returns it
105
+ def relationship_tag(rel_attr)
106
+ attr_str = rel_attr.map { |k, v| %(#{k}="#{v}") }.join(' ')
107
+ "<Relationship #{attr_str}/>"
108
+ end
109
+ end
110
+ end
111
+ end
@@ -3,31 +3,28 @@ module Sablon
3
3
  # to manage data during template processing.
4
4
  class Environment
5
5
  attr_reader :template
6
- attr_reader :numbering
7
6
  attr_reader :context
8
- attr_reader :relationship
7
+ attr_reader :section_properties
9
8
 
10
9
  # returns a new environment with merged contexts
11
10
  def alter_context(context = {})
12
11
  new_context = @context.merge(context)
13
- Environment.new(nil, new_context, self)
12
+ Environment.new(template, new_context)
13
+ end
14
+
15
+ # reader method for the DOM::Model instance stored on the template
16
+ def document
17
+ @template.document
18
+ end
19
+
20
+ def section_properties=(properties)
21
+ @section_properties = Context.transform_hash(properties)
14
22
  end
15
23
 
16
24
  private
17
25
 
18
- def initialize(template, context = {}, parent_env = nil)
19
- # pass attributes of the supplied environment to the new one or
20
- # create new references
21
- if parent_env
22
- @template = parent_env.template
23
- @numbering = parent_env.numbering
24
- @relationship = parent_env.relationship
25
- else
26
- @template = template
27
- @numbering = Numbering.new
28
- @relationship = Relationship.new
29
- end
30
- #
26
+ def initialize(template, context = {})
27
+ @template = template
31
28
  @context = Context.transform_hash(context)
32
29
  end
33
30
  end
@@ -30,8 +30,11 @@ module Sablon
30
30
  # process the styles as a hash and store values
31
31
  style_attrs = {}
32
32
  properties.each do |key, value|
33
+ key = key.strip if key.respond_to? :strip
34
+ value = value.strip if value.respond_to? :strip
35
+ #
33
36
  unless key.is_a? Symbol
34
- key, value = *convert_style_property(key.strip, value.strip)
37
+ key, value = *convert_style_property(key, value)
35
38
  end
36
39
  style_attrs[key] = value if key
37
40
  end
@@ -127,8 +130,11 @@ module Sablon
127
130
  class Root < Collection
128
131
  def initialize(env, node)
129
132
  # strip text nodes from the root level element, these are typically
130
- # extra whitespace from indenting the markup
131
- node.search('./text()').remove
133
+ # extra whitespace from indenting the markup if there are any
134
+ # block level tags at the top level
135
+ if ASTBuilder.any_block_tags?(node.children)
136
+ node.search('./text()').remove
137
+ end
132
138
 
133
139
  # convert children from HTML to AST nodes
134
140
  super(ASTBuilder.html_to_ast(env, node.children, {}))
@@ -204,8 +210,8 @@ module Sablon
204
210
  #
205
211
  @definition = nil
206
212
  if node.ancestors(".//#{@list_tag}").length.zero?
207
- # Only register a definition when upon the first list tag encountered
208
- @definition = env.numbering.register(properties[:pStyle])
213
+ # Only register a definition upon the first list tag encountered
214
+ @definition = env.document.add_list_definition(properties['pStyle'])
209
215
  end
210
216
 
211
217
  # update attributes of all child nodes
@@ -214,10 +220,6 @@ module Sablon
214
220
  # Move any list tags that are a child of a list item up one level
215
221
  process_child_nodes(node)
216
222
 
217
- # strip text nodes from the list level element, this is typically
218
- # extra whitespace from indenting the markup
219
- node.search('./text()').remove
220
-
221
223
  # convert children from HTML to AST nodes
222
224
  super(ASTBuilder.html_to_ast(env, node.children, properties))
223
225
  end
@@ -532,14 +534,13 @@ module Sablon
532
534
  @runs = Collection.new(@runs)
533
535
  @target = node.attributes['href'].value
534
536
  #
535
- hyperlink_relation = {
536
- Id: 'rId' + SecureRandom.uuid.delete('-'),
537
+ rel_attr = {
537
538
  Type: 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink',
538
539
  Target: @target,
539
540
  TargetMode: 'External'
540
541
  }
541
- env.relationship.relationships << hyperlink_relation
542
- @attributes = { 'r:id' => hyperlink_relation[:Id] }
542
+ rid = env.document.add_relationship(rel_attr)
543
+ @attributes = { 'r:id' => rid }
543
544
  end
544
545
 
545
546
  def to_docx
@@ -9,6 +9,23 @@ module Sablon
9
9
  builder.nodes
10
10
  end
11
11
 
12
+ # Checks if there are any block level tags in the current node set
13
+ # this is used at the root level to determine if top level text nodes
14
+ # should be removed
15
+ def self.any_block_tags?(nodes)
16
+ nodes.detect { |node| fetch_tag(node.name).type == :block }
17
+ end
18
+
19
+ # Retrieves a HTMLTag instance from the permitted_html_tags hash or
20
+ # raises an ArgumentError if the tag is not registered
21
+ def self.fetch_tag(tag_name)
22
+ tag_name = tag_name.to_sym
23
+ unless Sablon::Configuration.instance.permitted_html_tags[tag_name]
24
+ raise ArgumentError, "Don't know how to handle HTML tag: #{tag_name}"
25
+ end
26
+ Sablon::Configuration.instance.permitted_html_tags[tag_name]
27
+ end
28
+
12
29
  private
13
30
 
14
31
  def initialize(env, nodes, properties)
@@ -42,11 +59,7 @@ module Sablon
42
59
  # retrieves a HTMLTag instance from the permitted_html_tags hash or
43
60
  # raises an ArgumentError if the tag is not registered in the hash
44
61
  def fetch_tag(tag_name)
45
- tag_name = tag_name.to_sym
46
- unless Sablon::Configuration.instance.permitted_html_tags[tag_name]
47
- raise ArgumentError, "Don't know how to handle HTML tag: #{tag_name}"
48
- end
49
- Sablon::Configuration.instance.permitted_html_tags[tag_name]
62
+ self.class.fetch_tag(tag_name)
50
63
  end
51
64
 
52
65
  # Checking that the current tag is an allowed child of the parent_tag.
@@ -35,11 +35,11 @@ module Sablon
35
35
  end
36
36
 
37
37
  def [](key)
38
- @properties[key]
38
+ @properties[key.to_sym]
39
39
  end
40
40
 
41
41
  def []=(key, value)
42
- @properties[key] = value
42
+ @properties[key.to_sym] = value
43
43
  end
44
44
 
45
45
  def to_docx
@@ -57,7 +57,7 @@ module Sablon
57
57
  #
58
58
  properties.each do |key, value|
59
59
  if whitelist.include? key.to_s
60
- @properties[key] = value
60
+ @properties[key.to_sym] = value
61
61
  else
62
62
  @transferred_properties[key] = value
63
63
  end
@@ -21,8 +21,36 @@ module Sablon
21
21
  iter_env = env.alter_context(iterator_name => item)
22
22
  block.process(iter_env)
23
23
  end
24
+ update_unique_ids(env, content)
24
25
  block.replace(content.reverse)
25
26
  end
27
+
28
+ private
29
+
30
+ # updates all unique ids present in the xml being copied
31
+ def update_unique_ids(env, content)
32
+ doc_xml = env.document.zip_contents[env.document.current_entry]
33
+ dom_entry = env.document[env.document.current_entry]
34
+ #
35
+ # update all docPr tags created
36
+ selector = "//*[local-name() = 'docPr']"
37
+ init_id_val = dom_entry.max_attribute_value(doc_xml, selector, 'id')
38
+ update_tag_attribute(content, 'docPr', 'id', init_id_val)
39
+ #
40
+ # update all cNvPr tags created
41
+ selector = "//*[local-name() = 'cNvPr']"
42
+ init_id_val = dom_entry.max_attribute_value(doc_xml, selector, 'id')
43
+ update_tag_attribute(content, 'cNvPr', 'id', init_id_val)
44
+ end
45
+
46
+ # Assigns each matching tag a new attribute value, counting up by 1 from init_val
47
+ def update_tag_attribute(content, tag_name, attr_name, init_val)
48
+ content.each do |nodeset|
49
+ nodeset.xpath(".//*[local-name() = '#{tag_name}']").each do |node|
50
+ node[attr_name] = (init_val += 1).to_s
51
+ end
52
+ end
53
+ end
26
54
  end
27
55
 
28
56
  class Condition < Struct.new(:conditon_expr, :block, :predicate)
@@ -50,6 +78,37 @@ module Sablon
50
78
  block.replace []
51
79
  end
52
80
  end
81
+
82
+ class Image < Struct.new(:image_reference, :block)
83
+ def evaluate(env)
84
+ image = image_reference.evaluate(env.context)
85
+ set_local_rid(env, image) if image
86
+ block.replace(image)
87
+ end
88
+
89
+ private
90
+
91
+ def set_local_rid(env, image)
92
+ if image.rid_by_file.keys.empty?
93
+ # Only add the image once, it is reused afterwards
94
+ rel_attr = {
95
+ Type: 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
96
+ }
97
+ rid = env.document.add_media(image.name, image.data, rel_attr)
98
+ image.rid_by_file[env.document.current_entry] = rid
99
+ elsif image.rid_by_file[env.document.current_entry].nil?
100
+ # locate an existing relationship and duplicate it
101
+ entry = image.rid_by_file.keys.first
102
+ value = image.rid_by_file[entry]
103
+ #
104
+ rel = env.document.find_relationship_by('Id', value, entry)
105
+ rid = env.document.add_relationship(rel.attributes)
106
+ image.rid_by_file[env.document.current_entry] = rid
107
+ end
108
+ #
109
+ image.local_rid = image.rid_by_file[env.document.current_entry]
110
+ end
111
+ end
53
112
  end
54
113
 
55
114
  module Expression
@@ -2,11 +2,9 @@
2
2
  module Sablon
3
3
  module Processor
4
4
  class Document
5
- def self.process(xml_node, env, properties = {})
5
+ def self.process(xml_node, env)
6
6
  processor = new(parser)
7
7
  processor.manipulate xml_node, env
8
- processor.write_properties xml_node, properties if properties.any?
9
- xml_node
10
8
  end
11
9
 
12
10
  def self.parser
@@ -26,14 +24,8 @@ module Sablon
26
24
  xml_node
27
25
  end
28
26
 
29
- def write_properties(xml_node, properties)
30
- if start_page_number = properties[:start_page_number] || properties["start_page_number"]
31
- section_properties = SectionProperties.from_document(xml_node)
32
- section_properties.start_page_number = start_page_number
33
- end
34
- end
35
-
36
27
  private
28
+
37
29
  def build_operations(fields)
38
30
  OperationConstruction.new(fields).operations
39
31
  end
@@ -51,7 +43,7 @@ module Sablon
51
43
 
52
44
  class Block < Struct.new(:start_field, :end_field)
53
45
  def self.enclosed_by(start_field, end_field)
54
- @blocks ||= [RowBlock, ParagraphBlock, InlineParagraphBlock]
46
+ @blocks ||= [ImageBlock, RowBlock, ParagraphBlock, InlineParagraphBlock]
55
47
  block_class = @blocks.detect { |klass| klass.encloses?(start_field, end_field) }
56
48
  block_class.new start_field, end_field
57
49
  end
@@ -117,6 +109,48 @@ module Sablon
117
109
  end
118
110
  end
119
111
 
112
+ class ImageBlock < ParagraphBlock
113
+ def self.parent(node)
114
+ node.ancestors(".//w:p").first
115
+ end
116
+
117
+ def self.encloses?(start_field, end_field)
118
+ start_field.expression.start_with?('@')
119
+ end
120
+
121
+ def replace(image)
122
+ #
123
+ if image
124
+ nodes_between_fields.each do |node|
125
+ pic_prop = node.at_xpath('.//pic:cNvPr', pic: 'http://schemas.openxmlformats.org/drawingml/2006/picture')
126
+ pic_prop.attributes['name'].value = image.name if pic_prop
127
+ blip = node.at_xpath('.//a:blip', a: 'http://schemas.openxmlformats.org/drawingml/2006/main')
128
+ blip.attributes['embed'].value = image.local_rid if blip
129
+ end
130
+ end
131
+ #
132
+ start_field.remove
133
+ end_field.remove
134
+ end
135
+
136
+ private
137
+
138
+ # Collects all nodes between the two nodes provided into an array.
139
+ # Each entry in the array should be a paragraph tag.
140
+ # https://stackoverflow.com/a/820776
141
+ def nodes_between_fields
142
+ first = self.class.parent(start_field)
143
+ last = self.class.parent(end_field)
144
+ #
145
+ result = [first]
146
+ until first == last
147
+ first = first.next
148
+ result << first
149
+ end
150
+ result
151
+ end
152
+ end
153
+
120
154
  class InlineParagraphBlock < Block
121
155
  def self.parent(node)
122
156
  node.ancestors ".//w:p"
@@ -171,6 +205,9 @@ module Sablon
171
205
  when /([^ ]+):if/
172
206
  block = consume_block("#{$1}:endIf")
173
207
  Statement::Condition.new(Expression.parse($1), block)
208
+ when /^@([^ ]+):start/
209
+ block = consume_block("@#{$1}:end")
210
+ Statement::Image.new(Expression.parse($1), block)
174
211
  when /^comment$/
175
212
  block = consume_block("endComment")
176
213
  Statement::Comment.new(block)