sablon 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/Gemfile.lock +9 -9
  4. data/README.md +120 -11
  5. data/lib/sablon.rb +7 -1
  6. data/lib/sablon/configuration/configuration.rb +165 -0
  7. data/lib/sablon/configuration/html_tag.rb +99 -0
  8. data/lib/sablon/content.rb +12 -9
  9. data/lib/sablon/context.rb +27 -20
  10. data/lib/sablon/environment.rb +31 -0
  11. data/lib/sablon/html/ast.rb +290 -75
  12. data/lib/sablon/html/ast_builder.rb +90 -0
  13. data/lib/sablon/html/converter.rb +3 -123
  14. data/lib/sablon/numbering.rb +0 -5
  15. data/lib/sablon/operations.rb +11 -11
  16. data/lib/sablon/parser/mail_merge.rb +7 -6
  17. data/lib/sablon/processor/document.rb +9 -9
  18. data/lib/sablon/processor/numbering.rb +4 -4
  19. data/lib/sablon/template.rb +5 -4
  20. data/lib/sablon/version.rb +1 -1
  21. data/sablon.gemspec +3 -3
  22. data/test/configuration_test.rb +122 -0
  23. data/test/content_test.rb +7 -6
  24. data/test/context_test.rb +11 -11
  25. data/test/environment_test.rb +27 -0
  26. data/test/expression_test.rb +2 -2
  27. data/test/fixtures/html/html_test_content.html +174 -0
  28. data/test/fixtures/html_sample.docx +0 -0
  29. data/test/fixtures/xml/comment_block_and_comment_as_key.xml +31 -0
  30. data/test/html/ast_builder_test.rb +65 -0
  31. data/test/html/ast_test.rb +117 -0
  32. data/test/html/converter_test.rb +386 -87
  33. data/test/html/node_properties_test.rb +113 -0
  34. data/test/html_test.rb +10 -10
  35. data/test/mail_merge_parser_test.rb +3 -2
  36. data/test/processor/document_test.rb +20 -2
  37. data/test/section_properties_test.rb +1 -1
  38. data/test/support/html_snippets.rb +9 -0
  39. data/test/test_helper.rb +0 -1
  40. metadata +27 -7
Binary file
@@ -0,0 +1,31 @@
1
+ <w:r><w:t xml:space="preserve">Before </w:t></w:r>
2
+ <w:p>
3
+ <w:fldSimple w:instr=" MERGEFIELD comment \* MERGEFORMAT ">
4
+ <w:r>
5
+ <w:rPr><w:noProof/></w:rPr>
6
+ <w:t>«comment»</w:t>
7
+ </w:r>
8
+ </w:fldSimple>
9
+ </w:p>
10
+ <w:p>
11
+ <w:r>
12
+ <w:t>Inside Comment! </w:t>
13
+ </w:r>
14
+ </w:p>
15
+ <w:p>
16
+ <w:fldSimple w:instr=" MERGEFIELD endComment \* MERGEFORMAT ">
17
+ <w:r>
18
+ <w:rPr><w:noProof/></w:rPr>
19
+ <w:t>«endComment»</w:t>
20
+ </w:r>
21
+ </w:fldSimple>
22
+ </w:p>
23
+ <w:p>
24
+ <w:fldSimple w:instr=" MERGEFIELD =comment \* MERGEFORMAT ">
25
+ <w:r w:rsidR="004B49F0">
26
+ <w:rPr><w:noProof/></w:rPr>
27
+ <w:t>«=comment»</w:t>
28
+ </w:r>
29
+ </w:fldSimple>
30
+ </w:p>
31
+ <w:r><w:t xml:space="preserve">After </w:t></w:r>
@@ -0,0 +1,65 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "test_helper"
3
+
4
+ # Tests some low level private methods in the ASTBuilder class. #process_nodes
5
+ # and self.html_to_ast are covered extensively in converter_test.rb
6
+ class HTMLConverterASTBuilderTest < Sablon::TestCase
7
+ def setup
8
+ super
9
+ @env = Sablon::Environment.new(nil)
10
+ end
11
+
12
+ def test_fetch_tag
13
+ @bulider = new_builder
14
+ tag = Sablon::Configuration.instance.permitted_html_tags[:span]
15
+ assert_equal @bulider.send(:fetch_tag, :span), tag
16
+ # check that strings are converted into symbols
17
+ assert_equal @bulider.send(:fetch_tag, 'span'), tag
18
+ # test unknown tag raises error
19
+ e = assert_raises ArgumentError do
20
+ @bulider.send(:fetch_tag, :unknown_tag)
21
+ end
22
+ assert_equal "Don't know how to handle HTML tag: unknown_tag", e.message
23
+ end
24
+
25
+ def test_validate_structure
26
+ @bulider = new_builder
27
+ root = Sablon::Configuration.instance.permitted_html_tags['#document-fragment'.to_sym]
28
+ div = Sablon::Configuration.instance.permitted_html_tags[:div]
29
+ span = Sablon::Configuration.instance.permitted_html_tags[:span]
30
+ # test valid relationship
31
+ assert_nil @bulider.send(:validate_structure, div, span)
32
+ # test inverted relationship
33
+ e = assert_raises ArgumentError do
34
+ @bulider.send(:validate_structure, span, div)
35
+ end
36
+ assert_equal "Invalid HTML structure: div is not a valid child element of span.", e.message
37
+ # test inline tag with no parent
38
+ e = assert_raises ArgumentError do
39
+ @bulider.send(:validate_structure, root, span)
40
+ end
41
+ assert_equal "Invalid HTML structure: span needs to be wrapped in a block level tag.", e.message
42
+ end
43
+
44
+ def test_merge_properties
45
+ @builder = new_builder
46
+ node = Nokogiri::HTML.fragment('<span style="color: #F00; text-decoration: underline wavy">Test</span>').children[0]
47
+ tag = Struct.new(:properties).new(rStyle: 'Normal')
48
+ # test that properties are merged across all three arguments
49
+ props = @builder.send(:merge_node_properties, node, tag, 'background-color' => '#00F')
50
+ assert_equal({ 'background-color' => '#00F', rStyle: 'Normal', 'color' => '#F00', 'text-decoration' => 'underline wavy' }, props)
51
+ # test that parent properties are overridden by tag properties
52
+ props = @builder.send(:merge_node_properties, node, tag, rStyle: 'Citation', 'background-color' => '#00F')
53
+ assert_equal({ 'background-color' => '#00F', rStyle: 'Normal', 'color' => '#F00', 'text-decoration' => 'underline wavy' }, props)
54
+ # test that inline properties override parent styles
55
+ node = Nokogiri::HTML.fragment('<span style="color: #F00">Test</span>').children[0]
56
+ props = @builder.send(:merge_node_properties, node, tag, 'color' => '#00F')
57
+ assert_equal({ rStyle: 'Normal', 'color' => '#F00' }, props)
58
+ end
59
+
60
+ private
61
+
62
+ def new_builder(nodes = [], properties = {})
63
+ Sablon::HTMLConverter::ASTBuilder.new(@env, nodes, properties)
64
+ end
65
+ end
@@ -0,0 +1,117 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "test_helper"
3
+
4
+ class HTMLConverterASTTest < Sablon::TestCase
5
+ def setup
6
+ super
7
+ @converter = Sablon::HTMLConverter.new
8
+ @converter.instance_variable_set(:@env, Sablon::Environment.new(nil))
9
+ end
10
+
11
+ def test_div
12
+ input = '<div>Lorem ipsum dolor sit amet</div>'
13
+ ast = @converter.processed_ast(input)
14
+ assert_equal '<Root: [<Paragraph{Normal}: [<Run{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
15
+ end
16
+
17
+ def test_p
18
+ input = '<p>Lorem ipsum dolor sit amet</p>'
19
+ ast = @converter.processed_ast(input)
20
+ assert_equal '<Root: [<Paragraph{Paragraph}: [<Run{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
21
+ end
22
+
23
+ def test_b
24
+ input = '<p>Lorem <b>ipsum dolor sit amet</b></p>'
25
+ ast = @converter.processed_ast(input)
26
+ assert_equal '<Root: [<Paragraph{Paragraph}: [<Run{}: Lorem >, <Run{b}: ipsum dolor sit amet>]>]>', ast.inspect
27
+ end
28
+
29
+ def test_i
30
+ input = '<p>Lorem <i>ipsum dolor sit amet</i></p>'
31
+ ast = @converter.processed_ast(input)
32
+ assert_equal '<Root: [<Paragraph{Paragraph}: [<Run{}: Lorem >, <Run{i}: ipsum dolor sit amet>]>]>', ast.inspect
33
+ end
34
+
35
+ def test_br_in_strong
36
+ input = '<div><strong>Lorem<br />ipsum<br />dolor</strong></div>'
37
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
38
+ assert_equal "[<Run{b}: Lorem>, <Newline>, <Run{b}: ipsum>, <Newline>, <Run{b}: dolor>]", par.runs.inspect
39
+ end
40
+
41
+ def test_br_in_em
42
+ input = '<div><em>Lorem<br />ipsum<br />dolor</em></div>'
43
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
44
+ assert_equal "[<Run{i}: Lorem>, <Newline>, <Run{i}: ipsum>, <Newline>, <Run{i}: dolor>]", par.runs.inspect
45
+ end
46
+
47
+ def test_nested_strong_and_em
48
+ input = '<div><strong>Lorem <em>ipsum</em> dolor</strong></div>'
49
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
50
+ assert_equal "[<Run{b}: Lorem >, <Run{b;i}: ipsum>, <Run{b}: dolor>]", par.runs.inspect
51
+ end
52
+
53
+ def test_ignore_last_br_in_div
54
+ input = '<div>Lorem ipsum dolor sit amet<br /></div>'
55
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
56
+ assert_equal "[<Run{}: Lorem ipsum dolor sit amet>]", par.runs.inspect
57
+ end
58
+
59
+ def test_ignore_br_in_blank_div
60
+ input = '<div><br /></div>'
61
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
62
+ assert_equal "[]", par.runs.inspect
63
+ end
64
+
65
+ def test_headings
66
+ input = '<h1>First</h1><h2>Second</h2><h3>Third</h3>'
67
+ ast = @converter.processed_ast(input)
68
+ assert_equal "<Root: [<Paragraph{Heading1}: [<Run{}: First>]>, <Paragraph{Heading2}: [<Run{}: Second>]>, <Paragraph{Heading3}: [<Run{}: Third>]>]>", ast.inspect
69
+ end
70
+
71
+ def test_h_with_formatting
72
+ input = '<h1><strong>Lorem</strong> ipsum dolor <em>sit <u>amet</u></em></h1>'
73
+ ast = @converter.processed_ast(input)
74
+ assert_equal "<Root: [<Paragraph{Heading1}: [<Run{b}: Lorem>, <Run{}: ipsum dolor >, <Run{i}: sit >, <Run{i;u=single}: amet>]>]>", ast.inspect
75
+ end
76
+
77
+ def test_ul
78
+ input = '<ul><li>Lorem</li><li>ipsum</li></ul>'
79
+ ast = @converter.processed_ast(input)
80
+ assert_equal "<Root: [<List: [<Paragraph{ListBullet}: [<Run{}: Lorem>]>, <Paragraph{ListBullet}: [<Run{}: ipsum>]>]>]>", ast.inspect
81
+ end
82
+
83
+ def test_ol
84
+ input = '<ol><li>Lorem</li><li>ipsum</li></ol>'
85
+ ast = @converter.processed_ast(input)
86
+ assert_equal "<Root: [<List: [<Paragraph{ListNumber}: [<Run{}: Lorem>]>, <Paragraph{ListNumber}: [<Run{}: ipsum>]>]>]>", ast.inspect
87
+ end
88
+
89
+ def test_num_id
90
+ ast = @converter.processed_ast('<ol><li>Some</li><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li><li>sit</li></ol>')
91
+ assert_equal %w[1001 1001 1002 1003 1003], get_numpr_prop_from_ast(ast, :numId)
92
+ end
93
+
94
+ def test_nested_lists_have_the_same_numid
95
+ ast = @converter.processed_ast('<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>')
96
+ assert_equal %w[1001 1001 1001], get_numpr_prop_from_ast(ast, :numId)
97
+ end
98
+
99
+ def test_keep_nested_list_order
100
+ input = '<ul><li>1<ul><li>1.1<ul><li>1.1.1</li></ul></li><li>1.2</li></ul></li><li>2<ul><li>1.3<ul><li>1.3.1</li></ul></li></ul></li></ul>'
101
+ ast = @converter.processed_ast(input)
102
+ assert_equal %w[1001], get_numpr_prop_from_ast(ast, :numId).uniq
103
+ assert_equal %w[0 1 2 1 0 1 2], get_numpr_prop_from_ast(ast, :ilvl)
104
+ end
105
+
106
+ private
107
+
108
+ # returns the values of the given numPr property (e.g. :numId or :ilvl) from list paragraphs
109
+ def get_numpr_prop_from_ast(ast, key)
110
+ values = []
111
+ ast.grep(Sablon::HTMLConverter::ListParagraph).each do |para|
112
+ numpr = para.instance_variable_get('@properties')[:numPr]
113
+ numpr.each { |val| values.push(val[key]) if val[key] }
114
+ end
115
+ values
116
+ end
117
+ end
@@ -4,6 +4,8 @@ require "test_helper"
4
4
  class HTMLConverterTest < Sablon::TestCase
5
5
  def setup
6
6
  super
7
+ @env = Sablon::Environment.new(nil)
8
+ @numbering = @env.numbering
7
9
  @converter = Sablon::HTMLConverter.new
8
10
  end
9
11
 
@@ -15,7 +17,7 @@ class HTMLConverterTest < Sablon::TestCase
15
17
  <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
16
18
  </w:p>
17
19
  DOCX
18
- assert_equal normalize_wordml(expected_output), @converter.process(input)
20
+ assert_equal normalize_wordml(expected_output), process(input)
19
21
  end
20
22
 
21
23
  def test_convert_text_inside_p
@@ -26,7 +28,7 @@ DOCX
26
28
  <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
27
29
  </w:p>
28
30
  DOCX
29
- assert_equal normalize_wordml(expected_output), @converter.process(input)
31
+ assert_equal normalize_wordml(expected_output), process(input)
30
32
  end
31
33
 
32
34
  def test_convert_text_inside_multiple_divs
@@ -41,7 +43,7 @@ DOCX
41
43
  <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
42
44
  </w:p>
43
45
  DOCX
44
- assert_equal normalize_wordml(expected_output), @converter.process(input)
46
+ assert_equal normalize_wordml(expected_output), process(input)
45
47
  end
46
48
 
47
49
  def test_convert_newline_inside_div
@@ -54,7 +56,7 @@ DOCX
54
56
  <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
55
57
  </w:p>
56
58
  DOCX
57
- assert_equal normalize_wordml(expected_output), @converter.process(input)
59
+ assert_equal normalize_wordml(expected_output), process(input)
58
60
  end
59
61
 
60
62
  def test_convert_strong_tags_inside_div
@@ -67,23 +69,36 @@ DOCX
67
69
  <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
68
70
  </w:p>
69
71
  DOCX
70
- assert_equal normalize_wordml(expected_output), @converter.process(input)
72
+ assert_equal normalize_wordml(expected_output), process(input)
73
+ end
74
+
75
+ def test_convert_span_tags_inside_p
76
+ input = '<p>Lorem&nbsp;<span>ipsum dolor</span>&nbsp;sit amet</p>'
77
+ expected_output = <<-DOCX.strip
78
+ <w:p>
79
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
80
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
81
+ <w:r><w:t xml:space="preserve">ipsum dolor</w:t></w:r>
82
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r></w:p>
83
+ DOCX
84
+
85
+ assert_equal normalize_wordml(expected_output), process(input)
71
86
  end
72
87
 
73
88
  def test_convert_u_tags_inside_p
74
- input = '<p>Lorem&nbsp;<u>ipsum dolor</u>&nbsp;sit amet</div>'
89
+ input = '<p>Lorem&nbsp;<u>ipsum dolor</u>&nbsp;sit amet</p>'
75
90
  expected_output = <<-DOCX.strip
76
91
  <w:p>
77
92
  <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
78
93
  <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
79
94
  <w:r>
80
- <w:rPr><w:u w:val="single"/></w:rPr>
95
+ <w:rPr><w:u w:val="single" /></w:rPr>
81
96
  <w:t xml:space="preserve">ipsum dolor</w:t>
82
97
  </w:r>
83
98
  <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
84
99
  </w:p>
85
100
  DOCX
86
- assert_equal normalize_wordml(expected_output), @converter.process(input)
101
+ assert_equal normalize_wordml(expected_output), process(input)
87
102
  end
88
103
 
89
104
  def test_convert_em_tags_inside_div
@@ -96,7 +111,55 @@ DOCX
96
111
  <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
97
112
  </w:p>
98
113
  DOCX
99
- assert_equal normalize_wordml(expected_output), @converter.process(input)
114
+ assert_equal normalize_wordml(expected_output), process(input)
115
+ end
116
+
117
+ def test_convert_s_tags_inside_p
118
+ input = '<p>Lorem&nbsp;<s>ipsum dolor</s>&nbsp;sit amet</p>'
119
+ expected_output = <<-DOCX.strip
120
+ <w:p>
121
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
122
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
123
+ <w:r>
124
+ <w:rPr><w:strike w:val="true" /></w:rPr>
125
+ <w:t xml:space="preserve">ipsum dolor</w:t>
126
+ </w:r>
127
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
128
+ </w:p>
129
+ DOCX
130
+ assert_equal normalize_wordml(expected_output), process(input)
131
+ end
132
+
133
+ def test_convert_sub_tags_inside_p
134
+ input = '<p>Lorem&nbsp;<sub>ipsum dolor</sub>&nbsp;sit amet</p>'
135
+ expected_output = <<-DOCX.strip
136
+ <w:p>
137
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
138
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
139
+ <w:r>
140
+ <w:rPr><w:vertAlign w:val="subscript" /></w:rPr>
141
+ <w:t xml:space="preserve">ipsum dolor</w:t>
142
+ </w:r>
143
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
144
+ </w:p>
145
+ DOCX
146
+ assert_equal normalize_wordml(expected_output), process(input)
147
+ end
148
+
149
+ def test_convert_sup_tags_inside_p
150
+ input = '<p>Lorem&nbsp;<sup>ipsum dolor</sup>&nbsp;sit amet</p>'
151
+ expected_output = <<-DOCX.strip
152
+ <w:p>
153
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
154
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
155
+ <w:r>
156
+ <w:rPr><w:vertAlign w:val="superscript" /></w:rPr>
157
+ <w:t xml:space="preserve">ipsum dolor</w:t>
158
+ </w:r>
159
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
160
+ </w:p>
161
+ DOCX
162
+ assert_equal normalize_wordml(expected_output), process(input)
100
163
  end
101
164
 
102
165
  def test_convert_br_tags_inside_strong
@@ -115,7 +178,7 @@ DOCX
115
178
  </w:r>
116
179
  </w:p>
117
180
  DOCX
118
- assert_equal normalize_wordml(expected_output), @converter.process(input)
181
+ assert_equal normalize_wordml(expected_output), process(input)
119
182
  end
120
183
 
121
184
  def test_convert_h1
@@ -126,7 +189,7 @@ DOCX
126
189
  <w:r><w:t xml:space="preserve">Lorem ipsum dolor</w:t></w:r>
127
190
  </w:p>
128
191
  DOCX
129
- assert_equal normalize_wordml(expected_output), @converter.process(input)
192
+ assert_equal normalize_wordml(expected_output), process(input)
130
193
  end
131
194
 
132
195
  def test_unorderd_lists
@@ -165,9 +228,9 @@ DOCX
165
228
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
166
229
  </w:p>
167
230
  DOCX
168
- assert_equal normalize_wordml(expected_output), @converter.process(input)
231
+ assert_equal normalize_wordml(expected_output), process(input)
169
232
 
170
- assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
233
+ assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], @numbering.definitions
171
234
  end
172
235
 
173
236
  def test_ordered_lists
@@ -206,9 +269,9 @@ DOCX
206
269
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
207
270
  </w:p>
208
271
  DOCX
209
- assert_equal normalize_wordml(expected_output), @converter.process(input)
272
+ assert_equal normalize_wordml(expected_output), process(input)
210
273
 
211
- assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber')], Sablon::Numbering.instance.definitions
274
+ assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber')], @numbering.definitions
212
275
  end
213
276
 
214
277
  def test_mixed_lists
@@ -247,11 +310,11 @@ DOCX
247
310
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
248
311
  </w:p>
249
312
  DOCX
250
- assert_equal normalize_wordml(expected_output), @converter.process(input)
313
+ assert_equal normalize_wordml(expected_output), process(input)
251
314
 
252
315
  assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber'),
253
316
  Sablon::Numbering::Definition.new(1002, 'ListBullet'),
254
- Sablon::Numbering::Definition.new(1003, 'ListNumber')], Sablon::Numbering.instance.definitions
317
+ Sablon::Numbering::Definition.new(1003, 'ListNumber')], @numbering.definitions
255
318
  end
256
319
 
257
320
  def test_nested_unordered_lists
@@ -290,116 +353,352 @@ DOCX
290
353
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
291
354
  </w:p>
292
355
  DOCX
293
- assert_equal normalize_wordml(expected_output), @converter.process(input)
356
+ assert_equal normalize_wordml(expected_output), process(input)
357
+
358
+ assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], @numbering.definitions
359
+ end
294
360
 
295
- assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
361
+ def test_unknown_tag
362
+ e = assert_raises ArgumentError do
363
+ process('<badtag/>')
364
+ end
365
+ assert_match(/Don't know how to handle HTML tag:/, e.message)
296
366
  end
297
367
 
298
368
  private
369
+
370
+ def process(input)
371
+ @converter.process(input, @env)
372
+ end
373
+
299
374
  def normalize_wordml(wordml)
300
375
  wordml.gsub(/^\s+/, '').tr("\n", '')
301
376
  end
302
377
  end
303
378
 
304
- class HTMLConverterASTTest < Sablon::TestCase
379
+ class HTMLConverterStyleTest < Sablon::TestCase
305
380
  def setup
306
381
  super
382
+ @env = Sablon::Environment.new(nil)
307
383
  @converter = Sablon::HTMLConverter.new
308
384
  end
309
385
 
310
- def test_div
311
- input = '<div>Lorem ipsum dolor sit amet</div>'
312
- ast = @converter.processed_ast(input)
313
- assert_equal '<Root: [<Paragraph{Normal}: [<Text{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
314
- end
386
+ # testing direct CSS style -> WordML conversion for paragraphs
387
+
388
+ def test_paragraph_with_background_color
389
+ input = '<p style="background-color: #123456"></p>'
390
+ expected_output = para_with_ppr('<w:shd w:val="clear" w:fill="123456" />')
391
+ assert_equal normalize_wordml(expected_output), process(input)
392
+ end
393
+
394
+ def test_paragraph_with_borders
395
+ # Basic single line black border
396
+ input = '<p style="border: 1px"></p>'
397
+ ppr = <<-DOCX.strip
398
+ <w:pBdr>
399
+ <w:top w:sz="2" w:val="single" w:color="000000" />
400
+ <w:bottom w:sz="2" w:val="single" w:color="000000" />
401
+ <w:left w:sz="2" w:val="single" w:color="000000" />
402
+ <w:right w:sz="2" w:val="single" w:color="000000" />
403
+ </w:pBdr>
404
+ DOCX
405
+ expected_output = para_with_ppr(ppr)
406
+ assert_equal normalize_wordml(expected_output), process(input)
407
+ # border with a line style
408
+ input = '<p style="border: 1px wavy"></p>'
409
+ ppr = <<-DOCX.strip
410
+ <w:pBdr>
411
+ <w:top w:sz="2" w:val="wavy" w:color="000000" />
412
+ <w:bottom w:sz="2" w:val="wavy" w:color="000000" />
413
+ <w:left w:sz="2" w:val="wavy" w:color="000000" />
414
+ <w:right w:sz="2" w:val="wavy" w:color="000000" />
415
+ </w:pBdr>
416
+ DOCX
417
+ expected_output = para_with_ppr(ppr)
418
+ assert_equal normalize_wordml(expected_output), process(input)
419
+ # border with line style and color
420
+ input = '<p style="border: 1px wavy #123456"></p>'
421
+ ppr = <<-DOCX.strip
422
+ <w:pBdr>
423
+ <w:top w:sz="2" w:val="wavy" w:color="123456" />
424
+ <w:bottom w:sz="2" w:val="wavy" w:color="123456" />
425
+ <w:left w:sz="2" w:val="wavy" w:color="123456" />
426
+ <w:right w:sz="2" w:val="wavy" w:color="123456" />
427
+ </w:pBdr>
428
+ DOCX
429
+ expected_output = para_with_ppr(ppr)
430
+ assert_equal normalize_wordml(expected_output), process(input)
431
+ end
432
+
433
+ def test_paragraph_with_text_align
434
+ input = '<p style="text-align: both"></p>'
435
+ expected_output = para_with_ppr('<w:jc w:val="both" />')
436
+ assert_equal normalize_wordml(expected_output), process(input)
437
+ end
315
438
 
316
- def test_p
317
- input = '<p>Lorem ipsum dolor sit amet</p>'
318
- ast = @converter.processed_ast(input)
319
- assert_equal '<Root: [<Paragraph{Paragraph}: [<Text{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
439
+ def test_paragraph_with_vertical_align
440
+ input = '<p style="vertical-align: baseline"></p>'
441
+ expected_output = para_with_ppr('<w:textAlignment w:val="baseline" />')
442
+ assert_equal normalize_wordml(expected_output), process(input)
320
443
  end
321
444
 
322
- def test_b
323
- input = '<p>Lorem <b>ipsum dolor sit amet</b></p>'
324
- ast = @converter.processed_ast(input)
325
- assert_equal '<Root: [<Paragraph{Paragraph}: [<Text{}: Lorem >, <Text{bold}: ipsum dolor sit amet>]>]>', ast.inspect
445
+ def test_paragraph_with_unsupported_property
446
+ input = '<p style="unsupported: true"></p>'
447
+ expected_output = para_with_ppr('')
448
+ assert_equal normalize_wordml(expected_output), process(input)
326
449
  end
327
450
 
328
- def test_i
329
- input = '<p>Lorem <i>ipsum dolor sit amet</i></p>'
330
- ast = @converter.processed_ast(input)
331
- assert_equal '<Root: [<Paragraph{Paragraph}: [<Text{}: Lorem >, <Text{italic}: ipsum dolor sit amet>]>]>', ast.inspect
451
+ def test_run_with_background_color
452
+ input = '<p><span style="background-color: #123456">test</span></p>'
453
+ expected_output = run_with_rpr('<w:shd w:val="clear" w:fill="123456" />')
454
+ assert_equal normalize_wordml(expected_output), process(input)
332
455
  end
333
456
 
334
- def test_br_in_strong
335
- input = '<div><strong>Lorem<br />ipsum<br />dolor</strong></div>'
336
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
337
- assert_equal "[<Text{bold}: Lorem>, <Newline>, <Text{bold}: ipsum>, <Newline>, <Text{bold}: dolor>]", par.runs.inspect
457
+ def test_run_with_color
458
+ input = '<p><span style="color: #123456">test</span></p>'
459
+ expected_output = run_with_rpr('<w:color w:val="123456" />')
460
+ assert_equal normalize_wordml(expected_output), process(input)
338
461
  end
339
462
 
340
- def test_br_in_em
341
- input = '<div><em>Lorem<br />ipsum<br />dolor</em></div>'
342
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
343
- assert_equal "[<Text{italic}: Lorem>, <Newline>, <Text{italic}: ipsum>, <Newline>, <Text{italic}: dolor>]", par.runs.inspect
344
- end
463
+ def test_run_with_font_size
464
+ input = '<p><span style="font-size: 20">test</span></p>'
465
+ expected_output = run_with_rpr('<w:sz w:val="40" />')
466
+ assert_equal normalize_wordml(expected_output), process(input)
345
467
 
346
- def test_nested_strong_and_em
347
- input = '<div><strong>Lorem <em>ipsum</em> dolor</strong></div>'
348
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
349
- assert_equal "[<Text{bold}: Lorem >, <Text{bold|italic}: ipsum>, <Text{bold}: dolor>]", par.runs.inspect
350
- end
468
+ # test that non-numeric unit suffixes are ignored
469
+ input = '<p><span style="font-size: 20pts">test</span></p>'
470
+ assert_equal normalize_wordml(expected_output), process(input)
351
471
 
352
- def test_ignore_last_br_in_div
353
- input = '<div>Lorem ipsum dolor sit amet<br /></div>'
354
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
355
- assert_equal "[<Text{}: Lorem ipsum dolor sit amet>]", par.runs.inspect
472
+ # test that floats round up
473
+ input = '<p><span style="font-size: 19.1pts">test</span></p>'
474
+ assert_equal normalize_wordml(expected_output), process(input)
356
475
  end
357
476
 
358
- def test_ignore_br_in_blank_div
359
- input = '<div><br /></div>'
360
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
361
- assert_equal "[]", par.runs.inspect
477
+ def test_run_with_font_style
478
+ input = '<p><span style="font-style: bold">test</span></p>'
479
+ expected_output = run_with_rpr('<w:b />')
480
+ assert_equal normalize_wordml(expected_output), process(input)
481
+
482
+ # test that the italic font style is converted
483
+ input = '<p><span style="font-style: italic">test</span></p>'
484
+ expected_output = run_with_rpr('<w:i />')
485
+ assert_equal normalize_wordml(expected_output), process(input)
486
+ end
487
+
488
+ def test_run_with_font_wieght
489
+ input = '<p><span style="font-weight: bold">test</span></p>'
490
+ expected_output = run_with_rpr('<w:b />')
491
+ assert_equal normalize_wordml(expected_output), process(input)
492
+ end
493
+
494
+ def test_run_with_text_decoration
495
+ # testing underline configurations
496
+ input = '<p><span style="text-decoration: underline">test</span></p>'
497
+ expected_output = run_with_rpr('<w:u w:val="single" />')
498
+ assert_equal normalize_wordml(expected_output), process(input)
499
+
500
+ input = '<p><span style="text-decoration: underline dash">test</span></p>'
501
+ expected_output = run_with_rpr('<w:u w:val="dash" w:color="auto" />')
502
+ assert_equal normalize_wordml(expected_output), process(input)
503
+
504
+ input = '<p><span style="text-decoration: underline dash #123456">test</span></p>'
505
+ expected_output = run_with_rpr('<w:u w:val="dash" w:color="123456" />')
506
+ assert_equal normalize_wordml(expected_output), process(input)
507
+
508
+ # testing line-through
509
+ input = '<p><span style="text-decoration: line-through">test</span></p>'
510
+ expected_output = run_with_rpr('<w:strike w:val="true" />')
511
+ assert_equal normalize_wordml(expected_output), process(input)
512
+
513
+ # testing that unsupported values are passed through as a toggle
514
+ input = '<p><span style="text-decoration: strike">test</span></p>'
515
+ expected_output = run_with_rpr('<w:strike w:val="true" />')
516
+ assert_equal normalize_wordml(expected_output), process(input)
517
+
518
+ input = '<p><span style="text-decoration: emboss">test</span></p>'
519
+ expected_output = run_with_rpr('<w:emboss w:val="true" />')
520
+ assert_equal normalize_wordml(expected_output), process(input)
362
521
  end
363
522
 
364
- def test_headings
365
- input = '<h1>First</h1><h2>Second</h2><h3>Third</h3>'
366
- ast = @converter.processed_ast(input)
367
- assert_equal "<Root: [<Paragraph{Heading1}: [<Text{}: First>]>, <Paragraph{Heading2}: [<Text{}: Second>]>, <Paragraph{Heading3}: [<Text{}: Third>]>]>", ast.inspect
523
+ def test_run_with_vertical_align
524
+ input = '<p><span style="vertical-align: subscript">test</span></p>'
525
+ expected_output = run_with_rpr('<w:vertAlign w:val="subscript" />')
526
+ assert_equal normalize_wordml(expected_output), process(input)
527
+
528
+ input = '<p><span style="vertical-align: superscript">test</span></p>'
529
+ expected_output = run_with_rpr('<w:vertAlign w:val="superscript" />')
530
+ assert_equal normalize_wordml(expected_output), process(input)
368
531
  end
369
532
 
370
- def test_h_with_formatting
371
- input = '<h1><strong>Lorem</strong> ipsum dolor <em>sit <u>amet</u></em></h1>'
372
- ast = @converter.processed_ast(input)
373
- assert_equal "<Root: [<Paragraph{Heading1}: [<Text{bold}: Lorem>, <Text{}: ipsum dolor >, <Text{italic}: sit >, <Text{italic|underline}: amet>]>]>", ast.inspect
533
+ def test_run_with_unsupported_property
534
+ input = '<p><span style="unsupported: true">test</span></p>'
535
+ expected_output = '<w:p><w:pPr><w:pStyle w:val="Paragraph" /></w:pPr><w:r><w:t xml:space="preserve">test</w:t></w:r></w:p>'
536
+ assert_equal normalize_wordml(expected_output), process(input)
374
537
  end
375
538
 
376
- def test_ul
377
- input = '<ul><li>Lorem</li><li>ipsum</li></ul>'
378
- ast = @converter.processed_ast(input)
379
- assert_equal "<Root: [<Paragraph{ListBullet}: [<Text{}: Lorem>]>, <Paragraph{ListBullet}: [<Text{}: ipsum>]>]>", ast.inspect
539
+ # tests with nested runs and styles
540
+
541
+ def test_paragraph_props_passed_to_runs
542
+ input = '<p style="color: #123456"><b>Lorem</b><span>ipsum</span></p>'
543
+ expected_output = <<-DOCX.strip
544
+ <w:p>
545
+ <w:pPr>
546
+ <w:pStyle w:val="Paragraph" />
547
+ </w:pPr>
548
+ <w:r>
549
+ <w:rPr>
550
+ <w:color w:val="123456" />
551
+ <w:b />
552
+ </w:rPr>
553
+ <w:t xml:space="preserve">Lorem</w:t>
554
+ </w:r>
555
+ <w:r>
556
+ <w:rPr>
557
+ <w:color w:val="123456" />
558
+ </w:rPr>
559
+ <w:t xml:space="preserve">ipsum</w:t>
560
+ </w:r>
561
+ </w:p>
562
+ DOCX
563
+ assert_equal normalize_wordml(expected_output), process(input)
564
+ end
565
+
566
+ def test_run_prop_override_paragraph_prop
567
+ input = '<p style="text-align: center; color: #FF0000">Lorem<span style="color: blue;">ipsum</span></p>'
568
+ expected_output = <<-DOCX.strip
569
+ <w:p>
570
+ <w:pPr>
571
+ <w:pStyle w:val="Paragraph" />
572
+ <w:jc w:val="center" />
573
+ </w:pPr>
574
+ <w:r>
575
+ <w:rPr>
576
+ <w:color w:val="FF0000" />
577
+ </w:rPr>
578
+ <w:t xml:space="preserve">Lorem</w:t>
579
+ </w:r>
580
+ <w:r>
581
+ <w:rPr>
582
+ <w:color w:val="blue" />
583
+ </w:rPr>
584
+ <w:t xml:space="preserve">ipsum</w:t>
585
+ </w:r>
586
+ </w:p>
587
+ DOCX
588
+ assert_equal normalize_wordml(expected_output), process(input)
589
+ end
590
+
591
+ def test_inline_style_overrides_tag_style
592
+ # Note: a toggle property can not be removed once it becomes a symbol
593
+ # unless there is a specific CSS style that will set it to false. This
594
+ # is because CSS styles can only override parent properties not remove them.
595
+ input = '<p><u style="text-decoration: underline wavyDouble">test</u></p>'
596
+ expected_output = run_with_rpr('<w:u w:val="wavyDouble" w:color="auto" />')
597
+ assert_equal normalize_wordml(expected_output), process(input)
598
+ end
599
+
600
+ def test_conversion_of_a_registered_tag_without_ast_class
601
+ # This registers a new tag with the configuration object and then tries
602
+ # to convert it
603
+ Sablon.configure do |config|
604
+ config.register_html_tag(:bgcyan, :inline, properties: { 'highlight' => { val: 'cyan' } })
605
+ end
606
+ #
607
+ input = '<p><bgcyan>test</bgcyan></p>'
608
+ expected_output = run_with_rpr('<w:highlight w:val="cyan" />')
609
+ assert_equal normalize_wordml(expected_output), process(input)
610
+
611
+ # remove the tag to avoid any accidental side effects
612
+ Sablon.configure do |config|
613
+ config.remove_html_tag(:bgcyan)
614
+ end
615
+ end
616
+
617
# Registers an inline tag backed by a custom AST node class and checks
# that the node's own #to_docx output is emitted inside the paragraph.
def test_conversion_of_a_registered_tag_with_ast_class
  Sablon.configure do |config|
    # create the AST class and then pass it on to the register tag method
    ast_class = Class.new(Sablon::HTMLConverter::Node) do
      # anonymous classes have no name, so provide one explicitly
      def self.name
        'TestInstr'
      end

      def initialize(_env, node, _properties)
        @content = node.text
      end

      def inspect
        @content
      end

      def to_docx
        "<w:instrText xml:space=\"preserve\"> #{@content} </w:instrText>"
      end
    end

    config.register_html_tag(:test_instr, :inline, ast_class: ast_class)
  end

  begin
    input = '<p><test_instr>test</test_instr></p>'
    expected_output = <<-DOCX.strip
      <w:p>
        <w:pPr>
          <w:pStyle w:val="Paragraph" />
        </w:pPr>
        <w:instrText xml:space="preserve"> test </w:instrText>
      </w:p>
    DOCX
    assert_equal normalize_wordml(expected_output), process(input)
  ensure
    # Remove the tag even when the assertion above fails; otherwise the
    # globally registered tag would leak into every test that runs later.
    Sablon.configure do |config|
      config.remove_html_tag(:test_instr)
    end
  end
end
657
+
658
# Registers a custom run-level style converter and checks that a CSS
# declaration using it is translated into the returned WordML property.
def test_conversion_of_registered_style_attribute
  Sablon.configure do |config|
    # the converter returns the property name together with its value
    converter = ->(v) { return :highlight, v }
    config.register_style_converter(:run, 'test-highlight', converter)
  end

  begin
    input = '<p><span style="test-highlight: green">test</span></p>'
    expected_output = run_with_rpr('<w:highlight w:val="green" />')
    assert_equal normalize_wordml(expected_output), process(input)
  ensure
    # Unregister the converter even when the assertion above fails so the
    # global configuration is restored for the remaining tests.
    Sablon.configure do |config|
      config.remove_style_converter(:run, 'test-highlight')
    end
  end
end
381
672
 
382
- def test_ol
383
- input = '<ol><li>Lorem</li><li>ipsum</li></ol>'
384
- ast = @converter.processed_ast(input)
385
- assert_equal "<Root: [<Paragraph{ListNumber}: [<Text{}: Lorem>]>, <Paragraph{ListNumber}: [<Text{}: ipsum>]>]>", ast.inspect
673
+ private
674
+
675
# Runs +input+ through the converter under test, passing along the
# environment held in @env, and returns whatever the converter returns.
def process(input)
  converter = @converter
  converter.process(input, @env)
end
387
678
 
388
- def test_num_id
389
- ast = @converter.processed_ast('<ol><li>Some</li><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li><li>sit</li></ol>')
390
- assert_equal [1001, 1001, 1002, 1003, 1003], ast.grep(Sablon::HTMLConverter::ListParagraph).map(&:numid)
679
# Builds the WordML for an empty "Paragraph"-styled paragraph whose
# paragraph properties additionally contain +ppr_str+.
def para_with_ppr(ppr_str)
  format('<w:p><w:pPr><w:pStyle w:val="Paragraph" />%s</w:pPr></w:p>', ppr_str)
end
392
683
 
393
- def test_nested_lists_have_the_same_numid
394
- ast = @converter.processed_ast('<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>')
395
- assert_equal [1001, 1001, 1001], ast.grep(Sablon::HTMLConverter::ListParagraph).map(&:numid)
684
# Builds the WordML for a "Paragraph"-styled paragraph holding a single
# run with the text "test", placing +rpr_str+ inside the run properties.
# The result still contains the template's line breaks and indentation.
def run_with_rpr(rpr_str)
  template = <<-DOCX.strip
    <w:p>
      <w:pPr>
        <w:pStyle w:val="Paragraph" />
      </w:pPr>
      <w:r>
        <w:rPr>
          %s
        </w:rPr>
        <w:t xml:space="preserve">test</w:t>
      </w:r>
    </w:p>
  DOCX
  format(template, rpr_str)
end
397
700
 
398
- def test_keep_nested_list_order
399
- input = '<ul><li>1<ul><li>1.1<ul><li>1.1.1</li></ul></li><li>1.2</li></ul></li><li>2<ul><li>1.3<ul><li>1.3.1</li></ul></li></ul></li></ul>'
400
- ast = @converter.processed_ast(input)
401
- list_p = ast.grep(Sablon::HTMLConverter::ListParagraph)
402
- assert_equal [1001], list_p.map(&:numid).uniq
403
- assert_equal [0, 1, 2, 1, 0, 1, 2], list_p.map(&:ilvl)
701
# Flattens a WordML snippet written across several indented lines into one
# line: the leading whitespace of each line is dropped first, then every
# newline is removed. Whitespace inside a line is left untouched.
def normalize_wordml(wordml)
  unindented = wordml.gsub(/^\s+/, '')
  unindented.delete("\n")
end
405
704
  end