sablon 0.0.21 → 0.0.22

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/Gemfile.lock +9 -9
  4. data/README.md +120 -11
  5. data/lib/sablon.rb +7 -1
  6. data/lib/sablon/configuration/configuration.rb +165 -0
  7. data/lib/sablon/configuration/html_tag.rb +99 -0
  8. data/lib/sablon/content.rb +12 -9
  9. data/lib/sablon/context.rb +27 -20
  10. data/lib/sablon/environment.rb +31 -0
  11. data/lib/sablon/html/ast.rb +290 -75
  12. data/lib/sablon/html/ast_builder.rb +90 -0
  13. data/lib/sablon/html/converter.rb +3 -123
  14. data/lib/sablon/numbering.rb +0 -5
  15. data/lib/sablon/operations.rb +11 -11
  16. data/lib/sablon/parser/mail_merge.rb +7 -6
  17. data/lib/sablon/processor/document.rb +9 -9
  18. data/lib/sablon/processor/numbering.rb +4 -4
  19. data/lib/sablon/template.rb +5 -4
  20. data/lib/sablon/version.rb +1 -1
  21. data/sablon.gemspec +3 -3
  22. data/test/configuration_test.rb +122 -0
  23. data/test/content_test.rb +7 -6
  24. data/test/context_test.rb +11 -11
  25. data/test/environment_test.rb +27 -0
  26. data/test/expression_test.rb +2 -2
  27. data/test/fixtures/html/html_test_content.html +174 -0
  28. data/test/fixtures/html_sample.docx +0 -0
  29. data/test/fixtures/xml/comment_block_and_comment_as_key.xml +31 -0
  30. data/test/html/ast_builder_test.rb +65 -0
  31. data/test/html/ast_test.rb +117 -0
  32. data/test/html/converter_test.rb +386 -87
  33. data/test/html/node_properties_test.rb +113 -0
  34. data/test/html_test.rb +10 -10
  35. data/test/mail_merge_parser_test.rb +3 -2
  36. data/test/processor/document_test.rb +20 -2
  37. data/test/section_properties_test.rb +1 -1
  38. data/test/support/html_snippets.rb +9 -0
  39. data/test/test_helper.rb +0 -1
  40. metadata +27 -7
Binary file
@@ -0,0 +1,31 @@
1
+ <w:r><w:t xml:space="preserve">Before </w:t></w:r>
2
+ <w:p>
3
+ <w:fldSimple w:instr=" MERGEFIELD comment \* MERGEFORMAT ">
4
+ <w:r>
5
+ <w:rPr><w:noProof/></w:rPr>
6
+ <w:t>«comment»</w:t>
7
+ </w:r>
8
+ </w:fldSimple>
9
+ </w:p>
10
+ <w:p>
11
+ <w:r>
12
+ <w:t>Inside Comment! </w:t>
13
+ </w:r>
14
+ </w:p>
15
+ <w:p>
16
+ <w:fldSimple w:instr=" MERGEFIELD endComment \* MERGEFORMAT ">
17
+ <w:r>
18
+ <w:rPr><w:noProof/></w:rPr>
19
+ <w:t>«endComment»</w:t>
20
+ </w:r>
21
+ </w:fldSimple>
22
+ </w:p>
23
+ <w:p>
24
+ <w:fldSimple w:instr=" MERGEFIELD =comment \* MERGEFORMAT ">
25
+ <w:r w:rsidR="004B49F0">
26
+ <w:rPr><w:noProof/></w:rPr>
27
+ <w:t>«=comment»</w:t>
28
+ </w:r>
29
+ </w:fldSimple>
30
+ </w:p>
31
+ <w:r><w:t xml:space="preserve">After </w:t></w:r>
@@ -0,0 +1,65 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "test_helper"
3
+
4
+ # Tests some low level private methods in the ASTBuilder class. #process_nodes
5
+ # and self.html_to_ast are covered extensively in converter_test.rb
6
+ class HTMLConverterASTBuilderTest < Sablon::TestCase
7
+ def setup
8
+ super
9
+ @env = Sablon::Environment.new(nil)
10
+ end
11
+
12
+ def test_fetch_tag
13
+ @bulider = new_builder
14
+ tag = Sablon::Configuration.instance.permitted_html_tags[:span]
15
+ assert_equal @bulider.send(:fetch_tag, :span), tag
16
+ # check that strings are converted into symbols
17
+ assert_equal @bulider.send(:fetch_tag, 'span'), tag
18
+ # test unknown tag raises error
19
+ e = assert_raises ArgumentError do
20
+ @bulider.send(:fetch_tag, :unknown_tag)
21
+ end
22
+ assert_equal "Don't know how to handle HTML tag: unknown_tag", e.message
23
+ end
24
+
25
+ def test_validate_structure
26
+ @bulider = new_builder
27
+ root = Sablon::Configuration.instance.permitted_html_tags['#document-fragment'.to_sym]
28
+ div = Sablon::Configuration.instance.permitted_html_tags[:div]
29
+ span = Sablon::Configuration.instance.permitted_html_tags[:span]
30
+ # test valid relationship
31
+ assert_nil @bulider.send(:validate_structure, div, span)
32
+ # test inverted relationship
33
+ e = assert_raises ArgumentError do
34
+ @bulider.send(:validate_structure, span, div)
35
+ end
36
+ assert_equal "Invalid HTML structure: div is not a valid child element of span.", e.message
37
+ # test inline tag with no parent
38
+ e = assert_raises ArgumentError do
39
+ @bulider.send(:validate_structure, root, span)
40
+ end
41
+ assert_equal "Invalid HTML structure: span needs to be wrapped in a block level tag.", e.message
42
+ end
43
+
44
+ def test_merge_properties
45
+ @builder = new_builder
46
+ node = Nokogiri::HTML.fragment('<span style="color: #F00; text-decoration: underline wavy">Test</span>').children[0]
47
+ tag = Struct.new(:properties).new(rStyle: 'Normal')
48
+ # test that properties are merged across all three arguments
49
+ props = @builder.send(:merge_node_properties, node, tag, 'background-color' => '#00F')
50
+ assert_equal({ 'background-color' => '#00F', rStyle: 'Normal', 'color' => '#F00', 'text-decoration' => 'underline wavy' }, props)
51
+ # test that parent properties are overridden by tag properties
52
+ props = @builder.send(:merge_node_properties, node, tag, rStyle: 'Citation', 'background-color' => '#00F')
53
+ assert_equal({ 'background-color' => '#00F', rStyle: 'Normal', 'color' => '#F00', 'text-decoration' => 'underline wavy' }, props)
54
+ # test that inline properties override parent styles
55
+ node = Nokogiri::HTML.fragment('<span style="color: #F00">Test</span>').children[0]
56
+ props = @builder.send(:merge_node_properties, node, tag, 'color' => '#00F')
57
+ assert_equal({ rStyle: 'Normal', 'color' => '#F00' }, props)
58
+ end
59
+
60
+ private
61
+
62
+ def new_builder(nodes = [], properties = {})
63
+ Sablon::HTMLConverter::ASTBuilder.new(@env, nodes, properties)
64
+ end
65
+ end
@@ -0,0 +1,117 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "test_helper"
3
+
4
+ class HTMLConverterASTTest < Sablon::TestCase
5
+ def setup
6
+ super
7
+ @converter = Sablon::HTMLConverter.new
8
+ @converter.instance_variable_set(:@env, Sablon::Environment.new(nil))
9
+ end
10
+
11
+ def test_div
12
+ input = '<div>Lorem ipsum dolor sit amet</div>'
13
+ ast = @converter.processed_ast(input)
14
+ assert_equal '<Root: [<Paragraph{Normal}: [<Run{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
15
+ end
16
+
17
+ def test_p
18
+ input = '<p>Lorem ipsum dolor sit amet</p>'
19
+ ast = @converter.processed_ast(input)
20
+ assert_equal '<Root: [<Paragraph{Paragraph}: [<Run{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
21
+ end
22
+
23
+ def test_b
24
+ input = '<p>Lorem <b>ipsum dolor sit amet</b></p>'
25
+ ast = @converter.processed_ast(input)
26
+ assert_equal '<Root: [<Paragraph{Paragraph}: [<Run{}: Lorem >, <Run{b}: ipsum dolor sit amet>]>]>', ast.inspect
27
+ end
28
+
29
+ def test_i
30
+ input = '<p>Lorem <i>ipsum dolor sit amet</i></p>'
31
+ ast = @converter.processed_ast(input)
32
+ assert_equal '<Root: [<Paragraph{Paragraph}: [<Run{}: Lorem >, <Run{i}: ipsum dolor sit amet>]>]>', ast.inspect
33
+ end
34
+
35
+ def test_br_in_strong
36
+ input = '<div><strong>Lorem<br />ipsum<br />dolor</strong></div>'
37
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
38
+ assert_equal "[<Run{b}: Lorem>, <Newline>, <Run{b}: ipsum>, <Newline>, <Run{b}: dolor>]", par.runs.inspect
39
+ end
40
+
41
+ def test_br_in_em
42
+ input = '<div><em>Lorem<br />ipsum<br />dolor</em></div>'
43
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
44
+ assert_equal "[<Run{i}: Lorem>, <Newline>, <Run{i}: ipsum>, <Newline>, <Run{i}: dolor>]", par.runs.inspect
45
+ end
46
+
47
+ def test_nested_strong_and_em
48
+ input = '<div><strong>Lorem <em>ipsum</em> dolor</strong></div>'
49
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
50
+ assert_equal "[<Run{b}: Lorem >, <Run{b;i}: ipsum>, <Run{b}: dolor>]", par.runs.inspect
51
+ end
52
+
53
+ def test_ignore_last_br_in_div
54
+ input = '<div>Lorem ipsum dolor sit amet<br /></div>'
55
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
56
+ assert_equal "[<Run{}: Lorem ipsum dolor sit amet>]", par.runs.inspect
57
+ end
58
+
59
+ def test_ignore_br_in_blank_div
60
+ input = '<div><br /></div>'
61
+ par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
62
+ assert_equal "[]", par.runs.inspect
63
+ end
64
+
65
+ def test_headings
66
+ input = '<h1>First</h1><h2>Second</h2><h3>Third</h3>'
67
+ ast = @converter.processed_ast(input)
68
+ assert_equal "<Root: [<Paragraph{Heading1}: [<Run{}: First>]>, <Paragraph{Heading2}: [<Run{}: Second>]>, <Paragraph{Heading3}: [<Run{}: Third>]>]>", ast.inspect
69
+ end
70
+
71
+ def test_h_with_formatting
72
+ input = '<h1><strong>Lorem</strong> ipsum dolor <em>sit <u>amet</u></em></h1>'
73
+ ast = @converter.processed_ast(input)
74
+ assert_equal "<Root: [<Paragraph{Heading1}: [<Run{b}: Lorem>, <Run{}: ipsum dolor >, <Run{i}: sit >, <Run{i;u=single}: amet>]>]>", ast.inspect
75
+ end
76
+
77
+ def test_ul
78
+ input = '<ul><li>Lorem</li><li>ipsum</li></ul>'
79
+ ast = @converter.processed_ast(input)
80
+ assert_equal "<Root: [<List: [<Paragraph{ListBullet}: [<Run{}: Lorem>]>, <Paragraph{ListBullet}: [<Run{}: ipsum>]>]>]>", ast.inspect
81
+ end
82
+
83
+ def test_ol
84
+ input = '<ol><li>Lorem</li><li>ipsum</li></ol>'
85
+ ast = @converter.processed_ast(input)
86
+ assert_equal "<Root: [<List: [<Paragraph{ListNumber}: [<Run{}: Lorem>]>, <Paragraph{ListNumber}: [<Run{}: ipsum>]>]>]>", ast.inspect
87
+ end
88
+
89
+ def test_num_id
90
+ ast = @converter.processed_ast('<ol><li>Some</li><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li><li>sit</li></ol>')
91
+ assert_equal %w[1001 1001 1002 1003 1003], get_numpr_prop_from_ast(ast, :numId)
92
+ end
93
+
94
+ def test_nested_lists_have_the_same_numid
95
+ ast = @converter.processed_ast('<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>')
96
+ assert_equal %w[1001 1001 1001], get_numpr_prop_from_ast(ast, :numId)
97
+ end
98
+
99
+ def test_keep_nested_list_order
100
+ input = '<ul><li>1<ul><li>1.1<ul><li>1.1.1</li></ul></li><li>1.2</li></ul></li><li>2<ul><li>1.3<ul><li>1.3.1</li></ul></li></ul></li></ul>'
101
+ ast = @converter.processed_ast(input)
102
+ assert_equal %w[1001], get_numpr_prop_from_ast(ast, :numId).uniq
103
+ assert_equal %w[0 1 2 1 0 1 2], get_numpr_prop_from_ast(ast, :ilvl)
104
+ end
105
+
106
+ private
107
+
108
+ # returns the values stored under the given numPr key (e.g. :numId or :ilvl) for each list paragraph
109
+ def get_numpr_prop_from_ast(ast, key)
110
+ values = []
111
+ ast.grep(Sablon::HTMLConverter::ListParagraph).each do |para|
112
+ numpr = para.instance_variable_get('@properties')[:numPr]
113
+ numpr.each { |val| values.push(val[key]) if val[key] }
114
+ end
115
+ values
116
+ end
117
+ end
@@ -4,6 +4,8 @@ require "test_helper"
4
4
  class HTMLConverterTest < Sablon::TestCase
5
5
  def setup
6
6
  super
7
+ @env = Sablon::Environment.new(nil)
8
+ @numbering = @env.numbering
7
9
  @converter = Sablon::HTMLConverter.new
8
10
  end
9
11
 
@@ -15,7 +17,7 @@ class HTMLConverterTest < Sablon::TestCase
15
17
  <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
16
18
  </w:p>
17
19
  DOCX
18
- assert_equal normalize_wordml(expected_output), @converter.process(input)
20
+ assert_equal normalize_wordml(expected_output), process(input)
19
21
  end
20
22
 
21
23
  def test_convert_text_inside_p
@@ -26,7 +28,7 @@ DOCX
26
28
  <w:r><w:t xml:space="preserve">Lorem ipsum dolor sit amet</w:t></w:r>
27
29
  </w:p>
28
30
  DOCX
29
- assert_equal normalize_wordml(expected_output), @converter.process(input)
31
+ assert_equal normalize_wordml(expected_output), process(input)
30
32
  end
31
33
 
32
34
  def test_convert_text_inside_multiple_divs
@@ -41,7 +43,7 @@ DOCX
41
43
  <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
42
44
  </w:p>
43
45
  DOCX
44
- assert_equal normalize_wordml(expected_output), @converter.process(input)
46
+ assert_equal normalize_wordml(expected_output), process(input)
45
47
  end
46
48
 
47
49
  def test_convert_newline_inside_div
@@ -54,7 +56,7 @@ DOCX
54
56
  <w:r><w:t xml:space="preserve">dolor sit amet</w:t></w:r>
55
57
  </w:p>
56
58
  DOCX
57
- assert_equal normalize_wordml(expected_output), @converter.process(input)
59
+ assert_equal normalize_wordml(expected_output), process(input)
58
60
  end
59
61
 
60
62
  def test_convert_strong_tags_inside_div
@@ -67,23 +69,36 @@ DOCX
67
69
  <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
68
70
  </w:p>
69
71
  DOCX
70
- assert_equal normalize_wordml(expected_output), @converter.process(input)
72
+ assert_equal normalize_wordml(expected_output), process(input)
73
+ end
74
+
75
+ def test_convert_span_tags_inside_p
76
+ input = '<p>Lorem&nbsp;<span>ipsum dolor</span>&nbsp;sit amet</p>'
77
+ expected_output = <<-DOCX.strip
78
+ <w:p>
79
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
80
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
81
+ <w:r><w:t xml:space="preserve">ipsum dolor</w:t></w:r>
82
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r></w:p>
83
+ DOCX
84
+
85
+ assert_equal normalize_wordml(expected_output), process(input)
71
86
  end
72
87
 
73
88
  def test_convert_u_tags_inside_p
74
- input = '<p>Lorem&nbsp;<u>ipsum dolor</u>&nbsp;sit amet</div>'
89
+ input = '<p>Lorem&nbsp;<u>ipsum dolor</u>&nbsp;sit amet</p>'
75
90
  expected_output = <<-DOCX.strip
76
91
  <w:p>
77
92
  <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
78
93
  <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
79
94
  <w:r>
80
- <w:rPr><w:u w:val="single"/></w:rPr>
95
+ <w:rPr><w:u w:val="single" /></w:rPr>
81
96
  <w:t xml:space="preserve">ipsum dolor</w:t>
82
97
  </w:r>
83
98
  <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
84
99
  </w:p>
85
100
  DOCX
86
- assert_equal normalize_wordml(expected_output), @converter.process(input)
101
+ assert_equal normalize_wordml(expected_output), process(input)
87
102
  end
88
103
 
89
104
  def test_convert_em_tags_inside_div
@@ -96,7 +111,55 @@ DOCX
96
111
  <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
97
112
  </w:p>
98
113
  DOCX
99
- assert_equal normalize_wordml(expected_output), @converter.process(input)
114
+ assert_equal normalize_wordml(expected_output), process(input)
115
+ end
116
+
117
+ def test_convert_s_tags_inside_p
118
+ input = '<p>Lorem&nbsp;<s>ipsum dolor</s>&nbsp;sit amet</p>'
119
+ expected_output = <<-DOCX.strip
120
+ <w:p>
121
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
122
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
123
+ <w:r>
124
+ <w:rPr><w:strike w:val="true" /></w:rPr>
125
+ <w:t xml:space="preserve">ipsum dolor</w:t>
126
+ </w:r>
127
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
128
+ </w:p>
129
+ DOCX
130
+ assert_equal normalize_wordml(expected_output), process(input)
131
+ end
132
+
133
+ def test_convert_sub_tags_inside_p
134
+ input = '<p>Lorem&nbsp;<sub>ipsum dolor</sub>&nbsp;sit amet</p>'
135
+ expected_output = <<-DOCX.strip
136
+ <w:p>
137
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
138
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
139
+ <w:r>
140
+ <w:rPr><w:vertAlign w:val="subscript" /></w:rPr>
141
+ <w:t xml:space="preserve">ipsum dolor</w:t>
142
+ </w:r>
143
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
144
+ </w:p>
145
+ DOCX
146
+ assert_equal normalize_wordml(expected_output), process(input)
147
+ end
148
+
149
+ def test_convert_sup_tags_inside_p
150
+ input = '<p>Lorem&nbsp;<sup>ipsum dolor</sup>&nbsp;sit amet</p>'
151
+ expected_output = <<-DOCX.strip
152
+ <w:p>
153
+ <w:pPr><w:pStyle w:val="Paragraph" /></w:pPr>
154
+ <w:r><w:t xml:space="preserve">Lorem </w:t></w:r>
155
+ <w:r>
156
+ <w:rPr><w:vertAlign w:val="superscript" /></w:rPr>
157
+ <w:t xml:space="preserve">ipsum dolor</w:t>
158
+ </w:r>
159
+ <w:r><w:t xml:space="preserve"> sit amet</w:t></w:r>
160
+ </w:p>
161
+ DOCX
162
+ assert_equal normalize_wordml(expected_output), process(input)
100
163
  end
101
164
 
102
165
  def test_convert_br_tags_inside_strong
@@ -115,7 +178,7 @@ DOCX
115
178
  </w:r>
116
179
  </w:p>
117
180
  DOCX
118
- assert_equal normalize_wordml(expected_output), @converter.process(input)
181
+ assert_equal normalize_wordml(expected_output), process(input)
119
182
  end
120
183
 
121
184
  def test_convert_h1
@@ -126,7 +189,7 @@ DOCX
126
189
  <w:r><w:t xml:space="preserve">Lorem ipsum dolor</w:t></w:r>
127
190
  </w:p>
128
191
  DOCX
129
- assert_equal normalize_wordml(expected_output), @converter.process(input)
192
+ assert_equal normalize_wordml(expected_output), process(input)
130
193
  end
131
194
 
132
195
  def test_unorderd_lists
@@ -165,9 +228,9 @@ DOCX
165
228
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
166
229
  </w:p>
167
230
  DOCX
168
- assert_equal normalize_wordml(expected_output), @converter.process(input)
231
+ assert_equal normalize_wordml(expected_output), process(input)
169
232
 
170
- assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
233
+ assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], @numbering.definitions
171
234
  end
172
235
 
173
236
  def test_ordered_lists
@@ -206,9 +269,9 @@ DOCX
206
269
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
207
270
  </w:p>
208
271
  DOCX
209
- assert_equal normalize_wordml(expected_output), @converter.process(input)
272
+ assert_equal normalize_wordml(expected_output), process(input)
210
273
 
211
- assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber')], Sablon::Numbering.instance.definitions
274
+ assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber')], @numbering.definitions
212
275
  end
213
276
 
214
277
  def test_mixed_lists
@@ -247,11 +310,11 @@ DOCX
247
310
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
248
311
  </w:p>
249
312
  DOCX
250
- assert_equal normalize_wordml(expected_output), @converter.process(input)
313
+ assert_equal normalize_wordml(expected_output), process(input)
251
314
 
252
315
  assert_equal [Sablon::Numbering::Definition.new(1001, 'ListNumber'),
253
316
  Sablon::Numbering::Definition.new(1002, 'ListBullet'),
254
- Sablon::Numbering::Definition.new(1003, 'ListNumber')], Sablon::Numbering.instance.definitions
317
+ Sablon::Numbering::Definition.new(1003, 'ListNumber')], @numbering.definitions
255
318
  end
256
319
 
257
320
  def test_nested_unordered_lists
@@ -290,116 +353,352 @@ DOCX
290
353
  <w:r><w:t xml:space="preserve">dolor</w:t></w:r>
291
354
  </w:p>
292
355
  DOCX
293
- assert_equal normalize_wordml(expected_output), @converter.process(input)
356
+ assert_equal normalize_wordml(expected_output), process(input)
357
+
358
+ assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], @numbering.definitions
359
+ end
294
360
 
295
- assert_equal [Sablon::Numbering::Definition.new(1001, 'ListBullet')], Sablon::Numbering.instance.definitions
361
+ def test_unknown_tag
362
+ e = assert_raises ArgumentError do
363
+ process('<badtag/>')
364
+ end
365
+ assert_match(/Don't know how to handle HTML tag:/, e.message)
296
366
  end
297
367
 
298
368
  private
369
+
370
+ def process(input)
371
+ @converter.process(input, @env)
372
+ end
373
+
299
374
  def normalize_wordml(wordml)
300
375
  wordml.gsub(/^\s+/, '').tr("\n", '')
301
376
  end
302
377
  end
303
378
 
304
- class HTMLConverterASTTest < Sablon::TestCase
379
+ class HTMLConverterStyleTest < Sablon::TestCase
305
380
  def setup
306
381
  super
382
+ @env = Sablon::Environment.new(nil)
307
383
  @converter = Sablon::HTMLConverter.new
308
384
  end
309
385
 
310
- def test_div
311
- input = '<div>Lorem ipsum dolor sit amet</div>'
312
- ast = @converter.processed_ast(input)
313
- assert_equal '<Root: [<Paragraph{Normal}: [<Text{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
314
- end
386
+ # testing direct CSS style -> WordML conversion for paragraphs
387
+
388
+ def test_paragraph_with_background_color
389
+ input = '<p style="background-color: #123456"></p>'
390
+ expected_output = para_with_ppr('<w:shd w:val="clear" w:fill="123456" />')
391
+ assert_equal normalize_wordml(expected_output), process(input)
392
+ end
393
+
394
+ def test_paragraph_with_borders
395
+ # Basic single line black border
396
+ input = '<p style="border: 1px"></p>'
397
+ ppr = <<-DOCX.strip
398
+ <w:pBdr>
399
+ <w:top w:sz="2" w:val="single" w:color="000000" />
400
+ <w:bottom w:sz="2" w:val="single" w:color="000000" />
401
+ <w:left w:sz="2" w:val="single" w:color="000000" />
402
+ <w:right w:sz="2" w:val="single" w:color="000000" />
403
+ </w:pBdr>
404
+ DOCX
405
+ expected_output = para_with_ppr(ppr)
406
+ assert_equal normalize_wordml(expected_output), process(input)
407
+ # border with a line style
408
+ input = '<p style="border: 1px wavy"></p>'
409
+ ppr = <<-DOCX.strip
410
+ <w:pBdr>
411
+ <w:top w:sz="2" w:val="wavy" w:color="000000" />
412
+ <w:bottom w:sz="2" w:val="wavy" w:color="000000" />
413
+ <w:left w:sz="2" w:val="wavy" w:color="000000" />
414
+ <w:right w:sz="2" w:val="wavy" w:color="000000" />
415
+ </w:pBdr>
416
+ DOCX
417
+ expected_output = para_with_ppr(ppr)
418
+ assert_equal normalize_wordml(expected_output), process(input)
419
+ # border with line style and color
420
+ input = '<p style="border: 1px wavy #123456"></p>'
421
+ ppr = <<-DOCX.strip
422
+ <w:pBdr>
423
+ <w:top w:sz="2" w:val="wavy" w:color="123456" />
424
+ <w:bottom w:sz="2" w:val="wavy" w:color="123456" />
425
+ <w:left w:sz="2" w:val="wavy" w:color="123456" />
426
+ <w:right w:sz="2" w:val="wavy" w:color="123456" />
427
+ </w:pBdr>
428
+ DOCX
429
+ expected_output = para_with_ppr(ppr)
430
+ assert_equal normalize_wordml(expected_output), process(input)
431
+ end
432
+
433
+ def test_paragraph_with_text_align
434
+ input = '<p style="text-align: both"></p>'
435
+ expected_output = para_with_ppr('<w:jc w:val="both" />')
436
+ assert_equal normalize_wordml(expected_output), process(input)
437
+ end
315
438
 
316
- def test_p
317
- input = '<p>Lorem ipsum dolor sit amet</p>'
318
- ast = @converter.processed_ast(input)
319
- assert_equal '<Root: [<Paragraph{Paragraph}: [<Text{}: Lorem ipsum dolor sit amet>]>]>', ast.inspect
439
+ def test_paragraph_with_vertical_align
440
+ input = '<p style="vertical-align: baseline"></p>'
441
+ expected_output = para_with_ppr('<w:textAlignment w:val="baseline" />')
442
+ assert_equal normalize_wordml(expected_output), process(input)
320
443
  end
321
444
 
322
- def test_b
323
- input = '<p>Lorem <b>ipsum dolor sit amet</b></p>'
324
- ast = @converter.processed_ast(input)
325
- assert_equal '<Root: [<Paragraph{Paragraph}: [<Text{}: Lorem >, <Text{bold}: ipsum dolor sit amet>]>]>', ast.inspect
445
+ def test_paragraph_with_unsupported_property
446
+ input = '<p style="unsupported: true"></p>'
447
+ expected_output = para_with_ppr('')
448
+ assert_equal normalize_wordml(expected_output), process(input)
326
449
  end
327
450
 
328
- def test_i
329
- input = '<p>Lorem <i>ipsum dolor sit amet</i></p>'
330
- ast = @converter.processed_ast(input)
331
- assert_equal '<Root: [<Paragraph{Paragraph}: [<Text{}: Lorem >, <Text{italic}: ipsum dolor sit amet>]>]>', ast.inspect
451
+ def test_run_with_background_color
452
+ input = '<p><span style="background-color: #123456">test</span></p>'
453
+ expected_output = run_with_rpr('<w:shd w:val="clear" w:fill="123456" />')
454
+ assert_equal normalize_wordml(expected_output), process(input)
332
455
  end
333
456
 
334
- def test_br_in_strong
335
- input = '<div><strong>Lorem<br />ipsum<br />dolor</strong></div>'
336
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
337
- assert_equal "[<Text{bold}: Lorem>, <Newline>, <Text{bold}: ipsum>, <Newline>, <Text{bold}: dolor>]", par.runs.inspect
457
+ def test_run_with_color
458
+ input = '<p><span style="color: #123456">test</span></p>'
459
+ expected_output = run_with_rpr('<w:color w:val="123456" />')
460
+ assert_equal normalize_wordml(expected_output), process(input)
338
461
  end
339
462
 
340
- def test_br_in_em
341
- input = '<div><em>Lorem<br />ipsum<br />dolor</em></div>'
342
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
343
- assert_equal "[<Text{italic}: Lorem>, <Newline>, <Text{italic}: ipsum>, <Newline>, <Text{italic}: dolor>]", par.runs.inspect
344
- end
463
+ def test_run_with_font_size
464
+ input = '<p><span style="font-size: 20">test</span></p>'
465
+ expected_output = run_with_rpr('<w:sz w:val="40" />')
466
+ assert_equal normalize_wordml(expected_output), process(input)
345
467
 
346
- def test_nested_strong_and_em
347
- input = '<div><strong>Lorem <em>ipsum</em> dolor</strong></div>'
348
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
349
- assert_equal "[<Text{bold}: Lorem >, <Text{bold|italic}: ipsum>, <Text{bold}: dolor>]", par.runs.inspect
350
- end
468
+ # test that non-numeric characters are ignored
469
+ input = '<p><span style="font-size: 20pts">test</span></p>'
470
+ assert_equal normalize_wordml(expected_output), process(input)
351
471
 
352
- def test_ignore_last_br_in_div
353
- input = '<div>Lorem ipsum dolor sit amet<br /></div>'
354
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
355
- assert_equal "[<Text{}: Lorem ipsum dolor sit amet>]", par.runs.inspect
472
+ # test that floats round up
473
+ input = '<p><span style="font-size: 19.1pts">test</span></p>'
474
+ assert_equal normalize_wordml(expected_output), process(input)
356
475
  end
357
476
 
358
- def test_ignore_br_in_blank_div
359
- input = '<div><br /></div>'
360
- par = @converter.processed_ast(input).grep(Sablon::HTMLConverter::Paragraph).first
361
- assert_equal "[]", par.runs.inspect
477
+ def test_run_with_font_style
478
+ input = '<p><span style="font-style: bold">test</span></p>'
479
+ expected_output = run_with_rpr('<w:b />')
480
+ assert_equal normalize_wordml(expected_output), process(input)
481
+
482
+ # test that the italic font style is converted
483
+ input = '<p><span style="font-style: italic">test</span></p>'
484
+ expected_output = run_with_rpr('<w:i />')
485
+ assert_equal normalize_wordml(expected_output), process(input)
486
+ end
487
+
488
+ def test_run_with_font_wieght
489
+ input = '<p><span style="font-weight: bold">test</span></p>'
490
+ expected_output = run_with_rpr('<w:b />')
491
+ assert_equal normalize_wordml(expected_output), process(input)
492
+ end
493
+
494
+ def test_run_with_text_decoration
495
+ # testing underline configurations
496
+ input = '<p><span style="text-decoration: underline">test</span></p>'
497
+ expected_output = run_with_rpr('<w:u w:val="single" />')
498
+ assert_equal normalize_wordml(expected_output), process(input)
499
+
500
+ input = '<p><span style="text-decoration: underline dash">test</span></p>'
501
+ expected_output = run_with_rpr('<w:u w:val="dash" w:color="auto" />')
502
+ assert_equal normalize_wordml(expected_output), process(input)
503
+
504
+ input = '<p><span style="text-decoration: underline dash #123456">test</span></p>'
505
+ expected_output = run_with_rpr('<w:u w:val="dash" w:color="123456" />')
506
+ assert_equal normalize_wordml(expected_output), process(input)
507
+
508
+ # testing line-through
509
+ input = '<p><span style="text-decoration: line-through">test</span></p>'
510
+ expected_output = run_with_rpr('<w:strike w:val="true" />')
511
+ assert_equal normalize_wordml(expected_output), process(input)
512
+
513
+ # testing that unsupported values are passed through as a toggle
514
+ input = '<p><span style="text-decoration: strike">test</span></p>'
515
+ expected_output = run_with_rpr('<w:strike w:val="true" />')
516
+ assert_equal normalize_wordml(expected_output), process(input)
517
+
518
+ input = '<p><span style="text-decoration: emboss">test</span></p>'
519
+ expected_output = run_with_rpr('<w:emboss w:val="true" />')
520
+ assert_equal normalize_wordml(expected_output), process(input)
362
521
  end
363
522
 
364
- def test_headings
365
- input = '<h1>First</h1><h2>Second</h2><h3>Third</h3>'
366
- ast = @converter.processed_ast(input)
367
- assert_equal "<Root: [<Paragraph{Heading1}: [<Text{}: First>]>, <Paragraph{Heading2}: [<Text{}: Second>]>, <Paragraph{Heading3}: [<Text{}: Third>]>]>", ast.inspect
523
+ def test_run_with_vertical_align
524
+ input = '<p><span style="vertical-align: subscript">test</span></p>'
525
+ expected_output = run_with_rpr('<w:vertAlign w:val="subscript" />')
526
+ assert_equal normalize_wordml(expected_output), process(input)
527
+
528
+ input = '<p><span style="vertical-align: superscript">test</span></p>'
529
+ expected_output = run_with_rpr('<w:vertAlign w:val="superscript" />')
530
+ assert_equal normalize_wordml(expected_output), process(input)
368
531
  end
369
532
 
370
- def test_h_with_formatting
371
- input = '<h1><strong>Lorem</strong> ipsum dolor <em>sit <u>amet</u></em></h1>'
372
- ast = @converter.processed_ast(input)
373
- assert_equal "<Root: [<Paragraph{Heading1}: [<Text{bold}: Lorem>, <Text{}: ipsum dolor >, <Text{italic}: sit >, <Text{italic|underline}: amet>]>]>", ast.inspect
533
+ def test_run_with_unsupported_property
534
+ input = '<p><span style="unsupported: true">test</span></p>'
535
+ expected_output = '<w:p><w:pPr><w:pStyle w:val="Paragraph" /></w:pPr><w:r><w:t xml:space="preserve">test</w:t></w:r></w:p>'
536
+ assert_equal normalize_wordml(expected_output), process(input)
374
537
  end
375
538
 
376
- def test_ul
377
- input = '<ul><li>Lorem</li><li>ipsum</li></ul>'
378
- ast = @converter.processed_ast(input)
379
- assert_equal "<Root: [<Paragraph{ListBullet}: [<Text{}: Lorem>]>, <Paragraph{ListBullet}: [<Text{}: ipsum>]>]>", ast.inspect
539
+ # tests with nested runs and styles
540
+
541
+ def test_paragraph_props_passed_to_runs
542
+ input = '<p style="color: #123456"><b>Lorem</b><span>ipsum</span></p>'
543
+ expected_output = <<-DOCX.strip
544
+ <w:p>
545
+ <w:pPr>
546
+ <w:pStyle w:val="Paragraph" />
547
+ </w:pPr>
548
+ <w:r>
549
+ <w:rPr>
550
+ <w:color w:val="123456" />
551
+ <w:b />
552
+ </w:rPr>
553
+ <w:t xml:space="preserve">Lorem</w:t>
554
+ </w:r>
555
+ <w:r>
556
+ <w:rPr>
557
+ <w:color w:val="123456" />
558
+ </w:rPr>
559
+ <w:t xml:space="preserve">ipsum</w:t>
560
+ </w:r>
561
+ </w:p>
562
+ DOCX
563
+ assert_equal normalize_wordml(expected_output), process(input)
564
+ end
565
+
566
+ def test_run_prop_override_paragraph_prop
567
+ input = '<p style="text-align: center; color: #FF0000">Lorem<span style="color: blue;">ipsum</span></p>'
568
+ expected_output = <<-DOCX.strip
569
+ <w:p>
570
+ <w:pPr>
571
+ <w:pStyle w:val="Paragraph" />
572
+ <w:jc w:val="center" />
573
+ </w:pPr>
574
+ <w:r>
575
+ <w:rPr>
576
+ <w:color w:val="FF0000" />
577
+ </w:rPr>
578
+ <w:t xml:space="preserve">Lorem</w:t>
579
+ </w:r>
580
+ <w:r>
581
+ <w:rPr>
582
+ <w:color w:val="blue" />
583
+ </w:rPr>
584
+ <w:t xml:space="preserve">ipsum</w:t>
585
+ </w:r>
586
+ </w:p>
587
+ DOCX
588
+ assert_equal normalize_wordml(expected_output), process(input)
589
+ end
590
+
591
+ def test_inline_style_overrides_tag_style
592
+ # Note: a toggle property can not be removed once it becomes a symbol
593
+ # unless there is a specific CSS style that will set it to false. This
594
+ # is because CSS styles can only override parent properties not remove them.
595
+ input = '<p><u style="text-decoration: underline wavyDouble">test</u></p>'
596
+ expected_output = run_with_rpr('<w:u w:val="wavyDouble" w:color="auto" />')
597
+ assert_equal normalize_wordml(expected_output), process(input)
598
+ end
599
+
600
+ def test_conversion_of_a_registered_tag_without_ast_class
601
+ # This registers a new tag with the configuration object and then tries
602
+ # to convert it
603
+ Sablon.configure do |config|
604
+ config.register_html_tag(:bgcyan, :inline, properties: { 'highlight' => { val: 'cyan' } })
605
+ end
606
+ #
607
+ input = '<p><bgcyan>test</bgcyan></p>'
608
+ expected_output = run_with_rpr('<w:highlight w:val="cyan" />')
609
+ assert_equal normalize_wordml(expected_output), process(input)
610
+
611
+ # remove the tag to avoid any accidental side effects
612
+ Sablon.configure do |config|
613
+ config.remove_html_tag(:bgcyan)
614
+ end
615
+ end
616
+
617
+ def test_conversion_of_a_registered_tag_with_ast_class
618
+ Sablon.configure do |config|
619
+ # create the AST class and then pass it onto the register tag method
620
+ ast_class = Class.new(Sablon::HTMLConverter::Node) do
621
+ def self.name
622
+ 'TestInstr'
623
+ end
624
+
625
+ def initialize(_env, node, _properties)
626
+ @content = node.text
627
+ end
628
+
629
+ def inspect
630
+ @content
631
+ end
632
+
633
+ def to_docx
634
+ "<w:instrText xml:space=\"preserve\"> #{@content} </w:instrText>"
635
+ end
636
+ end
637
+ #
638
+ config.register_html_tag(:test_instr, :inline, ast_class: ast_class)
639
+ end
640
+ #
641
+ input = '<p><test_instr>test</test_instr></p>'
642
+ expected_output = <<-DOCX.strip
643
+ <w:p>
644
+ <w:pPr>
645
+ <w:pStyle w:val="Paragraph" />
646
+ </w:pPr>
647
+ <w:instrText xml:space="preserve"> test </w:instrText>
648
+ </w:p>
649
+ DOCX
650
+ assert_equal normalize_wordml(expected_output), process(input)
651
+
652
+ # remove the tag to avoid any accidental side effects
653
+ Sablon.configure do |config|
654
+ config.remove_html_tag(:test_instr)
655
+ end
656
+ end
657
+
658
+ def test_conversion_of_registered_style_attribute
659
+ Sablon.configure do |config|
660
+ converter = ->(v) { return :highlight, v }
661
+ config.register_style_converter(:run, 'test-highlight', converter)
662
+ end
663
+ #
664
+ input = '<p><span style="test-highlight: green">test</span></p>'
665
+ expected_output = run_with_rpr('<w:highlight w:val="green" />')
666
+ assert_equal normalize_wordml(expected_output), process(input)
667
+ #
668
+ Sablon.configure do |config|
669
+ config.remove_style_converter(:run, 'test-highlight')
670
+ end
380
671
  end
381
672
 
382
- def test_ol
383
- input = '<ol><li>Lorem</li><li>ipsum</li></ol>'
384
- ast = @converter.processed_ast(input)
385
- assert_equal "<Root: [<Paragraph{ListNumber}: [<Text{}: Lorem>]>, <Paragraph{ListNumber}: [<Text{}: ipsum>]>]>", ast.inspect
673
+ private
674
+
675
+ def process(input)
676
+ @converter.process(input, @env)
386
677
  end
387
678
 
388
- def test_num_id
389
- ast = @converter.processed_ast('<ol><li>Some</li><li>Lorem</li></ol><ul><li>ipsum</li></ul><ol><li>dolor</li><li>sit</li></ol>')
390
- assert_equal [1001, 1001, 1002, 1003, 1003], ast.grep(Sablon::HTMLConverter::ListParagraph).map(&:numid)
679
+ def para_with_ppr(ppr_str)
680
+ para_str = '<w:p><w:pPr><w:pStyle w:val="Paragraph" />%s</w:pPr></w:p>'
681
+ format(para_str, ppr_str)
391
682
  end
392
683
 
393
- def test_nested_lists_have_the_same_numid
394
- ast = @converter.processed_ast('<ul><li>Lorem<ul><li>ipsum<ul><li>dolor</li></ul></li></ul></li></ul>')
395
- assert_equal [1001, 1001, 1001], ast.grep(Sablon::HTMLConverter::ListParagraph).map(&:numid)
684
+ def run_with_rpr(rpr_str)
685
+ para_str = <<-DOCX.strip
686
+ <w:p>
687
+ <w:pPr>
688
+ <w:pStyle w:val="Paragraph" />
689
+ </w:pPr>
690
+ <w:r>
691
+ <w:rPr>
692
+ %s
693
+ </w:rPr>
694
+ <w:t xml:space="preserve">test</w:t>
695
+ </w:r>
696
+ </w:p>
697
+ DOCX
698
+ format(para_str, rpr_str)
396
699
  end
397
700
 
398
- def test_keep_nested_list_order
399
- input = '<ul><li>1<ul><li>1.1<ul><li>1.1.1</li></ul></li><li>1.2</li></ul></li><li>2<ul><li>1.3<ul><li>1.3.1</li></ul></li></ul></li></ul>'
400
- ast = @converter.processed_ast(input)
401
- list_p = ast.grep(Sablon::HTMLConverter::ListParagraph)
402
- assert_equal [1001], list_p.map(&:numid).uniq
403
- assert_equal [0, 1, 2, 1, 0, 1, 2], list_p.map(&:ilvl)
701
+ def normalize_wordml(wordml)
702
+ wordml.gsub(/^\s+/, '').tr("\n", '')
404
703
  end
405
704
  end