hlsv 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,602 @@
1
+ # Copyright (c) 2026 AdClin
2
+ # Licensed under the GNU Affero General Public License v3.0 or later.
3
+ # See the LICENSE file for details.
4
+
5
+ # Install:
6
+ # gem install nokogiri rubyzip
7
+ #
8
+ # Usage:
9
+ # ruby html_to_docx.rb input.html output.docx
10
+ # ruby html_to_docx.rb # uses defaults below
11
+
12
+ require 'nokogiri'
13
+ require 'zip'
14
+ require 'time'
15
+
16
# Typeface written into every <w:rFonts> element of the generated document.
FONT = 'Verdana'.freeze

# Maps the CSS class of a status box to the text prefix and the RGB hex colour
# used when that box is rendered. Both the outer and the inner hashes are
# frozen so the shared configuration cannot be mutated at runtime.
STATUS_COLORS = {
  'success-box' => { prefix: '[OK]',      color: '2E7D32' }.freeze,
  'warning-box' => { prefix: '[WARNING]', color: 'E65100' }.freeze,
  'error-box'   => { prefix: '[ERROR]',   color: 'C62828' }.freeze
}.freeze

# Separator used to split a paragraph into a label followed by list items.
SPLIT_RE = / -- /.freeze
25
+
26
+ module Hlsv
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Parser
30
+ # ---------------------------------------------------------------------------
31
# Reads an HTML report and flattens it into an ordered list of Block structs
# (cover, table of contents, headings, paragraphs, status lines and lists).
class RiReportParser
  # One renderable unit.
  #   type    - Symbol: :cover, :toc, :h2, :h3, :paragraph, :status or :list
  #   content - payload (String, Array of Strings, Hash for :cover, nil for :toc)
  #   meta    - Hash of rendering hints (:color, :label, :keep_lines, :keep_next)
  Block = Struct.new(:type, :content, :meta, keyword_init: true)

  # Heading tag => block type. An <h1> inside the body is emitted as :h2, the
  # same type as <h2>; <h3>..<h6> all collapse to :h3.
  HEADING_TYPES = {
    'h1' => :h2, 'h2' => :h2,
    'h3' => :h3, 'h4' => :h3, 'h5' => :h3, 'h6' => :h3
  }.freeze

  # Colour given to paragraphs and lists that are not inside a status box.
  DEFAULT_COLOR = '000000'.freeze

  # html_path - path of the HTML file; it is read as UTF-8 and parsed with
  #             Nokogiri::HTML.
  def initialize(html_path)
    html = File.read(html_path, encoding: 'UTF-8')
    @doc = Nokogiri::HTML(html)
    @html_path = html_path
  end

  # Builds the block list: a :cover block, a :toc block, then one or more
  # blocks per child of <main class="container"> (or of the first <main>).
  # Children carrying the CSS class "info-box" are skipped.
  #
  # Returns an Array of Block. When the document has no <main> element only
  # the cover and TOC blocks are returned.
  def parse
    blocks = [cover_block, Block.new(type: :toc, content: nil, meta: {})]

    main = @doc.at_css('main.container') || @doc.at_css('main')
    return blocks unless main

    main.children.each do |node|
      next if blank_text?(node)
      # Compare whole class tokens, the same way status boxes are matched, so
      # that a class such as "info-box-summary" is not dropped by accident.
      next if css_classes(node).include?('info-box')

      blocks.concat(parse_node(node))
    end

    blocks
  end

  private

  # Cover block: title from "header h1", date from ".info-box" and the
  # current local time formatted as the download stamp.
  def cover_block
    heading = @doc.at_css('header h1')
    info    = @doc.at_css('.info-box')

    Block.new(type: :cover,
              content: { title: heading&.text&.strip.to_s,
                         date: info ? info_box_date(info) : '',
                         downloaded_at: Time.now.strftime('Downloaded on %Y-%m-%d at %H:%M') },
              meta: {})
  end

  # Expected structure:
  #   <div class="info-box"><div><p>date</p><button>Word</button></div></div>
  # The first <p> is preferred so the button caption is ignored. Without a
  # <p>, buttons and links are removed from the node (this mutates the parsed
  # document) and the remaining text is reduced to ASCII with collapsed
  # whitespace.
  def info_box_date(info)
    paragraph = info.at_css('p')
    return paragraph.text.strip if paragraph

    info.css('button, a').each(&:remove)
    info.text.gsub(/[^\x00-\x7F]/, '').gsub(/\s+/, ' ').strip
  end

  # Splits "label -- item -- item" text into a labelled :list block.
  # Returns nil when the text holds fewer than two non-empty parts.
  def split_to_labeled_list(text, color)
    parts = text.split(SPLIT_RE).map(&:strip).reject(&:empty?)
    return nil if parts.size < 2

    Block.new(type: :list,
              content: parts[1..],
              meta: { color: color, label: parts.first })
  end

  # Converts one node into zero or more blocks, dispatching on the tag name.
  # Tags without a dedicated handler produce no blocks.
  def parse_node(node)
    tag = node.name.downcase

    heading_type = HEADING_TYPES[tag]
    if heading_type
      text = node.text.strip
      return text.empty? ? [] : [Block.new(type: heading_type, content: text, meta: {})]
    end

    case tag
    when 'article'        then parse_article(node)
    when 'section', 'div' then parse_container(node)
    when 'p'              then parse_paragraph(node)
    when 'ul', 'ol'       then parse_list(node)
    else []
    end
  end

  # <article>: parses the children. For class "dataset-section" every
  # resulting block is tagged keep_lines, and keep_next on all but the last
  # one, so the section is kept together on a page.
  def parse_article(node)
    blocks = parse_children(node)
    return blocks unless css_classes(node).include?('dataset-section')

    last_index = blocks.size - 1
    blocks.each_with_index do |blk, idx|
      blk.meta.merge!(keep_lines: true, keep_next: idx != last_index)
    end
    blocks
  end

  # <section>/<div>: a status box (class listed in STATUS_COLORS) becomes a
  # labelled :list when it holds <p>/<li> text, or a single :status line
  # otherwise. Any other container is transparent and its children are parsed.
  def parse_container(node)
    status = css_classes(node).map { |css_class| STATUS_COLORS[css_class] }.compact.first
    return parse_children(node) unless status

    color = status[:color]
    title = node.at_css('strong')&.text&.strip.to_s
    label = "#{status[:prefix]} #{title}".strip

    # All <p> texts first, then all <li> texts.
    details = node.css('p').map { |para| clean(para) } + node.css('li').map { |item| clean(item) }
    details.reject!(&:empty?)

    if details.any?
      [Block.new(type: :list, content: details, meta: { color: color, label: label })]
    else
      [Block.new(type: :status, content: label, meta: { color: color })]
    end
  end

  # <p>: a labelled list when the text contains the " -- " separator,
  # otherwise a plain paragraph. Empty paragraphs are dropped.
  def parse_paragraph(node)
    text = clean(node)
    return [] if text.empty?

    [split_to_labeled_list(text, DEFAULT_COLOR) ||
      Block.new(type: :paragraph, content: text, meta: {})]
  end

  # <ul>/<ol>: one :list block built from the direct <li> children.
  def parse_list(node)
    items = node.css('> li').map { |li| clean(li) }.reject(&:empty?)
    return [] if items.empty?

    [Block.new(type: :list, content: items, meta: { color: DEFAULT_COLOR })]
  end

  # Parses every child of node except whitespace-only text nodes.
  def parse_children(node)
    node.children.each_with_object([]) do |child, blocks|
      next if blank_text?(child)

      blocks.concat(parse_node(child))
    end
  end

  # True for a text node that contains only whitespace.
  def blank_text?(node)
    node.text? && node.text.strip.empty?
  end

  # The node's class attribute as an Array of tokens (empty when absent).
  def css_classes(node)
    (node['class'] || '').split
  end

  # Node text with runs of whitespace collapsed to single spaces.
  def clean(node)
    node.text.gsub(/\s+/, ' ').strip
  end
end
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # OOXML Builder
180
+ # ---------------------------------------------------------------------------
181
# Stateless helpers that render parsed blocks as WordprocessingML strings and
# supply the fixed parts of the .docx package (styles, numbering, settings,
# content types and relationship files).
#
# Children of <w:pPr> are always emitted in the order required by the schema
# sequence: pStyle, keepNext, keepLines, numPr, pBdr, tabs, spacing, jc.
module OoxmlBuilder
  # C0 control characters other than tab, LF and CR. XML 1.0 does not allow
  # them in a document at all, so they are removed rather than escaped.
  # Written as a String#delete character specification.
  XML_ILLEGAL_CHARS = "\x00-\x08\x0B\x0C\x0E-\x1F".freeze

  # Makes a value safe to embed in element content or in a double-quoted
  # attribute: invalid byte sequences are replaced (String#scrub), characters
  # forbidden by XML 1.0 are dropped, and & < > " are escaped.
  #
  # str - any object; nil becomes the empty string.
  # Returns a new String.
  def self.x(str)
    str.to_s
       .scrub
       .delete(XML_ILLEGAL_CHARS)
       .gsub('&', '&amp;')
       .gsub('<', '&lt;')
       .gsub('>', '&gt;')
       .gsub('"', '&quot;')
  end

  # A paragraph containing only a page break.
  def self.page_break
    '<w:p><w:r><w:br w:type="page"/></w:r></w:p>'
  end

  # Builds the <w:pPr> element shared by the paragraph helpers.
  #
  # opts - :style        paragraph style id (optional)
  #        :align        value for <w:jc> (optional)
  #        :space_before spacing before the paragraph (default 0)
  #        :space_after  spacing after the paragraph (default 80)
  #        :keep_lines   emit <w:keepLines/> when truthy
  #        :keep_next    emit <w:keepNext/> when truthy
  def self.build_ppr(opts = {})
    space_before = opts[:space_before] || 0
    space_after  = opts[:space_after]  || 80

    parts = []
    parts << "<w:pStyle w:val=\"#{opts[:style]}\"/>" if opts[:style]
    parts << '<w:keepNext/>'  if opts[:keep_next]
    parts << '<w:keepLines/>' if opts[:keep_lines]
    parts << "<w:spacing w:before=\"#{space_before}\" w:after=\"#{space_after}\"/>"
    parts << "<w:jc w:val=\"#{opts[:align]}\"/>" if opts[:align]
    "<w:pPr>#{parts.join}</w:pPr>"
  end

  # Generic paragraph with explicit run formatting.
  #
  # opts - :sz (default 20), :bold (default false), :color (default
  #        '000000'), plus every option accepted by build_ppr.
  def self.paragraph(text, opts = {})
    sz    = opts[:sz] || 20
    color = opts[:color] || '000000'

    rpr = "#{fonts_xml}<w:sz w:val=\"#{sz}\"/><w:szCs w:val=\"#{sz}\"/>"
    rpr += '<w:b/><w:bCs/>' if opts[:bold]
    rpr += "<w:color w:val=\"#{color}\"/>"

    "<w:p>#{build_ppr(opts)}<w:r><w:rPr>#{rpr}</w:rPr>" \
      "<w:t xml:space=\"preserve\">#{x(text)}</w:t></w:r></w:p>"
  end

  # Paragraph that relies on a named Word style (opts[:style]) for its look;
  # the run carries no formatting of its own.
  def self.heading(text, opts = {})
    "<w:p>#{build_ppr(opts)}<w:r><w:t xml:space=\"preserve\">#{x(text)}</w:t></w:r></w:p>"
  end

  # Coloured paragraph used for status lines and list labels. Spacing is
  # always 0 before / 80 after, whatever the caller passes.
  def self.status_paragraph(text, opts = {})
    color = opts[:color] || '000000'
    ppr   = build_ppr(opts.merge(space_before: 0, space_after: 80))
    rpr   = "#{fonts_xml}<w:color w:val=\"#{color}\"/><w:sz w:val=\"20\"/><w:szCs w:val=\"20\"/>"

    "<w:p>#{ppr}<w:r><w:rPr>#{rpr}</w:rPr><w:t xml:space=\"preserve\">#{x(text)}</w:t></w:r></w:p>"
  end

  # Bulleted list item bound to numbering definition 1, level 0.
  #
  # opts - :color (default '000000'), :sz (default 20), :keep_lines,
  #        :keep_next.
  def self.list_item(text, opts = {})
    color = opts[:color] || '000000'
    sz    = opts[:sz] || 20
    # keepNext has to precede keepLines inside <w:pPr>.
    keeps = "#{'<w:keepNext/>' if opts[:keep_next]}#{'<w:keepLines/>' if opts[:keep_lines]}"
    rpr   = "#{fonts_xml}<w:sz w:val=\"#{sz}\"/><w:szCs w:val=\"#{sz}\"/><w:color w:val=\"#{color}\"/>"

    <<~XML
      <w:p>
        <w:pPr>
          #{keeps}
          <w:numPr><w:ilvl w:val="0"/><w:numId w:val="1"/></w:numPr>
          <w:spacing w:before="0" w:after="60"/>
        </w:pPr>
        <w:r><w:rPr>#{rpr}</w:rPr><w:t xml:space="preserve">#{x(text)}</w:t></w:r>
      </w:p>
    XML
  end

  # Footer part: the document title on the left and "PAGE / NUMPAGES" after a
  # right-aligned tab stop. The page numbers are field codes inserted with
  # <w:fldChar>/<w:instrText>, the same mechanism as the table of contents.
  #
  # doc_title - text shown on the left-hand side.
  # Returns the complete footer XML document.
  def self.footer_xml(doc_title)
    rpr_base = "#{fonts_xml}<w:sz w:val=\"18\"/><w:szCs w:val=\"18\"/><w:color w:val=\"666666\"/>"

    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
        <w:p>
          <w:pPr>
            <w:pBdr>
              <w:top w:val="single" w:sz="6" w:space="1" w:color="CCCCCC"/>
            </w:pBdr>
            <w:tabs>
              <w:tab w:val="right" w:pos="9360"/>
            </w:tabs>
            <w:spacing w:before="80" w:after="0"/>
          </w:pPr>
          <w:r><w:rPr>#{rpr_base}</w:rPr><w:t xml:space="preserve">#{x(doc_title)}</w:t></w:r>
          <w:r><w:rPr>#{rpr_base}</w:rPr><w:tab/></w:r>
          #{footer_field('PAGE', rpr_base)}
          <w:r><w:rPr>#{rpr_base}</w:rPr><w:t xml:space="preserve"> / </w:t></w:r>
          #{footer_field('NUMPAGES', rpr_base)}
        </w:p>
      </w:ftr>
    XML
  end

  # Cover page: right-aligned title, a horizontal rule, the report date and
  # the download stamp, followed by a page break.
  def self.cover_page(title:, date:, downloaded_at:)
    title_para = paragraph(title,
                           sz: 44, bold: true, color: '1F3864',
                           align: 'right',
                           space_before: 2880, space_after: 120)
    # Empty paragraph whose bottom border draws the rule.
    hr = <<~XML
      <w:p>
        <w:pPr>
          <w:pBdr><w:bottom w:val="single" w:sz="12" w:space="1" w:color="2E75B6"/></w:pBdr>
          <w:spacing w:before="0" w:after="120"/>
          <w:jc w:val="right"/>
        </w:pPr>
      </w:p>
    XML
    date_para = paragraph(date, sz: 20, color: '444444', align: 'right',
                                space_before: 0, space_after: 0)
    downloaded_para = paragraph(downloaded_at, sz: 20, color: '444444', align: 'right',
                                               space_before: 0, space_after: 0)

    title_para + hr + date_para + downloaded_para + page_break
  end

  # Table of contents on its own page: a Heading1 title, a TOC field holding
  # a placeholder text, then a page break.
  def self.toc_field
    toc_title = heading('Table of Contents',
                        style: 'Heading1', space_before: 0, space_after: 160)
    # Non-interpolating heredoc so the field switches keep their backslashes.
    toc_content = <<~'XML'
      <w:p>
        <w:pPr><w:spacing w:before="0" w:after="80"/></w:pPr>
        <w:r><w:fldChar w:fldCharType="begin"/></w:r>
        <w:r><w:instrText xml:space="preserve"> TOC \o "1-3" \h \z \u </w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="separate"/></w:r>
        <w:r>
          <w:rPr><w:color w:val="888888"/></w:rPr>
          <w:t>Right-click and select "Update Field" to generate the table of contents.</w:t>
        </w:r>
        <w:r><w:fldChar w:fldCharType="end"/></w:r>
      </w:p>
    XML

    toc_title + toc_content + page_break
  end

  # Main document part.
  #
  # blocks - objects responding to #type, #content and #meta. Blocks of an
  #          unknown type contribute nothing to the output.
  # Returns the complete document.xml content, including the section
  # properties that reference the footer through relationship rId11.
  def self.document(blocks)
    paras = blocks.map { |block| render_block(block) }.join("\n")

    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <w:document
        xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
        xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
        xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"
        mc:Ignorable="w14">
        <w:body>
          #{paras}
          <w:sectPr>
            <w:footerReference w:type="default" r:id="rId11"/>
            <w:pgSz w:w="12240" w:h="15840"/>
            <w:pgMar w:top="1080" w:right="1080" w:bottom="1080" w:left="1080"
                     w:header="720" w:footer="720" w:gutter="0"/>
          </w:sectPr>
        </w:body>
      </w:document>
    XML
  end

  # styles.xml: the default "Normal" paragraph style and Heading1..Heading3,
  # all in FONT, with outline levels 0..2 on the headings.
  def self.styles
    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
        <w:style w:type="paragraph" w:default="1" w:styleId="Normal">
          <w:name w:val="Normal"/>
          <w:rPr>
            <w:rFonts w:ascii="#{FONT}" w:hAnsi="#{FONT}" w:cs="#{FONT}"/>
            <w:sz w:val="20"/><w:szCs w:val="20"/><w:color w:val="000000"/>
          </w:rPr>
        </w:style>
        <w:style w:type="paragraph" w:styleId="Heading1">
          <w:name w:val="heading 1"/><w:basedOn w:val="Normal"/><w:next w:val="Normal"/>
          <w:pPr><w:outlineLvl w:val="0"/></w:pPr>
          <w:rPr>
            <w:rFonts w:ascii="#{FONT}" w:hAnsi="#{FONT}" w:cs="#{FONT}"/>
            <w:b/><w:bCs/><w:sz w:val="40"/><w:szCs w:val="40"/><w:color w:val="1F3864"/>
          </w:rPr>
        </w:style>
        <w:style w:type="paragraph" w:styleId="Heading2">
          <w:name w:val="heading 2"/><w:basedOn w:val="Normal"/><w:next w:val="Normal"/>
          <w:pPr><w:outlineLvl w:val="1"/></w:pPr>
          <w:rPr>
            <w:rFonts w:ascii="#{FONT}" w:hAnsi="#{FONT}" w:cs="#{FONT}"/>
            <w:b/><w:bCs/><w:sz w:val="30"/><w:szCs w:val="30"/><w:color w:val="2E75B6"/>
          </w:rPr>
        </w:style>
        <w:style w:type="paragraph" w:styleId="Heading3">
          <w:name w:val="heading 3"/><w:basedOn w:val="Normal"/><w:next w:val="Normal"/>
          <w:pPr><w:outlineLvl w:val="2"/></w:pPr>
          <w:rPr>
            <w:rFonts w:ascii="#{FONT}" w:hAnsi="#{FONT}" w:cs="#{FONT}"/>
            <w:b/><w:bCs/><w:sz w:val="24"/><w:szCs w:val="24"/><w:color w:val="404040"/>
          </w:rPr>
        </w:style>
      </w:styles>
    XML
  end

  # numbering.xml: one abstract bullet definition (id 0) and the concrete
  # numbering instance 1 that list_item refers to.
  def self.numbering
    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <w:numbering xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
        <w:abstractNum w:abstractNumId="0">
          <w:lvl w:ilvl="0">
            <w:start w:val="1"/><w:numFmt w:val="bullet"/>
            <w:lvlText w:val="&#x2022;"/><w:lvlJc w:val="left"/>
            <w:pPr><w:ind w:left="720" w:hanging="360"/></w:pPr>
            <w:rPr><w:rFonts w:ascii="#{FONT}" w:hAnsi="#{FONT}" w:cs="#{FONT}"/></w:rPr>
          </w:lvl>
        </w:abstractNum>
        <w:num w:numId="1"><w:abstractNumId w:val="0"/></w:num>
      </w:numbering>
    XML
  end

  # [Content_Types].xml: defaults for .rels/.xml plus one override per part
  # under word/.
  def self.content_types
    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
        <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
        <Default Extension="xml" ContentType="application/xml"/>
        <Override PartName="/word/document.xml"
                  ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
        <Override PartName="/word/styles.xml"
                  ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
        <Override PartName="/word/numbering.xml"
                  ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"/>
        <Override PartName="/word/settings.xml"
                  ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"/>
        <Override PartName="/word/footer1.xml"
                  ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"/>
      </Types>
    XML
  end

  # Package-level relationships (_rels/.rels): points at word/document.xml.
  def self.rels
    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
        <Relationship Id="rId1"
          Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
          Target="word/document.xml"/>
      </Relationships>
    XML
  end

  # Relationships of the main document: styles, numbering, settings and the
  # footer (rId11, referenced from the section properties).
  def self.word_rels
    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
        <Relationship Id="rId1"
          Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"
          Target="styles.xml"/>
        <Relationship Id="rId2"
          Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering"
          Target="numbering.xml"/>
        <Relationship Id="rId3"
          Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings"
          Target="settings.xml"/>
        <Relationship Id="rId11"
          Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
          Target="footer1.xml"/>
      </Relationships>
    XML
  end

  # A relationships part with no entries.
  def self.empty_rels
    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
      </Relationships>
    XML
  end

  # settings.xml: sets updateFields to true and the default tab stop to 720.
  def self.settings
    <<~XML
      <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
      <w:settings xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
        <w:updateFields w:val="true"/>
        <w:defaultTabStop w:val="720"/>
      </w:settings>
    XML
  end

  # <w:rFonts> element selecting FONT for the ascii, hAnsi and cs slots.
  def self.fonts_xml
    "<w:rFonts w:ascii=\"#{FONT}\" w:hAnsi=\"#{FONT}\" w:cs=\"#{FONT}\"/>"
  end
  private_class_method :fonts_xml

  # The five runs of a simple field (begin, instruction, separate, cached
  # result "1", end), each formatted with the given run properties.
  def self.footer_field(instruction, rpr)
    [
      '<w:fldChar w:fldCharType="begin"/>',
      "<w:instrText xml:space=\"preserve\"> #{instruction} </w:instrText>",
      '<w:fldChar w:fldCharType="separate"/>',
      '<w:t>1</w:t>',
      '<w:fldChar w:fldCharType="end"/>'
    ].map { |content| "<w:r><w:rPr>#{rpr}</w:rPr>#{content}</w:r>" }.join("\n")
  end
  private_class_method :footer_field

  # Renders one block; returns nil for block types it does not know.
  def self.render_block(block)
    case block.type
    when :cover
      content = block.content
      cover_page(title: content[:title], date: content[:date],
                 downloaded_at: content[:downloaded_at].to_s)
    when :toc
      toc_field
    when :h2
      heading(block.content,
              style: 'Heading2', space_before: 280, space_after: 120, **keep_opts(block.meta))
    when :h3
      heading(block.content,
              style: 'Heading3', space_before: 200, space_after: 80, **keep_opts(block.meta))
    when :paragraph
      paragraph(block.content, sz: 20, color: '000000', **keep_opts(block.meta))
    when :status
      status_paragraph(block.content,
                       color: block.meta[:color] || '000000', **keep_opts(block.meta))
    when :list
      render_list(block)
    end
  end
  private_class_method :render_block

  # Optional label paragraph followed by one bullet per item. The label and
  # every item except the last are chained with keepNext; the last item takes
  # the block's own keep_next flag.
  def self.render_list(block)
    meta  = block.meta
    color = meta[:color] || '000000'
    kl    = meta[:keep_lines] || false
    kn    = meta[:keep_next] || false
    last  = block.content.size - 1

    out = []
    out << status_paragraph(meta[:label], color: color, keep_lines: kl, keep_next: true) if meta[:label]
    block.content.each_with_index do |item, idx|
      out << list_item(item, color: color, keep_lines: kl, keep_next: idx == last ? kn : true)
    end
    out.join
  end
  private_class_method :render_list

  # The pagination hints of a block's meta hash as build_ppr options.
  def self.keep_opts(meta)
    { keep_lines: meta[:keep_lines], keep_next: meta[:keep_next] }
  end
  private_class_method :keep_opts
end
554
+
555
+ # ---------------------------------------------------------------------------
556
+ # DOCX Writer
557
+ # ---------------------------------------------------------------------------
558
# Assembles the parts produced by OoxmlBuilder into a .docx (zip) archive.
class DocxWriter
  # output_path - path of the archive to create.
  def initialize(output_path)
    @output_path = output_path
  end

  # Writes the archive for the given blocks. The footer title is the :title
  # of the first :cover block, or an empty string when there is none.
  #
  # Each part is built lazily, immediately before its zip entry is opened,
  # and the entries are written in the order listed below.
  def write(blocks)
    cover     = blocks.find { |blk| blk.type == :cover }
    doc_title = cover&.content&.dig(:title).to_s

    parts = [
      ['[Content_Types].xml',          -> { OoxmlBuilder.content_types }],
      ['_rels/.rels',                  -> { OoxmlBuilder.rels }],
      ['word/document.xml',            -> { OoxmlBuilder.document(blocks) }],
      ['word/styles.xml',              -> { OoxmlBuilder.styles }],
      ['word/numbering.xml',           -> { OoxmlBuilder.numbering }],
      ['word/settings.xml',            -> { OoxmlBuilder.settings }],
      ['word/_rels/document.xml.rels', -> { OoxmlBuilder.word_rels }],
      ['word/footer1.xml',             -> { OoxmlBuilder.footer_xml(doc_title) }],
      ['word/_rels/footer1.xml.rels',  -> { OoxmlBuilder.empty_rels }]
    ]

    ::Zip::OutputStream.open(@output_path) do |zip|
      parts.each do |entry_name, builder|
        add_xml(zip, entry_name, builder.call)
      end
    end
  end

  private

  # Starts a new zip entry called name and writes content to it as UTF-8.
  def add_xml(zip, name, content)
    zip.put_next_entry(name)
    zip.write(content.encode('UTF-8'))
  end
end
587
+ end
588
+ # ---------------------------------------------------------------------------
589
+ # Main
590
+ # ---------------------------------------------------------------------------
591
+ # INPUT_FILE = ARGV[0] || '../hlsv_results/RI/RI_key_search.html'
592
+ # OUTPUT_FILE = ARGV[1] || "#{File.dirname(INPUT_FILE)}/RI_key_search.docx"
593
+
594
+ # abort "ERROR: File not found -- #{INPUT_FILE}" unless File.exist?(INPUT_FILE)
595
+
596
+ # puts "Parsing #{INPUT_FILE}..."
597
+ # blocks = RiReportParser.new(INPUT_FILE).parse
598
+ # puts " -> #{blocks.size} blocks extracted"
599
+
600
+ # puts "Building #{OUTPUT_FILE}..."
601
+ # DocxWriter.new(OUTPUT_FILE).write(blocks)
602
+ # puts " -> Done: #{OUTPUT_FILE}"