libis-format 1.3.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -10
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/data/PDFA_def.ps +3 -3
  13. data/data/eciRGB_v2.icc +0 -0
  14. data/data/types.yml +4 -17
  15. data/docker_cfg.yml +1 -0
  16. data/lib/libis/format/cli/convert.rb +4 -4
  17. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  18. data/lib/libis/format/command_line.rb +3 -2
  19. data/lib/libis/format/config.rb +22 -20
  20. data/lib/libis/format/converter/audio_converter.rb +31 -56
  21. data/lib/libis/format/converter/base.rb +36 -16
  22. data/lib/libis/format/converter/chain.rb +32 -52
  23. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  24. data/lib/libis/format/converter/image_assembler.rb +82 -0
  25. data/lib/libis/format/converter/image_converter.rb +40 -153
  26. data/lib/libis/format/converter/image_splitter.rb +80 -0
  27. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  28. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  29. data/lib/libis/format/converter/office_converter.rb +28 -22
  30. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  31. data/lib/libis/format/converter/pdf_converter.rb +50 -111
  32. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  33. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  34. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  35. data/lib/libis/format/converter/repository.rb +13 -7
  36. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  37. data/lib/libis/format/converter/video_converter.rb +58 -47
  38. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  39. data/lib/libis/format/converter.rb +1 -1
  40. data/lib/libis/format/identifier.rb +46 -44
  41. data/lib/libis/format/info.rb +27 -0
  42. data/lib/libis/format/library.rb +147 -0
  43. data/lib/libis/format/tool/droid.rb +30 -29
  44. data/lib/libis/format/tool/extension_identification.rb +26 -24
  45. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  46. data/lib/libis/format/tool/fido.rb +27 -22
  47. data/lib/libis/format/tool/file_tool.rb +24 -11
  48. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  49. data/lib/libis/format/tool/identification_tool.rb +40 -38
  50. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  51. data/lib/libis/format/tool/pdf_copy.rb +15 -24
  52. data/lib/libis/format/tool/pdf_merge.rb +14 -24
  53. data/lib/libis/format/tool/pdf_optimizer.rb +17 -24
  54. data/lib/libis/format/tool/pdf_split.rb +16 -25
  55. data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -50
  56. data/lib/libis/format/tool/pdfa_validator.rb +30 -25
  57. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  58. data/lib/libis/format/tool.rb +3 -4
  59. data/lib/libis/format/version.rb +1 -3
  60. data/lib/libis/format/yaml_loader.rb +71 -0
  61. data/lib/libis/format.rb +7 -5
  62. data/lib/libis-format.rb +0 -2
  63. data/libis-format.gemspec +18 -24
  64. metadata +78 -120
  65. data/data/AdobeRGB1998.icc +0 -0
  66. data/lib/libis/format/converter/email_converter.rb +0 -35
  67. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  68. data/lib/libis/format/type_database.rb +0 -156
  69. data/lib/libis/format/type_database_impl.rb +0 -153
  70. data/tools/pdf2pdfa +0 -395
  71. /data/bin/{droid_tool → droid} +0 -0
  72. /data/bin/{fido_tool → fido} +0 -0
metadata CHANGED
@@ -1,101 +1,87 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libis-format
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kris Dekeyser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-16 00:00:00.000000000 Z
11
+ date: 2020-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: chromaprint
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: 0.0.2
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: 0.0.2
27
- - !ruby/object:Gem::Dependency
28
- name: deep_dive
14
+ name: rake
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
17
  - - "~>"
32
18
  - !ruby/object:Gem::Version
33
- version: '0.3'
34
- type: :runtime
19
+ version: '10.3'
20
+ type: :development
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
24
  - - "~>"
39
25
  - !ruby/object:Gem::Version
40
- version: '0.3'
26
+ version: '10.3'
41
27
  - !ruby/object:Gem::Dependency
42
- name: libis-mapi
28
+ name: rspec
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - "~>"
46
32
  - !ruby/object:Gem::Version
47
- version: '0.3'
48
- type: :runtime
33
+ version: '3.1'
34
+ type: :development
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
38
  - - "~>"
53
39
  - !ruby/object:Gem::Version
54
- version: '0.3'
40
+ version: '3.1'
55
41
  - !ruby/object:Gem::Dependency
56
- name: libis-tools
42
+ name: awesome_print
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - "~>"
45
+ - - ">="
60
46
  - !ruby/object:Gem::Version
61
- version: '1.1'
62
- type: :runtime
47
+ version: '0'
48
+ type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - "~>"
52
+ - - ">="
67
53
  - !ruby/object:Gem::Version
68
- version: '1.1'
54
+ version: '0'
69
55
  - !ruby/object:Gem::Dependency
70
- name: mini_magick
56
+ name: equivalent-xml
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '4.12'
76
- type: :runtime
61
+ version: '0.5'
62
+ type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '4.12'
68
+ version: '0.5'
83
69
  - !ruby/object:Gem::Dependency
84
- name: naturally
70
+ name: nokogiri
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - "~>"
73
+ - - ">="
88
74
  - !ruby/object:Gem::Version
89
- version: '2.2'
90
- type: :runtime
75
+ version: '0'
76
+ type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - "~>"
80
+ - - ">="
95
81
  - !ruby/object:Gem::Version
96
- version: '2.2'
82
+ version: '0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: new_rfc_2047
84
+ name: libis-tools
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - "~>"
@@ -112,142 +98,109 @@ dependencies:
112
98
  name: os
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
- - - "~>"
101
+ - - '='
116
102
  - !ruby/object:Gem::Version
117
- version: '1.1'
103
+ version: 0.9.6
118
104
  type: :runtime
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
122
- - - "~>"
108
+ - - '='
123
109
  - !ruby/object:Gem::Version
124
- version: '1.1'
110
+ version: 0.9.6
125
111
  - !ruby/object:Gem::Dependency
126
- name: pdfinfo
112
+ name: mini_magick
127
113
  requirement: !ruby/object:Gem::Requirement
128
114
  requirements:
129
115
  - - "~>"
130
116
  - !ruby/object:Gem::Version
131
- version: '1.4'
117
+ version: '4.3'
132
118
  type: :runtime
133
119
  prerelease: false
134
120
  version_requirements: !ruby/object:Gem::Requirement
135
121
  requirements:
136
122
  - - "~>"
137
123
  - !ruby/object:Gem::Version
138
- version: '1.4'
124
+ version: '4.3'
139
125
  - !ruby/object:Gem::Dependency
140
- name: pdfkit
126
+ name: deep_dive
141
127
  requirement: !ruby/object:Gem::Requirement
142
128
  requirements:
143
129
  - - "~>"
144
130
  - !ruby/object:Gem::Version
145
- version: '0.8'
131
+ version: '0.3'
146
132
  type: :runtime
147
133
  prerelease: false
148
134
  version_requirements: !ruby/object:Gem::Requirement
149
135
  requirements:
150
136
  - - "~>"
151
137
  - !ruby/object:Gem::Version
152
- version: '0.8'
153
- - !ruby/object:Gem::Dependency
154
- name: awesome_print
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - ">="
158
- - !ruby/object:Gem::Version
159
- version: '0'
160
- type: :development
161
- prerelease: false
162
- version_requirements: !ruby/object:Gem::Requirement
163
- requirements:
164
- - - ">="
165
- - !ruby/object:Gem::Version
166
- version: '0'
167
- - !ruby/object:Gem::Dependency
168
- name: equivalent-xml
169
- requirement: !ruby/object:Gem::Requirement
170
- requirements:
171
- - - ">="
172
- - !ruby/object:Gem::Version
173
- version: '0'
174
- type: :development
175
- prerelease: false
176
- version_requirements: !ruby/object:Gem::Requirement
177
- requirements:
178
- - - ">="
179
- - !ruby/object:Gem::Version
180
- version: '0'
181
- - !ruby/object:Gem::Dependency
182
- name: rake
183
- requirement: !ruby/object:Gem::Requirement
184
- requirements:
185
- - - ">="
186
- - !ruby/object:Gem::Version
187
- version: '0'
188
- type: :development
189
- prerelease: false
190
- version_requirements: !ruby/object:Gem::Requirement
191
- requirements:
192
- - - ">="
193
- - !ruby/object:Gem::Version
194
- version: '0'
138
+ version: '0.3'
195
139
  - !ruby/object:Gem::Dependency
196
- name: rspec
140
+ name: chromaprint
197
141
  requirement: !ruby/object:Gem::Requirement
198
142
  requirements:
199
- - - ">="
143
+ - - "~>"
200
144
  - !ruby/object:Gem::Version
201
- version: '0'
202
- type: :development
145
+ version: 0.0.2
146
+ type: :runtime
203
147
  prerelease: false
204
148
  version_requirements: !ruby/object:Gem::Requirement
205
149
  requirements:
206
- - - ">="
150
+ - - "~>"
207
151
  - !ruby/object:Gem::Version
208
- version: '0'
152
+ version: 0.0.2
209
153
  - !ruby/object:Gem::Dependency
210
- name: nokogiri
154
+ name: naturally
211
155
  requirement: !ruby/object:Gem::Requirement
212
156
  requirements:
213
- - - ">="
157
+ - - "~>"
214
158
  - !ruby/object:Gem::Version
215
- version: '0'
216
- type: :development
159
+ version: '2.1'
160
+ type: :runtime
217
161
  prerelease: false
218
162
  version_requirements: !ruby/object:Gem::Requirement
219
163
  requirements:
220
- - - ">="
164
+ - - "~>"
221
165
  - !ruby/object:Gem::Version
222
- version: '0'
223
- description: Collection of tools and classes that help to identify file formats and
224
- create derivative copies.
166
+ version: '2.1'
167
+ description: Collection of tools and classes that help to identify formats of binary
168
+ files and create derivative copies (e.g. PDF from Word).
225
169
  email:
226
170
  - kris.dekeyser@libis.be
227
171
  executables:
228
- - droid_tool
229
- - fido_tool
172
+ - droid
173
+ - fido
230
174
  - formatinfo
231
175
  - libis_format
232
176
  - pdf_copy
233
177
  extensions: []
234
178
  extra_rdoc_files: []
235
179
  files:
180
+ - ".coveralls.yml"
181
+ - ".gitignore"
182
+ - ".travis.yml"
236
183
  - Gemfile
237
184
  - LICENSE.txt
238
185
  - README.md
239
- - bin/droid_tool
240
- - bin/fido_tool
186
+ - Rakefile
187
+ - base/Dockerfile
188
+ - base/Dockerfile.alpine
189
+ - base/Dockerfile.rvm
190
+ - base/rework_path
191
+ - bin/droid
192
+ - bin/fido
241
193
  - bin/formatinfo
242
194
  - bin/libis_format
243
195
  - bin/pdf_copy
244
- - data/AdobeRGB1998.icc
245
196
  - data/ISOcoated_v2_eci.icc
246
197
  - data/PDFA_def.ps
247
198
  - data/ead.xsd
199
+ - data/eciRGB_v2.icc
248
200
  - data/lias_formats.xml
249
201
  - data/types.yml
250
202
  - data/xlink.xsd
203
+ - docker_cfg.yml
251
204
  - lib/libis-format.rb
252
205
  - lib/libis/format.rb
253
206
  - lib/libis/format/cli/convert.rb
@@ -260,26 +213,33 @@ files:
260
213
  - lib/libis/format/converter/audio_converter.rb
261
214
  - lib/libis/format/converter/base.rb
262
215
  - lib/libis/format/converter/chain.rb
263
- - lib/libis/format/converter/email_converter.rb
264
216
  - lib/libis/format/converter/fop_pdf_converter.rb
217
+ - lib/libis/format/converter/image_assembler.rb
265
218
  - lib/libis/format/converter/image_converter.rb
219
+ - lib/libis/format/converter/image_splitter.rb
220
+ - lib/libis/format/converter/image_watermarker.rb
266
221
  - lib/libis/format/converter/jp2_converter.rb
267
222
  - lib/libis/format/converter/office_converter.rb
223
+ - lib/libis/format/converter/pdf_assembler.rb
268
224
  - lib/libis/format/converter/pdf_converter.rb
225
+ - lib/libis/format/converter/pdf_optimizer.rb
226
+ - lib/libis/format/converter/pdf_splitter.rb
227
+ - lib/libis/format/converter/pdf_watermarker.rb
269
228
  - lib/libis/format/converter/repository.rb
270
229
  - lib/libis/format/converter/spreadsheet_converter.rb
271
230
  - lib/libis/format/converter/video_converter.rb
272
231
  - lib/libis/format/converter/xslt_converter.rb
273
232
  - lib/libis/format/identifier.rb
233
+ - lib/libis/format/info.rb
234
+ - lib/libis/format/library.rb
274
235
  - lib/libis/format/tool.rb
275
236
  - lib/libis/format/tool/droid.rb
276
237
  - lib/libis/format/tool/extension_identification.rb
277
- - lib/libis/format/tool/ff_mpeg.rb
238
+ - lib/libis/format/tool/ffmpeg.rb
278
239
  - lib/libis/format/tool/fido.rb
279
240
  - lib/libis/format/tool/file_tool.rb
280
241
  - lib/libis/format/tool/fop_pdf.rb
281
242
  - lib/libis/format/tool/identification_tool.rb
282
- - lib/libis/format/tool/msg_to_pdf.rb
283
243
  - lib/libis/format/tool/office_to_pdf.rb
284
244
  - lib/libis/format/tool/pdf_copy.rb
285
245
  - lib/libis/format/tool/pdf_merge.rb
@@ -288,9 +248,8 @@ files:
288
248
  - lib/libis/format/tool/pdf_to_pdfa.rb
289
249
  - lib/libis/format/tool/pdfa_validator.rb
290
250
  - lib/libis/format/tool/spreadsheet_to_ods.rb
291
- - lib/libis/format/type_database.rb
292
- - lib/libis/format/type_database_impl.rb
293
251
  - lib/libis/format/version.rb
252
+ - lib/libis/format/yaml_loader.rb
294
253
  - libis-format.gemspec
295
254
  - tools/PdfTool.jar
296
255
  - tools/bcpkix-jdk15on-1.49.jar
@@ -345,7 +304,6 @@ files:
345
304
  - tools/fop/lib/xmlgraphics-commons-2.3.jar
346
305
  - tools/fop/lib/xmlgraphics-commons.LICENSE.txt
347
306
  - tools/fop/lib/xmlgraphics-commons.NOTICE.txt
348
- - tools/pdf2pdfa
349
307
  - tools/pdfbox/pdfbox-app-2.0.13.jar
350
308
  - tools/pdfbox/preflight-app-2.0.13.jar
351
309
  homepage: ''
@@ -360,14 +318,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
360
318
  requirements:
361
319
  - - ">="
362
320
  - !ruby/object:Gem::Version
363
- version: '3.2'
321
+ version: '0'
364
322
  required_rubygems_version: !ruby/object:Gem::Requirement
365
323
  requirements:
366
324
  - - ">="
367
325
  - !ruby/object:Gem::Version
368
326
  version: '0'
369
327
  requirements: []
370
- rubygems_version: 3.5.1
328
+ rubygems_version: 3.0.8
371
329
  signing_key:
372
330
  specification_version: 4
373
331
  summary: LIBIS File format format services.
Binary file
@@ -1,35 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'base'
4
-
5
- require 'libis/format/tool/msg_to_pdf'
6
- require 'libis/format/type_database'
7
- require 'rexml/document'
8
-
9
- module Libis
10
- module Format
11
- module Converter
12
- class EmailConverter < Libis::Format::Converter::Base
13
- def self.input_types
14
- [:MSG]
15
- end
16
-
17
- def self.output_types(format = nil)
18
- return [] unless input_types.include?(format)
19
-
20
- [:PDF]
21
- end
22
-
23
- def email_convert(_)
24
- # force usage of this converter
25
- end
26
-
27
- def convert(source, target, format, opts = {})
28
- super
29
-
30
- Format::Tool::MsgToPdf.run(source, target)
31
- end
32
- end
33
- end
34
- end
35
- end
@@ -1,270 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'mapi/msg'
4
- require 'rfc_2047'
5
- require 'cgi'
6
- require 'pdfkit'
7
- require 'time'
8
- require 'fileutils'
9
- require 'pathname'
10
- require 'libis/format/config'
11
-
12
- module Libis
13
- module Format
14
- module Tool
15
- class MsgToPdf
16
- include ::Libis::Tools::Logger
17
-
18
- HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
19
- HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
20
- HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
21
- HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
22
-
23
- IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
24
- IMG_CID_HTML_REGEX = /cid:([^"]*)/m
25
-
26
- def self.installed?
27
- File.exist?(Libis::Format::Config[:wkhtmltopdf])
28
- end
29
-
30
- def self.run(source, target, **options)
31
- new.run source, target, **options
32
- end
33
-
34
- def run(source, target, **options)
35
- # Preliminary checks
36
- # ------------------
37
-
38
- @warnings = []
39
-
40
- # Check if source file exists
41
- raise "File #{source} does not exist" unless File.exist?(source)
42
-
43
- # Retrieving the message
44
- # ----------------------
45
-
46
- # Open the message
47
- msg = Mapi::Msg.open(source)
48
-
49
- target_format = options.delete(:to_html) ? :HTML : :PDF
50
- result = msg_to_pdf(msg, target, target_format, options)
51
- msg.close
52
- result
53
- end
54
-
55
- def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
56
- # Make sure the target directory exists
57
- outdir = File.dirname(target)
58
- FileUtils.mkdir_p(outdir)
59
-
60
- # Get the body of the message in HTML
61
- body = msg.properties.body_html
62
-
63
- # Embed plain body in HTML as a fallback
64
- body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
65
-
66
- # Check and fix the character encoding
67
- begin
68
- # Try to encode into UTF-8
69
- body.encode!('UTF-8', universal_newline: true)
70
- rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
71
- begin
72
- # If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
73
- body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
74
- rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
75
- # If that fails too, log a warning and replace the invalid/unknown with a ? character.
76
- @warnings << "#{e.class}: #{e.message}"
77
- body.encode!('UTF-8', universal_newline: true, invalid: :replace, undef: :replace)
78
- end
79
- end
80
-
81
- # Process headers
82
- # ---------------
83
- headers = {}
84
- hdr_html = ''
85
-
86
- %w[From To Cc Subject Date].each do |key|
87
- value = find_hdr(msg.headers, key)
88
- if value
89
- headers[key.downcase.to_sym] = value
90
- hdr_html += hdr_html(key, value)
91
- end
92
- end
93
-
94
- [:date].each do |key|
95
- next unless headers[key]
96
-
97
- headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
98
- end
99
-
100
- # Add header section to the HTML body
101
- unless hdr_html.empty?
102
- # Insert header block styles
103
- if body =~ %r{</head>}
104
- # if head exists, append the style block
105
- body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
106
- elsif body =~ %r{<head/>}
107
- # empty head, replace with the style block
108
- body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
109
- else
110
- # otherwise insert a head section before the body tag
111
- body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
112
- end
113
- # Add the headers html table as first element in the body section
114
- body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
115
- end
116
-
117
- # Embed inline images
118
- # -------------------
119
- attachments = msg.attachments
120
- used_files = []
121
-
122
- # First process plaintext cid entries
123
- body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
124
- data = get_attachment_data(attachments, ::Regexp.last_match(1))
125
- if data
126
- used_files << ::Regexp.last_match(1)
127
- "<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
128
- else
129
- '<img src=""/>'
130
- end
131
- end
132
-
133
- # Then process HTML img tags with CID entries
134
- body.gsub!(IMG_CID_HTML_REGEX) do |_match|
135
- data = get_attachment_data(attachments, ::Regexp.last_match(1))
136
- if data
137
- used_files << ::Regexp.last_match(1)
138
- "data:#{data[:mime_type]};base64,#{data[:base64]}"
139
- else
140
- ''
141
- end
142
- end
143
-
144
- # Create PDF
145
- # ----------
146
- files = []
147
-
148
- if target_format == :PDF
149
- # PDF creation options
150
- pdf_options = {
151
- page_size: 'A4',
152
- margin_top: '10mm',
153
- margin_bottom: '10mm',
154
- margin_left: '10mm',
155
- margin_right: '10mm',
156
- # image_quality: 100,
157
- # viewport_size: '2480x3508',
158
- dpi: 300
159
- }.merge pdf_options
160
-
161
- subject = find_hdr(msg.headers, 'Subject')
162
- kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
163
- pdf = kit.to_pdf
164
- File.open(target, 'wb') { |f| f.write(pdf) }
165
- else
166
- File.open(target, 'wb') { |f| f.write(body) }
167
- end
168
- files << target if File.exist?(target)
169
-
170
- # Save attachments
171
- # ----------------
172
- outdir = File.join(outdir, "#{File.basename(target)}.attachments")
173
- digits = ((attachments.count + 1) / 10) + 1
174
- i = 1
175
- attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
176
- prefix = "#{format('%0*d', digits, i)}-"
177
- if (sub_msg = a.instance_variable_get(:@embedded_msg))
178
- subject = a.properties[:display_name] || sub_msg.subject || ''
179
- file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
180
- result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
181
- if (e = result[:error])
182
- raise e
183
- end
184
-
185
- files += result[:files]
186
- elsif a.filename
187
- next if used_files.include?(a.filename)
188
-
189
- file = File.join(outdir, "#{prefix}#{a.filename}")
190
- FileUtils.mkdir_p(File.dirname(file))
191
- File.open(file, 'wb') { |f| a.save(f) }
192
- files << file
193
- else
194
- @warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
195
- next
196
- end
197
- i += 1
198
- end
199
-
200
- if root_msg
201
- p = Pathname(File.dirname(files.first))
202
- files[1..].each do |f|
203
- (headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
204
- end
205
- end
206
-
207
- {
208
- command: { status: 0 },
209
- files:,
210
- headers:,
211
- warnings: @warnings
212
- }
213
- rescue Exception => e
214
- raise unless root_msg
215
-
216
- msg.close
217
- {
218
- command: { status: -1 },
219
- files: [],
220
- headers: {},
221
- errors: [
222
- {
223
- error: e.message,
224
- error_class: e.class.name,
225
- error_trace: e.backtrace
226
- }
227
- ],
228
- warnings: @warnings
229
- }
230
- end
231
-
232
- protected
233
-
234
- def eml_to_html; end
235
-
236
- private
237
-
238
- def find_hdr(list, key)
239
- keys = list.keys
240
- if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
241
- v = list[k]
242
- v = v.first if v.is_a? Array
243
- v = Rfc2047.decode(v).strip if v.is_a? String
244
- return v
245
- end
246
- nil
247
- end
248
-
249
- def hdr_html(key, value)
250
- return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
251
-
252
- ''
253
- end
254
-
255
- def get_attachment_data(attachments, cid)
256
- attachments.each do |attachment|
257
- next unless attachment.properties.attach_content_id == cid
258
-
259
- attachment.data.rewind
260
- return {
261
- mime_type: attachment.properties.attach_mime_tag,
262
- base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
263
- }
264
- end
265
- nil
266
- end
267
- end
268
- end
269
- end
270
- end