libis-format 1.3.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -52
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +40 -153
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +58 -47
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +48 -0
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +47 -0
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +85 -125
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -35
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
metadata CHANGED
@@ -1,101 +1,87 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libis-format
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.2
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kris Dekeyser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-30 00:00:00.000000000 Z
11
+ date: 2020-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: chromaprint
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: 0.0.2
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: 0.0.2
27
- - !ruby/object:Gem::Dependency
28
- name: deep_dive
14
+ name: rake
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
17
  - - "~>"
32
18
  - !ruby/object:Gem::Version
33
- version: '0.3'
34
- type: :runtime
19
+ version: '10.3'
20
+ type: :development
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
24
  - - "~>"
39
25
  - !ruby/object:Gem::Version
40
- version: '0.3'
26
+ version: '10.3'
41
27
  - !ruby/object:Gem::Dependency
42
- name: libis-mapi
28
+ name: rspec
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - "~>"
46
32
  - !ruby/object:Gem::Version
47
- version: '0.3'
48
- type: :runtime
33
+ version: '3.1'
34
+ type: :development
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
38
  - - "~>"
53
39
  - !ruby/object:Gem::Version
54
- version: '0.3'
40
+ version: '3.1'
55
41
  - !ruby/object:Gem::Dependency
56
- name: libis-tools
42
+ name: awesome_print
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - "~>"
45
+ - - ">="
60
46
  - !ruby/object:Gem::Version
61
- version: '1.1'
62
- type: :runtime
47
+ version: '0'
48
+ type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - "~>"
52
+ - - ">="
67
53
  - !ruby/object:Gem::Version
68
- version: '1.1'
54
+ version: '0'
69
55
  - !ruby/object:Gem::Dependency
70
- name: mini_magick
56
+ name: equivalent-xml
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '4.12'
76
- type: :runtime
61
+ version: '0.5'
62
+ type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '4.12'
68
+ version: '0.5'
83
69
  - !ruby/object:Gem::Dependency
84
- name: naturally
70
+ name: nokogiri
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - "~>"
73
+ - - ">="
88
74
  - !ruby/object:Gem::Version
89
- version: '2.2'
90
- type: :runtime
75
+ version: '0'
76
+ type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - "~>"
80
+ - - ">="
95
81
  - !ruby/object:Gem::Version
96
- version: '2.2'
82
+ version: '0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: new_rfc_2047
84
+ name: libis-tools
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - "~>"
@@ -112,142 +98,109 @@ dependencies:
112
98
  name: os
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
- - - "~>"
101
+ - - '='
116
102
  - !ruby/object:Gem::Version
117
- version: '1.1'
103
+ version: 0.9.6
118
104
  type: :runtime
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
122
- - - "~>"
108
+ - - '='
123
109
  - !ruby/object:Gem::Version
124
- version: '1.1'
110
+ version: 0.9.6
125
111
  - !ruby/object:Gem::Dependency
126
- name: pdfinfo
112
+ name: mini_magick
127
113
  requirement: !ruby/object:Gem::Requirement
128
114
  requirements:
129
115
  - - "~>"
130
116
  - !ruby/object:Gem::Version
131
- version: '1.4'
117
+ version: '4.3'
132
118
  type: :runtime
133
119
  prerelease: false
134
120
  version_requirements: !ruby/object:Gem::Requirement
135
121
  requirements:
136
122
  - - "~>"
137
123
  - !ruby/object:Gem::Version
138
- version: '1.4'
124
+ version: '4.3'
139
125
  - !ruby/object:Gem::Dependency
140
- name: pdfkit
126
+ name: deep_dive
141
127
  requirement: !ruby/object:Gem::Requirement
142
128
  requirements:
143
129
  - - "~>"
144
130
  - !ruby/object:Gem::Version
145
- version: '0.8'
131
+ version: '0.3'
146
132
  type: :runtime
147
133
  prerelease: false
148
134
  version_requirements: !ruby/object:Gem::Requirement
149
135
  requirements:
150
136
  - - "~>"
151
137
  - !ruby/object:Gem::Version
152
- version: '0.8'
153
- - !ruby/object:Gem::Dependency
154
- name: awesome_print
155
- requirement: !ruby/object:Gem::Requirement
156
- requirements:
157
- - - ">="
158
- - !ruby/object:Gem::Version
159
- version: '0'
160
- type: :development
161
- prerelease: false
162
- version_requirements: !ruby/object:Gem::Requirement
163
- requirements:
164
- - - ">="
165
- - !ruby/object:Gem::Version
166
- version: '0'
167
- - !ruby/object:Gem::Dependency
168
- name: equivalent-xml
169
- requirement: !ruby/object:Gem::Requirement
170
- requirements:
171
- - - ">="
172
- - !ruby/object:Gem::Version
173
- version: '0'
174
- type: :development
175
- prerelease: false
176
- version_requirements: !ruby/object:Gem::Requirement
177
- requirements:
178
- - - ">="
179
- - !ruby/object:Gem::Version
180
- version: '0'
181
- - !ruby/object:Gem::Dependency
182
- name: rake
183
- requirement: !ruby/object:Gem::Requirement
184
- requirements:
185
- - - ">="
186
- - !ruby/object:Gem::Version
187
- version: '0'
188
- type: :development
189
- prerelease: false
190
- version_requirements: !ruby/object:Gem::Requirement
191
- requirements:
192
- - - ">="
193
- - !ruby/object:Gem::Version
194
- version: '0'
138
+ version: '0.3'
195
139
  - !ruby/object:Gem::Dependency
196
- name: rspec
140
+ name: chromaprint
197
141
  requirement: !ruby/object:Gem::Requirement
198
142
  requirements:
199
- - - ">="
143
+ - - "~>"
200
144
  - !ruby/object:Gem::Version
201
- version: '0'
202
- type: :development
145
+ version: 0.0.2
146
+ type: :runtime
203
147
  prerelease: false
204
148
  version_requirements: !ruby/object:Gem::Requirement
205
149
  requirements:
206
- - - ">="
150
+ - - "~>"
207
151
  - !ruby/object:Gem::Version
208
- version: '0'
152
+ version: 0.0.2
209
153
  - !ruby/object:Gem::Dependency
210
- name: nokogiri
154
+ name: naturally
211
155
  requirement: !ruby/object:Gem::Requirement
212
156
  requirements:
213
- - - ">="
157
+ - - "~>"
214
158
  - !ruby/object:Gem::Version
215
- version: '0'
216
- type: :development
159
+ version: '2.1'
160
+ type: :runtime
217
161
  prerelease: false
218
162
  version_requirements: !ruby/object:Gem::Requirement
219
163
  requirements:
220
- - - ">="
164
+ - - "~>"
221
165
  - !ruby/object:Gem::Version
222
- version: '0'
223
- description: Collection of tools and classes that help to identify file formats and
224
- create derivative copies.
166
+ version: '2.1'
167
+ description: Collection of tools and classes that help to identify formats of binary
168
+ files and create derivative copies (e.g. PDF from Word).
225
169
  email:
226
170
  - kris.dekeyser@libis.be
227
171
  executables:
228
- - droid_tool
229
- - fido_tool
172
+ - droid
173
+ - fido
230
174
  - formatinfo
231
175
  - libis_format
232
- - pdf_tool
176
+ - pdf_copy
233
177
  extensions: []
234
178
  extra_rdoc_files: []
235
179
  files:
180
+ - ".coveralls.yml"
181
+ - ".gitignore"
182
+ - ".travis.yml"
236
183
  - Gemfile
237
184
  - LICENSE.txt
238
185
  - README.md
239
- - bin/droid_tool
240
- - bin/fido_tool
186
+ - Rakefile
187
+ - base/Dockerfile
188
+ - base/Dockerfile.alpine
189
+ - base/Dockerfile.rvm
190
+ - base/rework_path
191
+ - bin/droid
192
+ - bin/fido
241
193
  - bin/formatinfo
242
194
  - bin/libis_format
243
- - bin/pdf_tool
244
- - data/AdobeRGB1998.icc
195
+ - bin/pdf_copy
245
196
  - data/ISOcoated_v2_eci.icc
246
197
  - data/PDFA_def.ps
247
198
  - data/ead.xsd
199
+ - data/eciRGB_v2.icc
248
200
  - data/lias_formats.xml
249
201
  - data/types.yml
250
202
  - data/xlink.xsd
203
+ - docker_cfg.yml
251
204
  - lib/libis-format.rb
252
205
  - lib/libis/format.rb
253
206
  - lib/libis/format/cli/convert.rb
@@ -260,35 +213,43 @@ files:
260
213
  - lib/libis/format/converter/audio_converter.rb
261
214
  - lib/libis/format/converter/base.rb
262
215
  - lib/libis/format/converter/chain.rb
263
- - lib/libis/format/converter/email_converter.rb
264
216
  - lib/libis/format/converter/fop_pdf_converter.rb
217
+ - lib/libis/format/converter/image_assembler.rb
265
218
  - lib/libis/format/converter/image_converter.rb
219
+ - lib/libis/format/converter/image_splitter.rb
220
+ - lib/libis/format/converter/image_watermarker.rb
266
221
  - lib/libis/format/converter/jp2_converter.rb
267
222
  - lib/libis/format/converter/office_converter.rb
223
+ - lib/libis/format/converter/pdf_assembler.rb
268
224
  - lib/libis/format/converter/pdf_converter.rb
225
+ - lib/libis/format/converter/pdf_optimizer.rb
226
+ - lib/libis/format/converter/pdf_splitter.rb
227
+ - lib/libis/format/converter/pdf_watermarker.rb
269
228
  - lib/libis/format/converter/repository.rb
270
229
  - lib/libis/format/converter/spreadsheet_converter.rb
271
230
  - lib/libis/format/converter/video_converter.rb
272
231
  - lib/libis/format/converter/xslt_converter.rb
273
232
  - lib/libis/format/identifier.rb
233
+ - lib/libis/format/info.rb
234
+ - lib/libis/format/library.rb
274
235
  - lib/libis/format/tool.rb
275
236
  - lib/libis/format/tool/droid.rb
276
237
  - lib/libis/format/tool/extension_identification.rb
277
- - lib/libis/format/tool/ff_mpeg.rb
238
+ - lib/libis/format/tool/ffmpeg.rb
278
239
  - lib/libis/format/tool/fido.rb
279
240
  - lib/libis/format/tool/file_tool.rb
280
241
  - lib/libis/format/tool/fop_pdf.rb
281
242
  - lib/libis/format/tool/identification_tool.rb
282
- - lib/libis/format/tool/msg_to_pdf.rb
283
243
  - lib/libis/format/tool/office_to_pdf.rb
244
+ - lib/libis/format/tool/pdf_copy.rb
245
+ - lib/libis/format/tool/pdf_merge.rb
284
246
  - lib/libis/format/tool/pdf_optimizer.rb
247
+ - lib/libis/format/tool/pdf_split.rb
285
248
  - lib/libis/format/tool/pdf_to_pdfa.rb
286
- - lib/libis/format/tool/pdf_tool.rb
287
249
  - lib/libis/format/tool/pdfa_validator.rb
288
250
  - lib/libis/format/tool/spreadsheet_to_ods.rb
289
- - lib/libis/format/type_database.rb
290
- - lib/libis/format/type_database_impl.rb
291
251
  - lib/libis/format/version.rb
252
+ - lib/libis/format/yaml_loader.rb
292
253
  - libis-format.gemspec
293
254
  - tools/PdfTool.jar
294
255
  - tools/bcpkix-jdk15on-1.49.jar
@@ -343,9 +304,8 @@ files:
343
304
  - tools/fop/lib/xmlgraphics-commons-2.3.jar
344
305
  - tools/fop/lib/xmlgraphics-commons.LICENSE.txt
345
306
  - tools/fop/lib/xmlgraphics-commons.NOTICE.txt
346
- - tools/pdf2pdfa
347
- - tools/pdfbox/pdfbox-app-3.0.3.jar
348
- - tools/pdfbox/preflight-app-3.0.3.jar
307
+ - tools/pdfbox/pdfbox-app-2.0.13.jar
308
+ - tools/pdfbox/preflight-app-2.0.13.jar
349
309
  homepage: ''
350
310
  licenses:
351
311
  - MIT
@@ -358,14 +318,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
358
318
  requirements:
359
319
  - - ">="
360
320
  - !ruby/object:Gem::Version
361
- version: '3.2'
321
+ version: '0'
362
322
  required_rubygems_version: !ruby/object:Gem::Requirement
363
323
  requirements:
364
324
  - - ">="
365
325
  - !ruby/object:Gem::Version
366
326
  version: '0'
367
327
  requirements: []
368
- rubygems_version: 3.5.18
328
+ rubygems_version: 3.0.8
369
329
  signing_key:
370
330
  specification_version: 4
371
331
  summary: LIBIS File format format services.
Binary file
@@ -1,35 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'base'
4
-
5
- require 'libis/format/tool/msg_to_pdf'
6
- require 'libis/format/type_database'
7
- require 'rexml/document'
8
-
9
- module Libis
10
- module Format
11
- module Converter
12
- class EmailConverter < Libis::Format::Converter::Base
13
- def self.input_types
14
- [:MSG]
15
- end
16
-
17
- def self.output_types(format = nil)
18
- return [] unless input_types.include?(format)
19
-
20
- [:PDF]
21
- end
22
-
23
- def email_convert(_)
24
- # force usage of this converter
25
- end
26
-
27
- def convert(source, target, format, opts = {})
28
- super
29
-
30
- Format::Tool::MsgToPdf.run(source, target)
31
- end
32
- end
33
- end
34
- end
35
- end
@@ -1,270 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'mapi/msg'
4
- require 'rfc_2047'
5
- require 'cgi'
6
- require 'pdfkit'
7
- require 'time'
8
- require 'fileutils'
9
- require 'pathname'
10
- require 'libis/format/config'
11
-
12
- module Libis
13
- module Format
14
- module Tool
15
- class MsgToPdf
16
- include ::Libis::Tools::Logger
17
-
18
- HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
19
- HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
20
- HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
21
- HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
22
-
23
- IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
24
- IMG_CID_HTML_REGEX = /cid:([^"]*)/m
25
-
26
- def self.installed?
27
- File.exist?(Libis::Format::Config[:wkhtmltopdf])
28
- end
29
-
30
- def self.run(source, target, **options)
31
- new.run source, target, **options
32
- end
33
-
34
- def run(source, target, **options)
35
- # Preliminary checks
36
- # ------------------
37
-
38
- @warnings = []
39
-
40
- # Check if source file exists
41
- raise "File #{source} does not exist" unless File.exist?(source)
42
-
43
- # Retrieving the message
44
- # ----------------------
45
-
46
- # Open the message
47
- msg = Mapi::Msg.open(source)
48
-
49
- target_format = options.delete(:to_html) ? :HTML : :PDF
50
- result = msg_to_pdf(msg, target, target_format, options)
51
- msg.close
52
- result
53
- end
54
-
55
- def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
56
- # Make sure the target directory exists
57
- outdir = File.dirname(target)
58
- FileUtils.mkdir_p(outdir)
59
-
60
- # Get the body of the message in HTML
61
- body = msg.properties.body_html
62
-
63
- # Embed plain body in HTML as a fallback
64
- body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
65
-
66
- # Check and fix the character encoding
67
- begin
68
- # Try to encode into UTF-8
69
- body.encode!('UTF-8', universal_newline: true)
70
- rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
71
- begin
72
- # If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
73
- body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
74
- rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
75
- # If that fails too, log a warning and replace the invalid/unknown with a ? character.
76
- @warnings << "#{e.class}: #{e.message}"
77
- body.encode!('UTF-8', universal_newline: true, invalid: :replace, undef: :replace)
78
- end
79
- end
80
-
81
- # Process headers
82
- # ---------------
83
- headers = {}
84
- hdr_html = ''
85
-
86
- %w[From To Cc Subject Date].each do |key|
87
- value = find_hdr(msg.headers, key)
88
- if value
89
- headers[key.downcase.to_sym] = value
90
- hdr_html += hdr_html(key, value)
91
- end
92
- end
93
-
94
- [:date].each do |key|
95
- next unless headers[key]
96
-
97
- headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
98
- end
99
-
100
- # Add header section to the HTML body
101
- unless hdr_html.empty?
102
- # Insert header block styles
103
- if body =~ %r{</head>}
104
- # if head exists, append the style block
105
- body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
106
- elsif body =~ %r{<head/>}
107
- # empty head, replace with the style block
108
- body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
109
- else
110
- # otherwise insert a head section before the body tag
111
- body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
112
- end
113
- # Add the headers html table as first element in the body section
114
- body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
115
- end
116
-
117
- # Embed inline images
118
- # -------------------
119
- attachments = msg.attachments
120
- used_files = []
121
-
122
- # First process plaintext cid entries
123
- body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
124
- data = get_attachment_data(attachments, ::Regexp.last_match(1))
125
- if data
126
- used_files << ::Regexp.last_match(1)
127
- "<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
128
- else
129
- '<img src=""/>'
130
- end
131
- end
132
-
133
- # Then process HTML img tags with CID entries
134
- body.gsub!(IMG_CID_HTML_REGEX) do |_match|
135
- data = get_attachment_data(attachments, ::Regexp.last_match(1))
136
- if data
137
- used_files << ::Regexp.last_match(1)
138
- "data:#{data[:mime_type]};base64,#{data[:base64]}"
139
- else
140
- ''
141
- end
142
- end
143
-
144
- # Create PDF
145
- # ----------
146
- files = []
147
-
148
- if target_format == :PDF
149
- # PDF creation options
150
- pdf_options = {
151
- page_size: 'A4',
152
- margin_top: '10mm',
153
- margin_bottom: '10mm',
154
- margin_left: '10mm',
155
- margin_right: '10mm',
156
- # image_quality: 100,
157
- # viewport_size: '2480x3508',
158
- dpi: 300
159
- }.merge pdf_options
160
-
161
- subject = find_hdr(msg.headers, 'Subject')
162
- kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
163
- pdf = kit.to_pdf
164
- File.open(target, 'wb') { |f| f.write(pdf) }
165
- else
166
- File.open(target, 'wb') { |f| f.write(body) }
167
- end
168
- files << target if File.exist?(target)
169
-
170
- # Save attachments
171
- # ----------------
172
- outdir = File.join(outdir, "#{File.basename(target)}.attachments")
173
- digits = ((attachments.count + 1) / 10) + 1
174
- i = 1
175
- attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
176
- prefix = "#{format('%0*d', digits, i)}-"
177
- if (sub_msg = a.instance_variable_get(:@embedded_msg))
178
- subject = a.properties[:display_name] || sub_msg.subject || ''
179
- file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
180
- result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
181
- if (e = result[:error])
182
- raise e
183
- end
184
-
185
- files += result[:files]
186
- elsif a.filename
187
- next if used_files.include?(a.filename)
188
-
189
- file = File.join(outdir, "#{prefix}#{a.filename}")
190
- FileUtils.mkdir_p(File.dirname(file))
191
- File.open(file, 'wb') { |f| a.save(f) }
192
- files << file
193
- else
194
- @warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
195
- next
196
- end
197
- i += 1
198
- end
199
-
200
- if root_msg
201
- p = Pathname(File.dirname(files.first))
202
- files[1..].each do |f|
203
- (headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
204
- end
205
- end
206
-
207
- {
208
- command: { status: 0 },
209
- files:,
210
- headers:,
211
- warnings: @warnings
212
- }
213
- rescue Exception => e
214
- raise unless root_msg
215
-
216
- msg.close
217
- {
218
- command: { status: -1 },
219
- files: [],
220
- headers: {},
221
- errors: [
222
- {
223
- error: e.message,
224
- error_class: e.class.name,
225
- error_trace: e.backtrace
226
- }
227
- ],
228
- warnings: @warnings
229
- }
230
- end
231
-
232
- protected
233
-
234
- def eml_to_html; end
235
-
236
- private
237
-
238
- def find_hdr(list, key)
239
- keys = list.keys
240
- if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
241
- v = list[k]
242
- v = v.first if v.is_a? Array
243
- v = Rfc2047.decode(v).strip if v.is_a? String
244
- return v
245
- end
246
- nil
247
- end
248
-
249
- def hdr_html(key, value)
250
- return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
251
-
252
- ''
253
- end
254
-
255
- def get_attachment_data(attachments, cid)
256
- attachments.each do |attachment|
257
- next unless attachment.properties.attach_content_id == cid
258
-
259
- attachment.data.rewind
260
- return {
261
- mime_type: attachment.properties.attach_mime_tag,
262
- base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
263
- }
264
- end
265
- nil
266
- end
267
- end
268
- end
269
- end
270
- end