libis-format 1.2.9 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -8
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +3 -16
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/config.rb +4 -3
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +21 -8
- data/lib/libis/format/converter/chain.rb +6 -18
- data/lib/libis/format/converter/fop_pdf_converter.rb +2 -0
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +21 -141
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +4 -4
- data/lib/libis/format/converter/office_converter.rb +5 -3
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +31 -98
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/spreadsheet_converter.rb +5 -3
- data/lib/libis/format/converter/video_converter.rb +3 -6
- data/lib/libis/format/converter/xslt_converter.rb +14 -15
- data/lib/libis/format/identifier.rb +4 -4
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +1 -10
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +1 -11
- data/lib/libis/format/tool/pdf_merge.rb +1 -11
- data/lib/libis/format/tool/pdf_optimizer.rb +2 -11
- data/lib/libis/format/tool/pdf_split.rb +16 -25
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -50
- data/lib/libis/format/tool/pdfa_validator.rb +30 -25
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +2 -10
- data/lib/libis/format/tool.rb +1 -2
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +5 -2
- data/libis-format.gemspec +18 -24
- metadata +78 -120
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -38
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/type_database.rb +0 -133
- data/lib/libis/format/type_database_impl.rb +0 -120
- data/tools/pdf2pdfa +0 -395
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
metadata
CHANGED
@@ -1,101 +1,87 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libis-format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kris Dekeyser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 0.0.2
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 0.0.2
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: deep_dive
|
14
|
+
name: rake
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
30
16
|
requirements:
|
31
17
|
- - "~>"
|
32
18
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
34
|
-
type: :
|
19
|
+
version: '10.3'
|
20
|
+
type: :development
|
35
21
|
prerelease: false
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
37
23
|
requirements:
|
38
24
|
- - "~>"
|
39
25
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
26
|
+
version: '10.3'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
28
|
+
name: rspec
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
44
30
|
requirements:
|
45
31
|
- - "~>"
|
46
32
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
48
|
-
type: :
|
33
|
+
version: '3.1'
|
34
|
+
type: :development
|
49
35
|
prerelease: false
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
38
|
- - "~>"
|
53
39
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
40
|
+
version: '3.1'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
42
|
+
name: awesome_print
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
|
-
- - "
|
45
|
+
- - ">="
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
62
|
-
type: :
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- - "
|
52
|
+
- - ">="
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
54
|
+
version: '0'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
56
|
+
name: equivalent-xml
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - "~>"
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
76
|
-
type: :
|
61
|
+
version: '0.5'
|
62
|
+
type: :development
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
66
|
- - "~>"
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
68
|
+
version: '0.5'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
70
|
+
name: nokogiri
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
|
-
- - "
|
73
|
+
- - ">="
|
88
74
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
90
|
-
type: :
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
91
77
|
prerelease: false
|
92
78
|
version_requirements: !ruby/object:Gem::Requirement
|
93
79
|
requirements:
|
94
|
-
- - "
|
80
|
+
- - ">="
|
95
81
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
82
|
+
version: '0'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
84
|
+
name: libis-tools
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
100
86
|
requirements:
|
101
87
|
- - "~>"
|
@@ -112,142 +98,109 @@ dependencies:
|
|
112
98
|
name: os
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
114
100
|
requirements:
|
115
|
-
- -
|
101
|
+
- - '='
|
116
102
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
103
|
+
version: 0.9.6
|
118
104
|
type: :runtime
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
107
|
requirements:
|
122
|
-
- -
|
108
|
+
- - '='
|
123
109
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
110
|
+
version: 0.9.6
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
112
|
+
name: mini_magick
|
127
113
|
requirement: !ruby/object:Gem::Requirement
|
128
114
|
requirements:
|
129
115
|
- - "~>"
|
130
116
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
117
|
+
version: '4.3'
|
132
118
|
type: :runtime
|
133
119
|
prerelease: false
|
134
120
|
version_requirements: !ruby/object:Gem::Requirement
|
135
121
|
requirements:
|
136
122
|
- - "~>"
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
124
|
+
version: '4.3'
|
139
125
|
- !ruby/object:Gem::Dependency
|
140
|
-
name:
|
126
|
+
name: deep_dive
|
141
127
|
requirement: !ruby/object:Gem::Requirement
|
142
128
|
requirements:
|
143
129
|
- - "~>"
|
144
130
|
- !ruby/object:Gem::Version
|
145
|
-
version: '0.
|
131
|
+
version: '0.3'
|
146
132
|
type: :runtime
|
147
133
|
prerelease: false
|
148
134
|
version_requirements: !ruby/object:Gem::Requirement
|
149
135
|
requirements:
|
150
136
|
- - "~>"
|
151
137
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0.
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: awesome_print
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - ">="
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
160
|
-
type: :development
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - ">="
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
167
|
-
- !ruby/object:Gem::Dependency
|
168
|
-
name: equivalent-xml
|
169
|
-
requirement: !ruby/object:Gem::Requirement
|
170
|
-
requirements:
|
171
|
-
- - ">="
|
172
|
-
- !ruby/object:Gem::Version
|
173
|
-
version: '0'
|
174
|
-
type: :development
|
175
|
-
prerelease: false
|
176
|
-
version_requirements: !ruby/object:Gem::Requirement
|
177
|
-
requirements:
|
178
|
-
- - ">="
|
179
|
-
- !ruby/object:Gem::Version
|
180
|
-
version: '0'
|
181
|
-
- !ruby/object:Gem::Dependency
|
182
|
-
name: rake
|
183
|
-
requirement: !ruby/object:Gem::Requirement
|
184
|
-
requirements:
|
185
|
-
- - ">="
|
186
|
-
- !ruby/object:Gem::Version
|
187
|
-
version: '0'
|
188
|
-
type: :development
|
189
|
-
prerelease: false
|
190
|
-
version_requirements: !ruby/object:Gem::Requirement
|
191
|
-
requirements:
|
192
|
-
- - ">="
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
version: '0'
|
138
|
+
version: '0.3'
|
195
139
|
- !ruby/object:Gem::Dependency
|
196
|
-
name:
|
140
|
+
name: chromaprint
|
197
141
|
requirement: !ruby/object:Gem::Requirement
|
198
142
|
requirements:
|
199
|
-
- - "
|
143
|
+
- - "~>"
|
200
144
|
- !ruby/object:Gem::Version
|
201
|
-
version:
|
202
|
-
type: :
|
145
|
+
version: 0.0.2
|
146
|
+
type: :runtime
|
203
147
|
prerelease: false
|
204
148
|
version_requirements: !ruby/object:Gem::Requirement
|
205
149
|
requirements:
|
206
|
-
- - "
|
150
|
+
- - "~>"
|
207
151
|
- !ruby/object:Gem::Version
|
208
|
-
version:
|
152
|
+
version: 0.0.2
|
209
153
|
- !ruby/object:Gem::Dependency
|
210
|
-
name:
|
154
|
+
name: naturally
|
211
155
|
requirement: !ruby/object:Gem::Requirement
|
212
156
|
requirements:
|
213
|
-
- - "
|
157
|
+
- - "~>"
|
214
158
|
- !ruby/object:Gem::Version
|
215
|
-
version: '
|
216
|
-
type: :
|
159
|
+
version: '2.1'
|
160
|
+
type: :runtime
|
217
161
|
prerelease: false
|
218
162
|
version_requirements: !ruby/object:Gem::Requirement
|
219
163
|
requirements:
|
220
|
-
- - "
|
164
|
+
- - "~>"
|
221
165
|
- !ruby/object:Gem::Version
|
222
|
-
version: '
|
223
|
-
description: Collection of tools and classes that help to identify
|
224
|
-
create derivative copies.
|
166
|
+
version: '2.1'
|
167
|
+
description: Collection of tools and classes that help to identify formats of binary
|
168
|
+
files and create derivative copies (e.g. PDF from Word).
|
225
169
|
email:
|
226
170
|
- kris.dekeyser@libis.be
|
227
171
|
executables:
|
228
|
-
-
|
229
|
-
-
|
172
|
+
- droid
|
173
|
+
- fido
|
230
174
|
- formatinfo
|
231
175
|
- libis_format
|
232
176
|
- pdf_copy
|
233
177
|
extensions: []
|
234
178
|
extra_rdoc_files: []
|
235
179
|
files:
|
180
|
+
- ".coveralls.yml"
|
181
|
+
- ".gitignore"
|
182
|
+
- ".travis.yml"
|
236
183
|
- Gemfile
|
237
184
|
- LICENSE.txt
|
238
185
|
- README.md
|
239
|
-
-
|
240
|
-
-
|
186
|
+
- Rakefile
|
187
|
+
- base/Dockerfile
|
188
|
+
- base/Dockerfile.alpine
|
189
|
+
- base/Dockerfile.rvm
|
190
|
+
- base/rework_path
|
191
|
+
- bin/droid
|
192
|
+
- bin/fido
|
241
193
|
- bin/formatinfo
|
242
194
|
- bin/libis_format
|
243
195
|
- bin/pdf_copy
|
244
|
-
- data/AdobeRGB1998.icc
|
245
196
|
- data/ISOcoated_v2_eci.icc
|
246
197
|
- data/PDFA_def.ps
|
247
198
|
- data/ead.xsd
|
199
|
+
- data/eciRGB_v2.icc
|
248
200
|
- data/lias_formats.xml
|
249
201
|
- data/types.yml
|
250
202
|
- data/xlink.xsd
|
203
|
+
- docker_cfg.yml
|
251
204
|
- lib/libis-format.rb
|
252
205
|
- lib/libis/format.rb
|
253
206
|
- lib/libis/format/cli/convert.rb
|
@@ -260,26 +213,33 @@ files:
|
|
260
213
|
- lib/libis/format/converter/audio_converter.rb
|
261
214
|
- lib/libis/format/converter/base.rb
|
262
215
|
- lib/libis/format/converter/chain.rb
|
263
|
-
- lib/libis/format/converter/email_converter.rb
|
264
216
|
- lib/libis/format/converter/fop_pdf_converter.rb
|
217
|
+
- lib/libis/format/converter/image_assembler.rb
|
265
218
|
- lib/libis/format/converter/image_converter.rb
|
219
|
+
- lib/libis/format/converter/image_splitter.rb
|
220
|
+
- lib/libis/format/converter/image_watermarker.rb
|
266
221
|
- lib/libis/format/converter/jp2_converter.rb
|
267
222
|
- lib/libis/format/converter/office_converter.rb
|
223
|
+
- lib/libis/format/converter/pdf_assembler.rb
|
268
224
|
- lib/libis/format/converter/pdf_converter.rb
|
225
|
+
- lib/libis/format/converter/pdf_optimizer.rb
|
226
|
+
- lib/libis/format/converter/pdf_splitter.rb
|
227
|
+
- lib/libis/format/converter/pdf_watermarker.rb
|
269
228
|
- lib/libis/format/converter/repository.rb
|
270
229
|
- lib/libis/format/converter/spreadsheet_converter.rb
|
271
230
|
- lib/libis/format/converter/video_converter.rb
|
272
231
|
- lib/libis/format/converter/xslt_converter.rb
|
273
232
|
- lib/libis/format/identifier.rb
|
233
|
+
- lib/libis/format/info.rb
|
234
|
+
- lib/libis/format/library.rb
|
274
235
|
- lib/libis/format/tool.rb
|
275
236
|
- lib/libis/format/tool/droid.rb
|
276
237
|
- lib/libis/format/tool/extension_identification.rb
|
277
|
-
- lib/libis/format/tool/
|
238
|
+
- lib/libis/format/tool/ffmpeg.rb
|
278
239
|
- lib/libis/format/tool/fido.rb
|
279
240
|
- lib/libis/format/tool/file_tool.rb
|
280
241
|
- lib/libis/format/tool/fop_pdf.rb
|
281
242
|
- lib/libis/format/tool/identification_tool.rb
|
282
|
-
- lib/libis/format/tool/msg_to_pdf.rb
|
283
243
|
- lib/libis/format/tool/office_to_pdf.rb
|
284
244
|
- lib/libis/format/tool/pdf_copy.rb
|
285
245
|
- lib/libis/format/tool/pdf_merge.rb
|
@@ -288,9 +248,8 @@ files:
|
|
288
248
|
- lib/libis/format/tool/pdf_to_pdfa.rb
|
289
249
|
- lib/libis/format/tool/pdfa_validator.rb
|
290
250
|
- lib/libis/format/tool/spreadsheet_to_ods.rb
|
291
|
-
- lib/libis/format/type_database.rb
|
292
|
-
- lib/libis/format/type_database_impl.rb
|
293
251
|
- lib/libis/format/version.rb
|
252
|
+
- lib/libis/format/yaml_loader.rb
|
294
253
|
- libis-format.gemspec
|
295
254
|
- tools/PdfTool.jar
|
296
255
|
- tools/bcpkix-jdk15on-1.49.jar
|
@@ -345,7 +304,6 @@ files:
|
|
345
304
|
- tools/fop/lib/xmlgraphics-commons-2.3.jar
|
346
305
|
- tools/fop/lib/xmlgraphics-commons.LICENSE.txt
|
347
306
|
- tools/fop/lib/xmlgraphics-commons.NOTICE.txt
|
348
|
-
- tools/pdf2pdfa
|
349
307
|
- tools/pdfbox/pdfbox-app-2.0.13.jar
|
350
308
|
- tools/pdfbox/preflight-app-2.0.13.jar
|
351
309
|
homepage: ''
|
@@ -360,14 +318,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
360
318
|
requirements:
|
361
319
|
- - ">="
|
362
320
|
- !ruby/object:Gem::Version
|
363
|
-
version: '
|
321
|
+
version: '0'
|
364
322
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
365
323
|
requirements:
|
366
324
|
- - ">="
|
367
325
|
- !ruby/object:Gem::Version
|
368
326
|
version: '0'
|
369
327
|
requirements: []
|
370
|
-
rubygems_version: 3.
|
328
|
+
rubygems_version: 3.0.8
|
371
329
|
signing_key:
|
372
330
|
specification_version: 4
|
373
331
|
summary: LIBIS File format format services.
|
data/data/AdobeRGB1998.icc
DELETED
Binary file
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require_relative 'base'
|
4
|
-
|
5
|
-
require 'libis/format/tool/msg_to_pdf'
|
6
|
-
require 'libis/format/type_database'
|
7
|
-
require 'rexml/document'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
module Converter
|
12
|
-
|
13
|
-
class EmailConverter < Libis::Format::Converter::Base
|
14
|
-
|
15
|
-
def self.input_types
|
16
|
-
[ :MSG ]
|
17
|
-
end
|
18
|
-
|
19
|
-
def self.output_types(format = nil)
|
20
|
-
return [] unless input_types.include?(format)
|
21
|
-
[ :PDF ]
|
22
|
-
end
|
23
|
-
|
24
|
-
def email_convert(_)
|
25
|
-
#force usage of this converter
|
26
|
-
end
|
27
|
-
|
28
|
-
def convert(source, target, format, opts = {})
|
29
|
-
super
|
30
|
-
|
31
|
-
Format::Tool::MsgToPdf.run(source, target)
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,270 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'mapi/msg'
|
4
|
-
require 'rfc_2047'
|
5
|
-
require 'cgi'
|
6
|
-
require 'pdfkit'
|
7
|
-
require 'time'
|
8
|
-
require 'fileutils'
|
9
|
-
require 'pathname'
|
10
|
-
require 'libis/format/config'
|
11
|
-
|
12
|
-
module Libis
|
13
|
-
module Format
|
14
|
-
module Tool
|
15
|
-
class MsgToPdf
|
16
|
-
include ::Libis::Tools::Logger
|
17
|
-
|
18
|
-
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
|
19
|
-
HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
|
20
|
-
HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
|
21
|
-
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
|
22
|
-
|
23
|
-
IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
|
24
|
-
IMG_CID_HTML_REGEX = /cid:([^"]*)/m
|
25
|
-
|
26
|
-
def self.installed?
|
27
|
-
File.exist?(Libis::Format::Config[:wkhtmltopdf])
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.run(source, target, **options)
|
31
|
-
new.run source, target, **options
|
32
|
-
end
|
33
|
-
|
34
|
-
def run(source, target, **options)
|
35
|
-
# Preliminary checks
|
36
|
-
# ------------------
|
37
|
-
|
38
|
-
@warnings = []
|
39
|
-
|
40
|
-
# Check if source file exists
|
41
|
-
raise "File #{source} does not exist" unless File.exist?(source)
|
42
|
-
|
43
|
-
# Retrieving the message
|
44
|
-
# ----------------------
|
45
|
-
|
46
|
-
# Open the message
|
47
|
-
msg = Mapi::Msg.open(source)
|
48
|
-
|
49
|
-
target_format = options.delete(:to_html) ? :HTML : :PDF
|
50
|
-
result = msg_to_pdf(msg, target, target_format, options)
|
51
|
-
msg.close
|
52
|
-
result
|
53
|
-
end
|
54
|
-
|
55
|
-
def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
|
56
|
-
# Make sure the target directory exists
|
57
|
-
outdir = File.dirname(target)
|
58
|
-
FileUtils.mkdir_p(outdir)
|
59
|
-
|
60
|
-
# Get the body of the message in HTML
|
61
|
-
body = msg.properties.body_html
|
62
|
-
|
63
|
-
# Embed plain body in HTML as a fallback
|
64
|
-
body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
|
65
|
-
|
66
|
-
# Check and fix the character encoding
|
67
|
-
begin
|
68
|
-
# Try to encode into UTF-8
|
69
|
-
body.encode!('UTF-8', universal_newline: true)
|
70
|
-
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
71
|
-
begin
|
72
|
-
# If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
|
73
|
-
body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
|
74
|
-
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
|
75
|
-
# If that fails too, log a warning and replace the invalid/unknown with a ? character.
|
76
|
-
@warnings << "#{e.class}: #{e.message}"
|
77
|
-
body.encode!('UTF-8', universal_newline: true, invalid: :replace, undef: :replace)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
# Process headers
|
82
|
-
# ---------------
|
83
|
-
headers = {}
|
84
|
-
hdr_html = ''
|
85
|
-
|
86
|
-
%w[From To Cc Subject Date].each do |key|
|
87
|
-
value = find_hdr(msg.headers, key)
|
88
|
-
if value
|
89
|
-
headers[key.downcase.to_sym] = value
|
90
|
-
hdr_html += hdr_html(key, value)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
[:date].each do |key|
|
95
|
-
next unless headers[key]
|
96
|
-
|
97
|
-
headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
|
98
|
-
end
|
99
|
-
|
100
|
-
# Add header section to the HTML body
|
101
|
-
unless hdr_html.empty?
|
102
|
-
# Insert header block styles
|
103
|
-
if body =~ %r{</head>}
|
104
|
-
# if head exists, append the style block
|
105
|
-
body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
|
106
|
-
elsif body =~ %r{<head/>}
|
107
|
-
# empty head, replace with the style block
|
108
|
-
body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
|
109
|
-
else
|
110
|
-
# otherwise insert a head section before the body tag
|
111
|
-
body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
|
112
|
-
end
|
113
|
-
# Add the headers html table as first element in the body section
|
114
|
-
body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
|
115
|
-
end
|
116
|
-
|
117
|
-
# Embed inline images
|
118
|
-
# -------------------
|
119
|
-
attachments = msg.attachments
|
120
|
-
used_files = []
|
121
|
-
|
122
|
-
# First process plaintext cid entries
|
123
|
-
body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
|
124
|
-
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
125
|
-
if data
|
126
|
-
used_files << ::Regexp.last_match(1)
|
127
|
-
"<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
|
128
|
-
else
|
129
|
-
'<img src=""/>'
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
# Then process HTML img tags with CID entries
|
134
|
-
body.gsub!(IMG_CID_HTML_REGEX) do |_match|
|
135
|
-
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
136
|
-
if data
|
137
|
-
used_files << ::Regexp.last_match(1)
|
138
|
-
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
139
|
-
else
|
140
|
-
''
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
# Create PDF
|
145
|
-
# ----------
|
146
|
-
files = []
|
147
|
-
|
148
|
-
if target_format == :PDF
|
149
|
-
# PDF creation options
|
150
|
-
pdf_options = {
|
151
|
-
page_size: 'A4',
|
152
|
-
margin_top: '10mm',
|
153
|
-
margin_bottom: '10mm',
|
154
|
-
margin_left: '10mm',
|
155
|
-
margin_right: '10mm',
|
156
|
-
# image_quality: 100,
|
157
|
-
# viewport_size: '2480x3508',
|
158
|
-
dpi: 300
|
159
|
-
}.merge pdf_options
|
160
|
-
|
161
|
-
subject = find_hdr(msg.headers, 'Subject')
|
162
|
-
kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
|
163
|
-
pdf = kit.to_pdf
|
164
|
-
File.open(target, 'wb') { |f| f.write(pdf) }
|
165
|
-
else
|
166
|
-
File.open(target, 'wb') { |f| f.write(body) }
|
167
|
-
end
|
168
|
-
files << target if File.exist?(target)
|
169
|
-
|
170
|
-
# Save attachments
|
171
|
-
# ----------------
|
172
|
-
outdir = File.join(outdir, "#{File.basename(target)}.attachments")
|
173
|
-
digits = ((attachments.count + 1) / 10) + 1
|
174
|
-
i = 1
|
175
|
-
attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
|
176
|
-
prefix = "#{format('%0*d', digits, i)}-"
|
177
|
-
if (sub_msg = a.instance_variable_get(:@embedded_msg))
|
178
|
-
subject = a.properties[:display_name] || sub_msg.subject || ''
|
179
|
-
file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
|
180
|
-
result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
|
181
|
-
if (e = result[:error])
|
182
|
-
raise e
|
183
|
-
end
|
184
|
-
|
185
|
-
files += result[:files]
|
186
|
-
elsif a.filename
|
187
|
-
next if used_files.include?(a.filename)
|
188
|
-
|
189
|
-
file = File.join(outdir, "#{prefix}#{a.filename}")
|
190
|
-
FileUtils.mkdir_p(File.dirname(file))
|
191
|
-
File.open(file, 'wb') { |f| a.save(f) }
|
192
|
-
files << file
|
193
|
-
else
|
194
|
-
@warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
|
195
|
-
next
|
196
|
-
end
|
197
|
-
i += 1
|
198
|
-
end
|
199
|
-
|
200
|
-
if root_msg
|
201
|
-
p = Pathname(File.dirname(files.first))
|
202
|
-
files[1..].each do |f|
|
203
|
-
(headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
{
|
208
|
-
command: { status: 0 },
|
209
|
-
files:,
|
210
|
-
headers:,
|
211
|
-
warnings: @warnings
|
212
|
-
}
|
213
|
-
rescue Exception => e
|
214
|
-
raise unless root_msg
|
215
|
-
|
216
|
-
msg.close
|
217
|
-
{
|
218
|
-
command: { status: -1 },
|
219
|
-
files: [],
|
220
|
-
headers: {},
|
221
|
-
errors: [
|
222
|
-
{
|
223
|
-
error: e.message,
|
224
|
-
error_class: e.class.name,
|
225
|
-
error_trace: e.backtrace
|
226
|
-
}
|
227
|
-
],
|
228
|
-
warnings: @warnings
|
229
|
-
}
|
230
|
-
end
|
231
|
-
|
232
|
-
protected
|
233
|
-
|
234
|
-
def eml_to_html; end
|
235
|
-
|
236
|
-
private
|
237
|
-
|
238
|
-
def find_hdr(list, key)
|
239
|
-
keys = list.keys
|
240
|
-
if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
|
241
|
-
v = list[k]
|
242
|
-
v = v.first if v.is_a? Array
|
243
|
-
v = Rfc2047.decode(v).strip if v.is_a? String
|
244
|
-
return v
|
245
|
-
end
|
246
|
-
nil
|
247
|
-
end
|
248
|
-
|
249
|
-
def hdr_html(key, value)
|
250
|
-
return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
|
251
|
-
|
252
|
-
''
|
253
|
-
end
|
254
|
-
|
255
|
-
def get_attachment_data(attachments, cid)
|
256
|
-
attachments.each do |attachment|
|
257
|
-
next unless attachment.properties.attach_content_id == cid
|
258
|
-
|
259
|
-
attachment.data.rewind
|
260
|
-
return {
|
261
|
-
mime_type: attachment.properties.attach_mime_tag,
|
262
|
-
base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
|
263
|
-
}
|
264
|
-
end
|
265
|
-
nil
|
266
|
-
end
|
267
|
-
end
|
268
|
-
end
|
269
|
-
end
|
270
|
-
end
|