libis-format 1.3.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +40 -153
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +48 -0
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +47 -0
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +85 -125
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
metadata
CHANGED
@@ -1,101 +1,87 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libis-format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kris Dekeyser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 0.0.2
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 0.0.2
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: deep_dive
|
14
|
+
name: rake
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
30
16
|
requirements:
|
31
17
|
- - "~>"
|
32
18
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
34
|
-
type: :
|
19
|
+
version: '10.3'
|
20
|
+
type: :development
|
35
21
|
prerelease: false
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
37
23
|
requirements:
|
38
24
|
- - "~>"
|
39
25
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
26
|
+
version: '10.3'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
28
|
+
name: rspec
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
44
30
|
requirements:
|
45
31
|
- - "~>"
|
46
32
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
48
|
-
type: :
|
33
|
+
version: '3.1'
|
34
|
+
type: :development
|
49
35
|
prerelease: false
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
38
|
- - "~>"
|
53
39
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
40
|
+
version: '3.1'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
42
|
+
name: awesome_print
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
|
-
- - "
|
45
|
+
- - ">="
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
62
|
-
type: :
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- - "
|
52
|
+
- - ">="
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
54
|
+
version: '0'
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
56
|
+
name: equivalent-xml
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
59
|
- - "~>"
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
76
|
-
type: :
|
61
|
+
version: '0.5'
|
62
|
+
type: :development
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
66
|
- - "~>"
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
68
|
+
version: '0.5'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
70
|
+
name: nokogiri
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
|
-
- - "
|
73
|
+
- - ">="
|
88
74
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
90
|
-
type: :
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
91
77
|
prerelease: false
|
92
78
|
version_requirements: !ruby/object:Gem::Requirement
|
93
79
|
requirements:
|
94
|
-
- - "
|
80
|
+
- - ">="
|
95
81
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
82
|
+
version: '0'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
84
|
+
name: libis-tools
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
100
86
|
requirements:
|
101
87
|
- - "~>"
|
@@ -112,142 +98,109 @@ dependencies:
|
|
112
98
|
name: os
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
114
100
|
requirements:
|
115
|
-
- -
|
101
|
+
- - '='
|
116
102
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
103
|
+
version: 0.9.6
|
118
104
|
type: :runtime
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
107
|
requirements:
|
122
|
-
- -
|
108
|
+
- - '='
|
123
109
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
110
|
+
version: 0.9.6
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
112
|
+
name: mini_magick
|
127
113
|
requirement: !ruby/object:Gem::Requirement
|
128
114
|
requirements:
|
129
115
|
- - "~>"
|
130
116
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
117
|
+
version: '4.3'
|
132
118
|
type: :runtime
|
133
119
|
prerelease: false
|
134
120
|
version_requirements: !ruby/object:Gem::Requirement
|
135
121
|
requirements:
|
136
122
|
- - "~>"
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
124
|
+
version: '4.3'
|
139
125
|
- !ruby/object:Gem::Dependency
|
140
|
-
name:
|
126
|
+
name: deep_dive
|
141
127
|
requirement: !ruby/object:Gem::Requirement
|
142
128
|
requirements:
|
143
129
|
- - "~>"
|
144
130
|
- !ruby/object:Gem::Version
|
145
|
-
version: '0.
|
131
|
+
version: '0.3'
|
146
132
|
type: :runtime
|
147
133
|
prerelease: false
|
148
134
|
version_requirements: !ruby/object:Gem::Requirement
|
149
135
|
requirements:
|
150
136
|
- - "~>"
|
151
137
|
- !ruby/object:Gem::Version
|
152
|
-
version: '0.
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: awesome_print
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - ">="
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: '0'
|
160
|
-
type: :development
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - ">="
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: '0'
|
167
|
-
- !ruby/object:Gem::Dependency
|
168
|
-
name: equivalent-xml
|
169
|
-
requirement: !ruby/object:Gem::Requirement
|
170
|
-
requirements:
|
171
|
-
- - ">="
|
172
|
-
- !ruby/object:Gem::Version
|
173
|
-
version: '0'
|
174
|
-
type: :development
|
175
|
-
prerelease: false
|
176
|
-
version_requirements: !ruby/object:Gem::Requirement
|
177
|
-
requirements:
|
178
|
-
- - ">="
|
179
|
-
- !ruby/object:Gem::Version
|
180
|
-
version: '0'
|
181
|
-
- !ruby/object:Gem::Dependency
|
182
|
-
name: rake
|
183
|
-
requirement: !ruby/object:Gem::Requirement
|
184
|
-
requirements:
|
185
|
-
- - ">="
|
186
|
-
- !ruby/object:Gem::Version
|
187
|
-
version: '0'
|
188
|
-
type: :development
|
189
|
-
prerelease: false
|
190
|
-
version_requirements: !ruby/object:Gem::Requirement
|
191
|
-
requirements:
|
192
|
-
- - ">="
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
version: '0'
|
138
|
+
version: '0.3'
|
195
139
|
- !ruby/object:Gem::Dependency
|
196
|
-
name:
|
140
|
+
name: chromaprint
|
197
141
|
requirement: !ruby/object:Gem::Requirement
|
198
142
|
requirements:
|
199
|
-
- - "
|
143
|
+
- - "~>"
|
200
144
|
- !ruby/object:Gem::Version
|
201
|
-
version:
|
202
|
-
type: :
|
145
|
+
version: 0.0.2
|
146
|
+
type: :runtime
|
203
147
|
prerelease: false
|
204
148
|
version_requirements: !ruby/object:Gem::Requirement
|
205
149
|
requirements:
|
206
|
-
- - "
|
150
|
+
- - "~>"
|
207
151
|
- !ruby/object:Gem::Version
|
208
|
-
version:
|
152
|
+
version: 0.0.2
|
209
153
|
- !ruby/object:Gem::Dependency
|
210
|
-
name:
|
154
|
+
name: naturally
|
211
155
|
requirement: !ruby/object:Gem::Requirement
|
212
156
|
requirements:
|
213
|
-
- - "
|
157
|
+
- - "~>"
|
214
158
|
- !ruby/object:Gem::Version
|
215
|
-
version: '
|
216
|
-
type: :
|
159
|
+
version: '2.1'
|
160
|
+
type: :runtime
|
217
161
|
prerelease: false
|
218
162
|
version_requirements: !ruby/object:Gem::Requirement
|
219
163
|
requirements:
|
220
|
-
- - "
|
164
|
+
- - "~>"
|
221
165
|
- !ruby/object:Gem::Version
|
222
|
-
version: '
|
223
|
-
description: Collection of tools and classes that help to identify
|
224
|
-
create derivative copies.
|
166
|
+
version: '2.1'
|
167
|
+
description: Collection of tools and classes that help to identify formats of binary
|
168
|
+
files and create derivative copies (e.g. PDF from Word).
|
225
169
|
email:
|
226
170
|
- kris.dekeyser@libis.be
|
227
171
|
executables:
|
228
|
-
-
|
229
|
-
-
|
172
|
+
- droid
|
173
|
+
- fido
|
230
174
|
- formatinfo
|
231
175
|
- libis_format
|
232
|
-
-
|
176
|
+
- pdf_copy
|
233
177
|
extensions: []
|
234
178
|
extra_rdoc_files: []
|
235
179
|
files:
|
180
|
+
- ".coveralls.yml"
|
181
|
+
- ".gitignore"
|
182
|
+
- ".travis.yml"
|
236
183
|
- Gemfile
|
237
184
|
- LICENSE.txt
|
238
185
|
- README.md
|
239
|
-
-
|
240
|
-
-
|
186
|
+
- Rakefile
|
187
|
+
- base/Dockerfile
|
188
|
+
- base/Dockerfile.alpine
|
189
|
+
- base/Dockerfile.rvm
|
190
|
+
- base/rework_path
|
191
|
+
- bin/droid
|
192
|
+
- bin/fido
|
241
193
|
- bin/formatinfo
|
242
194
|
- bin/libis_format
|
243
|
-
- bin/
|
244
|
-
- data/AdobeRGB1998.icc
|
195
|
+
- bin/pdf_copy
|
245
196
|
- data/ISOcoated_v2_eci.icc
|
246
197
|
- data/PDFA_def.ps
|
247
198
|
- data/ead.xsd
|
199
|
+
- data/eciRGB_v2.icc
|
248
200
|
- data/lias_formats.xml
|
249
201
|
- data/types.yml
|
250
202
|
- data/xlink.xsd
|
203
|
+
- docker_cfg.yml
|
251
204
|
- lib/libis-format.rb
|
252
205
|
- lib/libis/format.rb
|
253
206
|
- lib/libis/format/cli/convert.rb
|
@@ -260,35 +213,43 @@ files:
|
|
260
213
|
- lib/libis/format/converter/audio_converter.rb
|
261
214
|
- lib/libis/format/converter/base.rb
|
262
215
|
- lib/libis/format/converter/chain.rb
|
263
|
-
- lib/libis/format/converter/email_converter.rb
|
264
216
|
- lib/libis/format/converter/fop_pdf_converter.rb
|
217
|
+
- lib/libis/format/converter/image_assembler.rb
|
265
218
|
- lib/libis/format/converter/image_converter.rb
|
219
|
+
- lib/libis/format/converter/image_splitter.rb
|
220
|
+
- lib/libis/format/converter/image_watermarker.rb
|
266
221
|
- lib/libis/format/converter/jp2_converter.rb
|
267
222
|
- lib/libis/format/converter/office_converter.rb
|
223
|
+
- lib/libis/format/converter/pdf_assembler.rb
|
268
224
|
- lib/libis/format/converter/pdf_converter.rb
|
225
|
+
- lib/libis/format/converter/pdf_optimizer.rb
|
226
|
+
- lib/libis/format/converter/pdf_splitter.rb
|
227
|
+
- lib/libis/format/converter/pdf_watermarker.rb
|
269
228
|
- lib/libis/format/converter/repository.rb
|
270
229
|
- lib/libis/format/converter/spreadsheet_converter.rb
|
271
230
|
- lib/libis/format/converter/video_converter.rb
|
272
231
|
- lib/libis/format/converter/xslt_converter.rb
|
273
232
|
- lib/libis/format/identifier.rb
|
233
|
+
- lib/libis/format/info.rb
|
234
|
+
- lib/libis/format/library.rb
|
274
235
|
- lib/libis/format/tool.rb
|
275
236
|
- lib/libis/format/tool/droid.rb
|
276
237
|
- lib/libis/format/tool/extension_identification.rb
|
277
|
-
- lib/libis/format/tool/
|
238
|
+
- lib/libis/format/tool/ffmpeg.rb
|
278
239
|
- lib/libis/format/tool/fido.rb
|
279
240
|
- lib/libis/format/tool/file_tool.rb
|
280
241
|
- lib/libis/format/tool/fop_pdf.rb
|
281
242
|
- lib/libis/format/tool/identification_tool.rb
|
282
|
-
- lib/libis/format/tool/msg_to_pdf.rb
|
283
243
|
- lib/libis/format/tool/office_to_pdf.rb
|
244
|
+
- lib/libis/format/tool/pdf_copy.rb
|
245
|
+
- lib/libis/format/tool/pdf_merge.rb
|
284
246
|
- lib/libis/format/tool/pdf_optimizer.rb
|
247
|
+
- lib/libis/format/tool/pdf_split.rb
|
285
248
|
- lib/libis/format/tool/pdf_to_pdfa.rb
|
286
|
-
- lib/libis/format/tool/pdf_tool.rb
|
287
249
|
- lib/libis/format/tool/pdfa_validator.rb
|
288
250
|
- lib/libis/format/tool/spreadsheet_to_ods.rb
|
289
|
-
- lib/libis/format/type_database.rb
|
290
|
-
- lib/libis/format/type_database_impl.rb
|
291
251
|
- lib/libis/format/version.rb
|
252
|
+
- lib/libis/format/yaml_loader.rb
|
292
253
|
- libis-format.gemspec
|
293
254
|
- tools/PdfTool.jar
|
294
255
|
- tools/bcpkix-jdk15on-1.49.jar
|
@@ -343,9 +304,8 @@ files:
|
|
343
304
|
- tools/fop/lib/xmlgraphics-commons-2.3.jar
|
344
305
|
- tools/fop/lib/xmlgraphics-commons.LICENSE.txt
|
345
306
|
- tools/fop/lib/xmlgraphics-commons.NOTICE.txt
|
346
|
-
- tools/
|
347
|
-
- tools/pdfbox/
|
348
|
-
- tools/pdfbox/preflight-app-3.0.3.jar
|
307
|
+
- tools/pdfbox/pdfbox-app-2.0.13.jar
|
308
|
+
- tools/pdfbox/preflight-app-2.0.13.jar
|
349
309
|
homepage: ''
|
350
310
|
licenses:
|
351
311
|
- MIT
|
@@ -358,14 +318,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
358
318
|
requirements:
|
359
319
|
- - ">="
|
360
320
|
- !ruby/object:Gem::Version
|
361
|
-
version: '
|
321
|
+
version: '0'
|
362
322
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
363
323
|
requirements:
|
364
324
|
- - ">="
|
365
325
|
- !ruby/object:Gem::Version
|
366
326
|
version: '0'
|
367
327
|
requirements: []
|
368
|
-
rubygems_version: 3.
|
328
|
+
rubygems_version: 3.0.8
|
369
329
|
signing_key:
|
370
330
|
specification_version: 4
|
371
331
|
summary: LIBIS File format format services.
|
data/data/AdobeRGB1998.icc
DELETED
Binary file
|
@@ -1,35 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'base'
|
4
|
-
|
5
|
-
require 'libis/format/tool/msg_to_pdf'
|
6
|
-
require 'libis/format/type_database'
|
7
|
-
require 'rexml/document'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
module Converter
|
12
|
-
class EmailConverter < Libis::Format::Converter::Base
|
13
|
-
def self.input_types
|
14
|
-
[:MSG]
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.output_types(format = nil)
|
18
|
-
return [] unless input_types.include?(format)
|
19
|
-
|
20
|
-
[:PDF]
|
21
|
-
end
|
22
|
-
|
23
|
-
def email_convert(_)
|
24
|
-
# force usage of this converter
|
25
|
-
end
|
26
|
-
|
27
|
-
def convert(source, target, format, opts = {})
|
28
|
-
super
|
29
|
-
|
30
|
-
Format::Tool::MsgToPdf.run(source, target)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
@@ -1,270 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'mapi/msg'
|
4
|
-
require 'rfc_2047'
|
5
|
-
require 'cgi'
|
6
|
-
require 'pdfkit'
|
7
|
-
require 'time'
|
8
|
-
require 'fileutils'
|
9
|
-
require 'pathname'
|
10
|
-
require 'libis/format/config'
|
11
|
-
|
12
|
-
module Libis
|
13
|
-
module Format
|
14
|
-
module Tool
|
15
|
-
class MsgToPdf
|
16
|
-
include ::Libis::Tools::Logger
|
17
|
-
|
18
|
-
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
|
19
|
-
HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
|
20
|
-
HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
|
21
|
-
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
|
22
|
-
|
23
|
-
IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
|
24
|
-
IMG_CID_HTML_REGEX = /cid:([^"]*)/m
|
25
|
-
|
26
|
-
def self.installed?
|
27
|
-
File.exist?(Libis::Format::Config[:wkhtmltopdf])
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.run(source, target, **options)
|
31
|
-
new.run source, target, **options
|
32
|
-
end
|
33
|
-
|
34
|
-
def run(source, target, **options)
|
35
|
-
# Preliminary checks
|
36
|
-
# ------------------
|
37
|
-
|
38
|
-
@warnings = []
|
39
|
-
|
40
|
-
# Check if source file exists
|
41
|
-
raise "File #{source} does not exist" unless File.exist?(source)
|
42
|
-
|
43
|
-
# Retrieving the message
|
44
|
-
# ----------------------
|
45
|
-
|
46
|
-
# Open the message
|
47
|
-
msg = Mapi::Msg.open(source)
|
48
|
-
|
49
|
-
target_format = options.delete(:to_html) ? :HTML : :PDF
|
50
|
-
result = msg_to_pdf(msg, target, target_format, options)
|
51
|
-
msg.close
|
52
|
-
result
|
53
|
-
end
|
54
|
-
|
55
|
-
def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
|
56
|
-
# Make sure the target directory exists
|
57
|
-
outdir = File.dirname(target)
|
58
|
-
FileUtils.mkdir_p(outdir)
|
59
|
-
|
60
|
-
# Get the body of the message in HTML
|
61
|
-
body = msg.properties.body_html
|
62
|
-
|
63
|
-
# Embed plain body in HTML as a fallback
|
64
|
-
body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
|
65
|
-
|
66
|
-
# Check and fix the character encoding
|
67
|
-
begin
|
68
|
-
# Try to encode into UTF-8
|
69
|
-
body.encode!('UTF-8', universal_newline: true)
|
70
|
-
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
71
|
-
begin
|
72
|
-
# If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
|
73
|
-
body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
|
74
|
-
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
|
75
|
-
# If that fails too, log a warning and replace the invalid/unknown with a ? character.
|
76
|
-
@warnings << "#{e.class}: #{e.message}"
|
77
|
-
body.encode!('UTF-8', universal_newline: true, invalid: :replace, undef: :replace)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
# Process headers
|
82
|
-
# ---------------
|
83
|
-
headers = {}
|
84
|
-
hdr_html = ''
|
85
|
-
|
86
|
-
%w[From To Cc Subject Date].each do |key|
|
87
|
-
value = find_hdr(msg.headers, key)
|
88
|
-
if value
|
89
|
-
headers[key.downcase.to_sym] = value
|
90
|
-
hdr_html += hdr_html(key, value)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
[:date].each do |key|
|
95
|
-
next unless headers[key]
|
96
|
-
|
97
|
-
headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
|
98
|
-
end
|
99
|
-
|
100
|
-
# Add header section to the HTML body
|
101
|
-
unless hdr_html.empty?
|
102
|
-
# Insert header block styles
|
103
|
-
if body =~ %r{</head>}
|
104
|
-
# if head exists, append the style block
|
105
|
-
body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
|
106
|
-
elsif body =~ %r{<head/>}
|
107
|
-
# empty head, replace with the style block
|
108
|
-
body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
|
109
|
-
else
|
110
|
-
# otherwise insert a head section before the body tag
|
111
|
-
body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
|
112
|
-
end
|
113
|
-
# Add the headers html table as first element in the body section
|
114
|
-
body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
|
115
|
-
end
|
116
|
-
|
117
|
-
# Embed inline images
|
118
|
-
# -------------------
|
119
|
-
attachments = msg.attachments
|
120
|
-
used_files = []
|
121
|
-
|
122
|
-
# First process plaintext cid entries
|
123
|
-
body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
|
124
|
-
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
125
|
-
if data
|
126
|
-
used_files << ::Regexp.last_match(1)
|
127
|
-
"<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
|
128
|
-
else
|
129
|
-
'<img src=""/>'
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
# Then process HTML img tags with CID entries
|
134
|
-
body.gsub!(IMG_CID_HTML_REGEX) do |_match|
|
135
|
-
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
136
|
-
if data
|
137
|
-
used_files << ::Regexp.last_match(1)
|
138
|
-
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
139
|
-
else
|
140
|
-
''
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
# Create PDF
|
145
|
-
# ----------
|
146
|
-
files = []
|
147
|
-
|
148
|
-
if target_format == :PDF
|
149
|
-
# PDF creation options
|
150
|
-
pdf_options = {
|
151
|
-
page_size: 'A4',
|
152
|
-
margin_top: '10mm',
|
153
|
-
margin_bottom: '10mm',
|
154
|
-
margin_left: '10mm',
|
155
|
-
margin_right: '10mm',
|
156
|
-
# image_quality: 100,
|
157
|
-
# viewport_size: '2480x3508',
|
158
|
-
dpi: 300
|
159
|
-
}.merge pdf_options
|
160
|
-
|
161
|
-
subject = find_hdr(msg.headers, 'Subject')
|
162
|
-
kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
|
163
|
-
pdf = kit.to_pdf
|
164
|
-
File.open(target, 'wb') { |f| f.write(pdf) }
|
165
|
-
else
|
166
|
-
File.open(target, 'wb') { |f| f.write(body) }
|
167
|
-
end
|
168
|
-
files << target if File.exist?(target)
|
169
|
-
|
170
|
-
# Save attachments
|
171
|
-
# ----------------
|
172
|
-
outdir = File.join(outdir, "#{File.basename(target)}.attachments")
|
173
|
-
digits = ((attachments.count + 1) / 10) + 1
|
174
|
-
i = 1
|
175
|
-
attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
|
176
|
-
prefix = "#{format('%0*d', digits, i)}-"
|
177
|
-
if (sub_msg = a.instance_variable_get(:@embedded_msg))
|
178
|
-
subject = a.properties[:display_name] || sub_msg.subject || ''
|
179
|
-
file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
|
180
|
-
result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
|
181
|
-
if (e = result[:error])
|
182
|
-
raise e
|
183
|
-
end
|
184
|
-
|
185
|
-
files += result[:files]
|
186
|
-
elsif a.filename
|
187
|
-
next if used_files.include?(a.filename)
|
188
|
-
|
189
|
-
file = File.join(outdir, "#{prefix}#{a.filename}")
|
190
|
-
FileUtils.mkdir_p(File.dirname(file))
|
191
|
-
File.open(file, 'wb') { |f| a.save(f) }
|
192
|
-
files << file
|
193
|
-
else
|
194
|
-
@warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
|
195
|
-
next
|
196
|
-
end
|
197
|
-
i += 1
|
198
|
-
end
|
199
|
-
|
200
|
-
if root_msg
|
201
|
-
p = Pathname(File.dirname(files.first))
|
202
|
-
files[1..].each do |f|
|
203
|
-
(headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
{
|
208
|
-
command: { status: 0 },
|
209
|
-
files:,
|
210
|
-
headers:,
|
211
|
-
warnings: @warnings
|
212
|
-
}
|
213
|
-
rescue Exception => e
|
214
|
-
raise unless root_msg
|
215
|
-
|
216
|
-
msg.close
|
217
|
-
{
|
218
|
-
command: { status: -1 },
|
219
|
-
files: [],
|
220
|
-
headers: {},
|
221
|
-
errors: [
|
222
|
-
{
|
223
|
-
error: e.message,
|
224
|
-
error_class: e.class.name,
|
225
|
-
error_trace: e.backtrace
|
226
|
-
}
|
227
|
-
],
|
228
|
-
warnings: @warnings
|
229
|
-
}
|
230
|
-
end
|
231
|
-
|
232
|
-
protected
|
233
|
-
|
234
|
-
def eml_to_html; end
|
235
|
-
|
236
|
-
private
|
237
|
-
|
238
|
-
def find_hdr(list, key)
|
239
|
-
keys = list.keys
|
240
|
-
if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
|
241
|
-
v = list[k]
|
242
|
-
v = v.first if v.is_a? Array
|
243
|
-
v = Rfc2047.decode(v).strip if v.is_a? String
|
244
|
-
return v
|
245
|
-
end
|
246
|
-
nil
|
247
|
-
end
|
248
|
-
|
249
|
-
def hdr_html(key, value)
|
250
|
-
return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
|
251
|
-
|
252
|
-
''
|
253
|
-
end
|
254
|
-
|
255
|
-
def get_attachment_data(attachments, cid)
|
256
|
-
attachments.each do |attachment|
|
257
|
-
next unless attachment.properties.attach_content_id == cid
|
258
|
-
|
259
|
-
attachment.data.rewind
|
260
|
-
return {
|
261
|
-
mime_type: attachment.properties.attach_mime_tag,
|
262
|
-
base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
|
263
|
-
}
|
264
|
-
end
|
265
|
-
nil
|
266
|
-
end
|
267
|
-
end
|
268
|
-
end
|
269
|
-
end
|
270
|
-
end
|