libis-format 1.0.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/Gemfile +2 -0
  4. data/data/AdobeRGB1998.icc +0 -0
  5. data/data/PDFA_def.ps +3 -3
  6. data/lib/libis/format/config.rb +1 -1
  7. data/lib/libis/format/converter/audio_converter.rb +6 -8
  8. data/lib/libis/format/converter/chain.rb +16 -4
  9. data/lib/libis/format/converter/email_converter.rb +2 -4
  10. data/lib/libis/format/converter/fop_pdf_converter.rb +0 -2
  11. data/lib/libis/format/converter/image_converter.rb +5 -3
  12. data/lib/libis/format/converter/jp2_converter.rb +3 -3
  13. data/lib/libis/format/converter/office_converter.rb +1 -3
  14. data/lib/libis/format/converter/pdf_converter.rb +13 -4
  15. data/lib/libis/format/converter/spreadsheet_converter.rb +1 -3
  16. data/lib/libis/format/converter/video_converter.rb +5 -2
  17. data/lib/libis/format/converter/xslt_converter.rb +15 -14
  18. data/lib/libis/format/tool/email_to_pdf.rb +52 -17
  19. data/lib/libis/format/tool/{ffmpeg.rb → ff_mpeg.rb} +10 -1
  20. data/lib/libis/format/tool/fop_pdf.rb +12 -0
  21. data/lib/libis/format/tool/office_to_pdf.rb +10 -1
  22. data/lib/libis/format/tool/pdf_copy.rb +11 -1
  23. data/lib/libis/format/tool/pdf_merge.rb +11 -1
  24. data/lib/libis/format/tool/pdf_optimizer.rb +11 -2
  25. data/lib/libis/format/tool/pdf_split.rb +11 -1
  26. data/lib/libis/format/tool/pdf_to_pdfa.rb +59 -43
  27. data/lib/libis/format/tool/pdfa_validator.rb +28 -35
  28. data/lib/libis/format/tool/spreadsheet_to_ods.rb +10 -1
  29. data/lib/libis/format/tool.rb +1 -1
  30. data/lib/libis/format/version.rb +1 -1
  31. data/libis-format.gemspec +2 -0
  32. data/tools/emailconverter.jar +0 -0
  33. data/tools/pdf2pdfa +395 -0
  34. metadata +20 -4
  35. data/data/eciRGB_v2.icc +0 -0
data/tools/pdf2pdfa ADDED
@@ -0,0 +1,395 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # PDF2ARCHIVE 0.3.2
4
+ # (C) 2018 Matteo Seclì <secli.matteo@gmail.com>
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+
20
+ #=====# INITIALIZE VARIABLES #=====#
21
+ VERSION="0.3.2"
22
+ INPUT=""
23
+ OUTPUT=""
24
+ QUALITYOPTS=""
25
+ DEBUG=false
26
+ VALIDATE=false
27
+ MSGOPTS="-dQUIET -sstdout=/dev/null"
28
+ VERAMSGOPTS=""
29
+ #ERROPTS="2>/dev/null"
30
+
31
+
32
+ #=====# HELP FUNCTION #=====#
33
+ help()
34
+ { # Print the boxed title banner followed by the usage text on stdout
35
+ TOTLEN="38" # Total banner width, including the two '|' borders; adjust freely
36
+ TITLESTRING="PDF2ARCHIVE, version $VERSION"
37
+ SPACEL=$(echo "($TOTLEN-${#TITLESTRING})/2 + ($TOTLEN-2-${#TITLESTRING})%2 - 1" | bc) # Left padding; takes the odd leftover column. Derived from TOTLEN instead of a literal 36
38
+ SPACER=$(echo "($TOTLEN-${#TITLESTRING})/2 - 1" | bc) # Right padding (bc truncates the division)
39
+ TITLESTRING=$(printf "|%-${SPACEL}s%s%-${SPACER}s|" "" "$TITLESTRING" "") # '|' + left pad + title + right pad + '|'
40
+ DASHSTRING=$(eval printf "%.0s-" {1..$TOTLEN}) # TOTLEN dashes; eval is needed because brace expansion runs before $TOTLEN is expanded
41
+ echo \
42
+ "$DASHSTRING
43
+ $TITLESTRING
44
+ $DASHSTRING
45
+
46
+ OVERVIEW:
47
+ A simple Ghostscript-based PDF to PDF/A-1B converter.
48
+
49
+ USAGE:
50
+ $0 [options] input.pdf [output.pdf]
51
+
52
+ EXAMPLES:
53
+ Convert 'input.pdf' in PDF/A-1B format; the output is 'input-PDFA.pdf':
54
+ $0 input.pdf
55
+ Convert 'input.pdf' in PDF/A-1B format; the output is 'output.pdf':
56
+ $0 input.pdf output.pdf
57
+ Convert 'input.pdf' in PDF/A-1B format and perform a high-quality compression:
58
+ $0 --quality=high input.pdf
59
+ Convert 'input.pdf' in PDF/A-1B format and specify the document title:
60
+ $0 --title=\"Title of your nice document\" input.pdf
61
+ Convert 'input.pdf' in PDF/A-1B format and validate the result:
62
+ $0 --validate input.pdf
63
+
64
+ OPTIONS:
65
+ -h, --help Show the help
66
+ --quality=<value> Set the quality of the output when downsampling. The
67
+ possible values are 'high', 'medium' and 'low', where
68
+ 'high' gives the highest output quality. By specifying no
69
+ option, no additional downsampling is done.
70
+ --title=<value> Title of the resulting PDF/A file
71
+ --author=<value> Author of the resulting PDF/A file
72
+ --subject=<value> Subject of the resulting PDF/A file
73
+ --keywords=<value> Comma-separated keywords of the resulting PDF/A file
74
+ --cleanmetadata Clean all the standard metadata fields, except the ones
75
+ specified via the command line options.
76
+ --validate Validate the resulting file. The validation is done with
77
+ VeraPDF, you need a working Java installation.
78
+ --validate-only Perform only the validation on the input file, again using
79
+ VeraPDF
80
+ --debug Write additional debug information on screen
81
+ -v, --version Show the program version
82
+
83
+ LICENSE:
84
+ GPLv3
85
+
86
+ AUTHORS:
87
+ (C) 2017-2018 Matteo Seclì"
88
+ }
89
+
90
+
91
+ #=====# RUN HELPER FUNCTION #=====#
92
+ run() { # Execute the command line given as arguments ("$@"); its stderr is discarded unless DEBUG is true
93
+ if $DEBUG; then # DEBUG holds the literal command name 'true' or 'false'
94
+ #v=$(exec 2>&1 && set -x && set -- "$@")
95
+ #echo "#${v#*--}"
96
+ "$@" # Debug mode: leave the command's stderr visible
97
+ else
98
+ "$@" 2>/dev/null #>/dev/null 2>&1
99
+ fi
100
+ }
101
+
102
+
103
+ #=====# CHECKS #=====#
104
+ if ! command -v gs >/dev/null 2>&1; then # Abort early when Ghostscript is missing; 'command -v' is a shell builtin, unlike the external 'which'
105
+ echo " ERROR: Ghostscript is not installed or it's not in the path"
106
+ exit 1 # Non-zero status so a calling program can detect the failure (a bare 'exit' returned echo's status, 0)
107
+ fi
108
+
109
+
110
+ #=====# VALIDATION #=====#
111
+
112
+ javaCheck() { # Exit with status 1 when no 'java' executable can be found; takes no arguments
113
+ if ! command -v java >/dev/null 2>&1; then # Builtin lookup; does not depend on an external 'which' or on what it prints
114
+ echo " ERROR: Java is not installed or it's not in the path"
115
+ echo " Cannot perform validation"
116
+ exit 1
117
+ fi
118
+ }
119
+
120
+ validate() { # Arguments: optional veraPDF options followed by the PDF file to check; prints the veraPDF text report
121
+ echo " Validating..."
122
+ echo " $(./verapdf/verapdf --extract --flavour 1b --format text "$@")" # Forward every argument as given, so an absent option no longer becomes an empty "" argument. NOTE(review): the veraPDF path is relative to the current directory - confirm against the caller
123
+ }
124
+
125
+
126
+ #=====# INPUT PARSER #=====#
127
+ if [ "$1" == "" ]; then # No arguments at all: show the help and stop
128
+ help
129
+ exit
130
+ fi
131
+ while [ "$1" != "" ]; do # Consume the arguments one at a time; an empty argument ends parsing
132
+ PARAM="${1%%=*}" # Text before the first '=' (the whole argument when there is none); no word splitting or globbing
133
+ VALUE="${1#"$PARAM"}"; VALUE="${VALUE#=}" # Text after the first '=', kept verbatim (later '=' signs and spacing survive); empty when there is no '='
134
+ case $PARAM in
135
+ -h | --help)
136
+ help
137
+ exit
138
+ ;;
139
+ -v | --version)
140
+ echo $VERSION
141
+ exit
142
+ ;;
143
+ --debug)
144
+ DEBUG=true
145
+ MSGOPTS=""
146
+ VERAMSGOPTS="--verbose"
147
+ #ERROPTS=""
148
+ ;;
149
+ --quality)
150
+ if [ "$VALUE" == "high" ]; then
151
+ QUALITYOPTS="-dPDFSETTINGS=/printer"
152
+ elif [ "$VALUE" == "medium" ]; then
153
+ QUALITYOPTS="-dPDFSETTINGS=/ebook"
154
+ elif [ "$VALUE" == "low" ]; then
155
+ QUALITYOPTS="-dPDFSETTINGS=/screen"
156
+ else
157
+ echo " ERROR: unknown quality option '$VALUE'"
158
+ help
159
+ exit 1
160
+ fi
161
+ ;;
162
+ --cleanmetadata) # Set every metadata variable that is still unset to the empty string
163
+ [ -z ${PDFTITLE+x} ] && PDFTITLE=""
164
+ [ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=""
165
+ [ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=""
166
+ [ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=""
167
+ [ -z ${PDFCREATOR+x} ] && PDFCREATOR=""
168
+ [ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=""
169
+ [ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=""
170
+ [ -z ${PDFMODDATE+x} ] && PDFMODDATE=""
171
+ [ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=""
172
+ ;;
173
+ --title)
174
+ PDFTITLE=$VALUE
175
+ ;;
176
+ --author)
177
+ PDFAUTHOR=$VALUE
178
+ ;;
179
+ --subject)
180
+ PDFSUBJECT=$VALUE
181
+ ;;
182
+ --keywords)
183
+ PDFKEYWORDS=$VALUE
184
+ ;;
185
+ --validate)
186
+ javaCheck
187
+ VALIDATE=true
188
+ ;;
189
+ --validate-only)
190
+ javaCheck
191
+ validate $VERAMSGOPTS "$2" # The file is the next argument; quoted so a path with spaces stays one word. VERAMSGOPTS stays unquoted so an empty value adds no argument
192
+ exit
193
+ ;;
194
+ *.pdf) # First PDF name is the input, second is the output
195
+ if [ "$INPUT" == "" ]; then
196
+ INPUT=$PARAM
197
+ elif [ "$OUTPUT" == "" ]; then
198
+ OUTPUT=$PARAM
199
+ else
200
+ echo " ERROR: too many PDF files as input!"
201
+ help
202
+ exit 1
203
+ fi
204
+ ;;
205
+ *)
206
+ echo " ERROR: unknown parameter \"$PARAM\""
207
+ help
208
+ exit 1
209
+ ;;
210
+ esac
211
+ shift
212
+ done
213
+
214
+ #=====# SET UP ALL THE STUFF #=====#
215
+ echo "=== Welcome to PDF2ARCHIVE ===" # Start of the conversion run: derive the output name and create the scratch files
216
+ if [ "$OUTPUT" == "" ]; then # No output name given: use the input name with '.pdf' replaced by '-PDFA.pdf'
217
+ OUTPUT="${INPUT%.pdf}-PDFA.pdf"
218
+ fi
219
+ TMPFILE=$(mktemp) # Intermediate PDF written by the first Ghostscript pass
220
+ TMPDIR=$(mktemp -d) # Scratch directory for the three generated helper files below. NOTE(review): TMPDIR is also the environment variable name mktemp consults - confirm the reuse is intended
221
+ PSTMPFILE=$TMPDIR/PDFA_def.ps
222
+ ICCTMPFILE=$TMPDIR/AdobeRGB1998.icc
223
+ INFOTMPFILE=$TMPDIR/pdf_minimal_info.ps
224
+ echo \
225
+ "%!PS
226
+ % Extract PDF info in a minimal way.
227
+ % Inspired by 'toolbin/pdf_info.ps'.
228
+
229
+ /QUIET true def
230
+ File dup (r) file runpdfbegin
231
+ Trailer /Info knownoget {
232
+ dup /Title knownoget { (__knowninfoTitle: ) print = flush } if
233
+ dup /Author knownoget { (__knowninfoAuthor: ) print = flush } if
234
+ dup /Subject knownoget { (__knowninfoSubject: ) print = flush } if
235
+ dup /Keywords knownoget { (__knowninfoKeywords: ) print = flush } if
236
+ dup /Creator knownoget { (__knowninfoCreator: ) print = flush } if
237
+ dup /Producer knownoget { (__knowninfoProducer: ) print = flush } if
238
+ dup /CreationDate knownoget { (__knowninfoCreationDate: ) print = flush } if
239
+ dup /ModDate knownoget { (__knowninfoModDate: ) print = flush } if
240
+ dup /Trapped knownoget { (__knowninfoTrapped: ) print = flush } if
241
+ } if
242
+ quit
243
+ " > $INFOTMPFILE # The PostScript program above prints one '__knowninfo<Field>: value' line per Info entry it finds
244
+
245
+
246
+ #=====# PRESERVE UNSPECIFIED KNOWN STANDARD METADATA #=====#
247
+ # Notes:
248
+ # 'iconv' is necessary to filter out all the invalid bytes.
249
+ # If it's not used, sed (unless it's GNU sed) will fail with
250
+ # 'RE error: illegal byte sequence'. A solution to this is to
251
+ # use 'LC_CTYPE=C && LANG=C && echo "$METADUMP" ...' in the
252
+ # variable assignments; however, this produces bad PDF files.
253
+ #
254
+ METADUMP=$(gs -dNOSAFER -dNODISPLAY -q -sFile="$INPUT" $INFOTMPFILE | iconv -f utf-8 -t utf-8 -c) # Run the info script on the input; 'iconv -c' drops bytes that are not valid UTF-8
255
+ [ -z ${PDFTITLE+x} ] && PDFTITLE=$(echo "$METADUMP" | grep "__knowninfoTitle: " | sed "s/^__knowninfoTitle: //g") # ${VAR+x} is empty only when VAR is unset, so values set by the command line are kept
256
+ [ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=$(echo "$METADUMP" | grep "__knowninfoAuthor: " | sed "s/^__knowninfoAuthor: //g")
257
+ [ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=$(echo "$METADUMP" | grep "__knowninfoSubject: " | sed "s/^__knowninfoSubject: //g")
258
+ [ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=$(echo "$METADUMP" | grep "__knowninfoKeywords: " | sed "s/^__knowninfoKeywords: //g")
259
+ [ -z ${PDFCREATOR+x} ] && PDFCREATOR=$(echo "$METADUMP" | grep "__knowninfoCreator: " | sed "s/^__knowninfoCreator: //g")
260
+ [ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=$(echo "$METADUMP" | grep "__knowninfoProducer: " | sed "s/^__knowninfoProducer: //g")
261
+ [ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=$(echo "$METADUMP" | grep "__knowninfoCreationDate: " | sed "s/^__knowninfoCreationDate: //g")
262
+ [ -z ${PDFMODDATE+x} ] && PDFMODDATE=$(echo "$METADUMP" | grep "__knowninfoModDate: " | sed "s/^__knowninfoModDate: //g")
263
+ [ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=$(echo "$METADUMP" | grep "__knowninfoTrapped: " | sed "s/^__knowninfoTrapped: //g")
264
+ # Replace "Trapped" string, if not empty, with an operator. Fixes 3Heights.
265
+ if [ "$PDFTRAPPED" != "" ]; then
266
+ PDFTRAPPED="/$(tr '[:lower:]' '[:upper:]' <<< ${PDFTRAPPED:0:1})$(tr '[:upper:]' '[:lower:]' <<< ${PDFTRAPPED:1})" # '/' + upper-cased first character + lower-cased remainder
267
+ fi
268
+ # Check if the operator is allowed, otherwise empty variable.
269
+ if [ "$PDFTRAPPED" != "/True" ] && [ "$PDFTRAPPED" != "/False" ]; then
270
+ PDFTRAPPED=""
271
+ fi
272
+
273
+
274
+ #=====# PRINT DEBUG INFO #=====#
275
+ if $DEBUG; then # Only with --debug: print the tool versions, file locations and the metadata values in effect
276
+ echo " DEBUG: running PDF2ARCHIVE, version $VERSION"
277
+ echo " DEBUG: using Ghostscript binary at $(which gs), version $(gs --version)"
278
+ echo " DEBUG: the input file is '$INPUT'"
279
+ echo " DEBUG: the output file is '$OUTPUT'"
280
+ echo " DEBUG: the intermediate processing file is $TMPFILE"
281
+ echo " DEBUG: the temporary directory is $TMPDIR"
282
+ echo " DEBUG: the current quality options are '$QUALITYOPTS'"
283
+ echo " DEBUG: PDF title '$PDFTITLE'"
284
+ echo " DEBUG: PDF author '$PDFAUTHOR'"
285
+ echo " DEBUG: PDF subject '$PDFSUBJECT'"
286
+ echo " DEBUG: PDF keywords '$PDFKEYWORDS'"
287
+ echo " DEBUG: PDF creator '$PDFCREATOR'"
288
+ echo " DEBUG: PDF producer '$PDFPRODUCER'"
289
+ echo " DEBUG: PDF creation date '$PDFCREATIONDATE'"
290
+ echo " DEBUG: PDF modification date '$PDFMODDATE'"
291
+ echo " DEBUG: PDF trapping '$PDFTRAPPED'"
292
+ fi
293
+
294
+
295
+ #=====# CREATE THE PS DEFINITION FILE #=====#
296
+ echo " Creating the definition file..." # Build $PSTMPFILE piece by piece: header and title, optional Info entries, then the ICC / OutputIntent pdfmarks
297
+ echo \
298
+ "%!
299
+ % This prefix file for creating a PDF/A document is derived from
300
+ % the sample included with Ghostscript 9.07, released under the
301
+ % GNU Affero General Public License.
302
+ % Modified 4/15/2013 by MCB Systems.
303
+
304
+ % Feel free to modify entries marked with \"Customize\".
305
+
306
+ % This assumes an ICC profile to reside in the file (AdobeRGB1998.icc),
307
+ % unless the user modifies the corresponding line below.
308
+
309
+ % The color space described by the ICC profile must correspond to the
310
+ % ProcessColorModel specified when using this prefix file (GRAY with
311
+ % DeviceGray, RGB with DeviceRGB, and CMYK with DeviceCMYK).
312
+
313
+ % Define entries in the document Info dictionary :
314
+
315
+ /ICCProfile ($ICCTMPFILE) % Customize.
316
+ def
317
+
318
+ [ /Title ($PDFTITLE) % Customize." > $PSTMPFILE # First write truncates the file; the title is always emitted. NOTE(review): metadata values are placed inside PostScript (...) strings unescaped - parentheses or backslashes in them look unsafe, confirm
319
+ if [ "$PDFAUTHOR" != "" ]; then # The following Info entries are appended only when non-empty
320
+ echo " /Author ($PDFAUTHOR)" >> $PSTMPFILE
321
+ fi
322
+ if [ "$PDFSUBJECT" != "" ]; then
323
+ echo " /Subject ($PDFSUBJECT)" >> $PSTMPFILE
324
+ fi
325
+ if [ "$PDFKEYWORDS" != "" ]; then
326
+ echo " /Keywords ($PDFKEYWORDS)" >> $PSTMPFILE
327
+ fi
328
+ if [ "$PDFCREATOR" != "" ]; then
329
+ echo " /Creator ($PDFCREATOR)" >> $PSTMPFILE
330
+ fi
331
+ echo \
332
+ "% /Producer % Reserved to GS
333
+ % /CreationDate % Reserved to GS
334
+ % /ModDate % Reserved to GS" >> $PSTMPFILE
335
+ if [ "$PDFTRAPPED" != "" ]; then # Written as a bare name (/True or /False), not as a string
336
+ echo " /Trapped $PDFTRAPPED" >> $PSTMPFILE
337
+ fi
338
+ echo \
339
+ " /DOCINFO pdfmark
340
+
341
+ % Define an ICC profile :
342
+
343
+ [/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
344
+ [{icc_PDFA} <</N systemdict /ProcessColorModel get /DeviceGray eq {1} {systemdict /ProcessColorModel get /DeviceRGB eq {3} {4} ifelse} ifelse >> /PUT pdfmark
345
+ [{icc_PDFA} ICCProfile (r) file /PUT pdfmark
346
+
347
+ % Define the output intent dictionary :
348
+
349
+ [/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
350
+ [{OutputIntent_PDFA} <<
351
+ /Type /OutputIntent % Must be so (the standard requires).
352
+ /S /GTS_PDFA1 % Must be so (the standard requires).
353
+ /DestOutputProfile {icc_PDFA} % Must be so (see above).
354
+ /OutputConditionIdentifier (AdobeRGB1998) % Customize
355
+ >> /PUT pdfmark
356
+ [{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
357
+ " >> $PSTMPFILE
358
+
359
+
360
+ #=====# CREATE THE COLOR PROFILE FILE #=====#
361
+ echo -n -e "\\x00\\x00\\x02\\x30\\x41\\x44\\x42\\x45\\x02\\x10\\x00\\x00\\x6d\\x6e\\x74\\x72\\x52\\x47\\x42\\x20\\x58\\x59\\x5a\\x20\\x07\\xd0\\x00\\x08\\x00\\x0b\\x00\\x13\\x00\\x33\\x00\\x3b\\x61\\x63\\x73\\x70\\x41\\x50\\x50\\x4c\\x00\\x00\\x00\\x00\\x6e\\x6f\\x6e\\x65\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf6\\xd6\\x00\\x01\\x00\\x00\\x00\\x00\\xd3\\x2d\\x41\\x44\\x42\\x45\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0a\\x63\\x70\\x72\\x74\\x00\\x00\\x00\\xfc\\x00\\x00\\x00\\x32\\x64\\x65\\x73\\x63\\x00\\x00\\x01\\x30\\x00\\x00\\x00\\x6b\\x77\\x74\\x70\\x74\\x00\\x00\\x01\\x9c\\x00\\x00\\x00\\x14\\x62\\x6b\\x70\\x74\\x00\\x00\\x01\\xb0\\x00\\x00\\x00\\x14\\x72\\x54\\x52\\x43\\x00\\x00\\x01\\xc4\\x00\\x00\\x00\\x0e\\x67\\x54\\x52\\x43\\x00\\x00\\x01\\xd4\\x00\\x00\\x00\\x0e\\x62\\x54\\x52\\x43\\x00\\x00\\x01\\xe4\\x00\\x00\\x00\\x0e\\x72\\x58\\x59\\x5a\\x00\\x00\\x01\\xf4\\x00\\x00\\x00\\x14\\x67\\x58\\x59\\x5a\\x00\\x00\\x02\\x08\\x00\\x00\\x00\\x14\\x62\\x58\\x59\\x5a\\x00\\x00\\x02\\x1c\\x00\\x00\\x00\\x14\\x74\\x65\\x78\\x74\\x00\\x00\\x00\\x00\\x43\\x6f\\x70\\x79\\x72\\x69\\x67\\x68\\x74\\x20\\x32\\x30\\x30\\x30\\x20\\x41\\x64\\x6f\\x62\\x65\\x20\\x53\\x79\\x73\\x74\\x65\\x6d\\x73\\x20\\x49\\x6e\\x63\\x6f\\x72\\x70\\x6f\\x72\\x61\\x74\\x65\\x64\\x00\\x00\\x00\\x64\\x65\\x73\\x63\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x11\\x41\\x64\\x6f\\x62\\x65\\x20\\x52\\x47\\x42\\x20\\x28\\x31\\x39\\x39\\x38\\x29\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\
\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\xf3\\x51\\x00\\x01\\x00\\x00\\x00\\x01\\x16\\xcc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x9c\\x18\\x00\\x00\\x4f\\xa5\\x00\\x00\\x04\\xfc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x34\\x8d\\x00\\x00\\xa0\\x2c\\x00\\x00\\x0f\\x95\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x26\\x31\\x00\\x00\\x10\\x2f\\x00\\x00\\xbe\\x9c" > $ICCTMPFILE
362
+
363
+
364
+ #=====# DO THE ACTUAL CONVERSION #=====#
365
+ echo " Compressing PDF & embedding fonts..." # Pass 1: rewrite the input with pdfwrite (fonts embedded, RGB, no downsampling) into $TMPFILE. MSGOPTS stays unquoted because it holds several options or nothing
366
+ run gs $MSGOPTS \
367
+ -dBATCH -dNOPAUSE -dNOOUTERSAVE \
368
+ -dCompatibilityLevel=1.4 \
369
+ -dEmbedAllFonts=true -dSubsetFonts=true \
370
+ -dCompressFonts=true -dCompressPages=true \
371
+ -sColorConversionStrategy=RGB \
372
+ -dDownsampleMonoImages=false -dDownsampleGrayImages=false -dDownsampleColorImages=false \
373
+ -dAutoFilterColorImages=false -dAutoFilterGrayImages=false \
374
+ -sDEVICE=pdfwrite \
375
+ -sOutputFile="$TMPFILE" "$INPUT"
376
+ echo " Converting to PDF/A-1B..." # Pass 2: feed $TMPFILE plus the generated definition file to pdfwrite with -dPDFA=1, writing $OUTPUT; temp paths are quoted so a scratch location containing spaces still works
377
+ run gs $MSGOPTS \
378
+ -dPDFA=1 -dBATCH -dNOPAUSE -dNOOUTERSAVE \
379
+ $QUALITYOPTS \
380
+ -dCompatibilityLevel=1.4 -dPDFACompatibilityPolicy=1 \
381
+ -sProcessColorModel=DeviceRGB -sColorConversionStrategy=RGB \
382
+ -sOutputICCProfile="$ICCTMPFILE" \
383
+ -sDEVICE=pdfwrite \
384
+ -sOutputFile="$OUTPUT" "$TMPFILE" "$PSTMPFILE"
385
+ echo " Removing temporary files..."
386
+ rm -rf "$TMPFILE" "$TMPDIR" # Also remove the mktemp -d directory (definition file, ICC profile, info script), which was previously left behind
387
+ echo " Done, now ESSE3 is happy! ;)"
388
+
389
+
390
+ #=====# VALIDATE THE RESULT #=====#
391
+ if $VALIDATE; then # VALIDATE holds the literal command name 'true' (set by --validate) or 'false'
392
+ validate $VERAMSGOPTS "$OUTPUT" # VERAMSGOPTS is '' or '--verbose'; unquoted so an empty value adds no argument
393
+ else
394
+ echo " Suggestion: validate the resulting PDF to be sure it's PDF/A-1B compliant."
395
+ fi
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libis-format
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.8
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kris Dekeyser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-18 00:00:00.000000000 Z
11
+ date: 2023-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: '2.1'
167
+ - !ruby/object:Gem::Dependency
168
+ name: pdfinfo
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.4'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.4'
167
181
  description: Collection of tools and classes that help to identify formats of binary
168
182
  files and create derivative copies (e.g. PDF from Word).
169
183
  email:
@@ -193,10 +207,10 @@ files:
193
207
  - bin/formatinfo
194
208
  - bin/libis_format
195
209
  - bin/pdf_copy
210
+ - data/AdobeRGB1998.icc
196
211
  - data/ISOcoated_v2_eci.icc
197
212
  - data/PDFA_def.ps
198
213
  - data/ead.xsd
199
- - data/eciRGB_v2.icc
200
214
  - data/lias_formats.xml
201
215
  - data/types.yml
202
216
  - data/xlink.xsd
@@ -228,7 +242,7 @@ files:
228
242
  - lib/libis/format/tool/droid.rb
229
243
  - lib/libis/format/tool/email_to_pdf.rb
230
244
  - lib/libis/format/tool/extension_identification.rb
231
- - lib/libis/format/tool/ffmpeg.rb
245
+ - lib/libis/format/tool/ff_mpeg.rb
232
246
  - lib/libis/format/tool/fido.rb
233
247
  - lib/libis/format/tool/file_tool.rb
234
248
  - lib/libis/format/tool/fop_pdf.rb
@@ -248,6 +262,7 @@ files:
248
262
  - tools/PdfTool.jar
249
263
  - tools/bcpkix-jdk15on-1.49.jar
250
264
  - tools/bcprov-jdk15on-1.49.jar
265
+ - tools/emailconverter.jar
251
266
  - tools/fop/build/fop.jar
252
267
  - tools/fop/conf/fop.xconf
253
268
  - tools/fop/fop
@@ -298,6 +313,7 @@ files:
298
313
  - tools/fop/lib/xmlgraphics-commons-2.3.jar
299
314
  - tools/fop/lib/xmlgraphics-commons.LICENSE.txt
300
315
  - tools/fop/lib/xmlgraphics-commons.NOTICE.txt
316
+ - tools/pdf2pdfa
301
317
  - tools/pdfbox/pdfbox-app-2.0.13.jar
302
318
  - tools/pdfbox/preflight-app-2.0.13.jar
303
319
  homepage: ''
data/data/eciRGB_v2.icc DELETED
Binary file