libis-format 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -10
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/data/PDFA_def.ps +3 -3
  13. data/data/eciRGB_v2.icc +0 -0
  14. data/data/types.yml +4 -17
  15. data/docker_cfg.yml +1 -0
  16. data/lib/libis/format/cli/convert.rb +4 -4
  17. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  18. data/lib/libis/format/command_line.rb +3 -2
  19. data/lib/libis/format/config.rb +22 -20
  20. data/lib/libis/format/converter/audio_converter.rb +31 -56
  21. data/lib/libis/format/converter/base.rb +36 -16
  22. data/lib/libis/format/converter/chain.rb +32 -52
  23. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  24. data/lib/libis/format/converter/image_assembler.rb +82 -0
  25. data/lib/libis/format/converter/image_converter.rb +40 -153
  26. data/lib/libis/format/converter/image_splitter.rb +80 -0
  27. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  28. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  29. data/lib/libis/format/converter/office_converter.rb +28 -22
  30. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  31. data/lib/libis/format/converter/pdf_converter.rb +50 -111
  32. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  33. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  34. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  35. data/lib/libis/format/converter/repository.rb +13 -7
  36. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  37. data/lib/libis/format/converter/video_converter.rb +58 -47
  38. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  39. data/lib/libis/format/converter.rb +1 -1
  40. data/lib/libis/format/identifier.rb +46 -44
  41. data/lib/libis/format/info.rb +27 -0
  42. data/lib/libis/format/library.rb +147 -0
  43. data/lib/libis/format/tool/droid.rb +30 -29
  44. data/lib/libis/format/tool/extension_identification.rb +26 -24
  45. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  46. data/lib/libis/format/tool/fido.rb +27 -22
  47. data/lib/libis/format/tool/file_tool.rb +24 -11
  48. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  49. data/lib/libis/format/tool/identification_tool.rb +40 -38
  50. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  51. data/lib/libis/format/tool/pdf_copy.rb +15 -24
  52. data/lib/libis/format/tool/pdf_merge.rb +14 -24
  53. data/lib/libis/format/tool/pdf_optimizer.rb +17 -24
  54. data/lib/libis/format/tool/pdf_split.rb +16 -25
  55. data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -50
  56. data/lib/libis/format/tool/pdfa_validator.rb +30 -25
  57. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  58. data/lib/libis/format/tool.rb +3 -4
  59. data/lib/libis/format/version.rb +1 -3
  60. data/lib/libis/format/yaml_loader.rb +71 -0
  61. data/lib/libis/format.rb +7 -5
  62. data/lib/libis-format.rb +0 -2
  63. data/libis-format.gemspec +18 -24
  64. metadata +78 -120
  65. data/data/AdobeRGB1998.icc +0 -0
  66. data/lib/libis/format/converter/email_converter.rb +0 -35
  67. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  68. data/lib/libis/format/type_database.rb +0 -156
  69. data/lib/libis/format/type_database_impl.rb +0 -153
  70. data/tools/pdf2pdfa +0 -395
  71. /data/bin/{droid_tool → droid} +0 -0
  72. /data/bin/{fido_tool → fido} +0 -0
@@ -1,156 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'yaml'
4
- require 'libis/tools/extend/hash'
5
-
6
- module Libis
7
- module Format
8
- # noinspection RubyClassVariableUsageInspection
9
- class TypeDatabase
10
- @implementation = Libis::Format::TypeDatabaseImpl.instance
11
-
12
- def self.implementation(impl)
13
- @implementation = impl
14
- end
15
-
16
- def self.enrich(info, map_keys = {})
17
- return {} unless info.is_a? Hash
18
-
19
- mapper = Hash.new { |hash, key| hash[key] = key }
20
- mapper.merge! map_keys
21
- unless (puid = info[mapper[:PUID]]).blank?
22
- info[mapper[:TYPE]] ||= begin
23
- puid_infos(puid).first[:TYPE]
24
- rescue StandardError
25
- nil
26
- end
27
- end
28
- unless (mime = info[mapper[:MIME]]).blank?
29
- info[mapper[:TYPE]] ||= begin
30
- mime_infos(mime).first[:TYPE]
31
- rescue StandardError
32
- nil
33
- end
34
- end
35
- unless (type_name = info[mapper[:TYPE]]).nil?
36
- mapper.each_key do |key|
37
- info[mapper[key]] = get(type_name, key) || info[mapper[key]]
38
- end
39
- info[mapper[:GROUP]] = type_group(type_name)
40
- end
41
- info
42
- end
43
-
44
- def self.normalize(info, map_keys = {})
45
- return {} unless info.is_a? Hash
46
-
47
- mapper = Hash.new { |hash, key| hash[key] = key }
48
- mapper.merge! map_keys
49
- unless (puid = info[mapper[:PUID]]).blank?
50
- info[mapper[:TYPE]] ||= begin
51
- puid_infos(puid).first[:TYPE]
52
- rescue StandardError
53
- nil
54
- end
55
- end
56
- unless (mime = info[mapper[:MIME]]).blank?
57
- info[mapper[:TYPE]] ||= begin
58
- mime_infos(mime).first[:TYPE]
59
- rescue StandardError
60
- nil
61
- end
62
- end
63
- unless (type_name = info[mapper[:TYPE]]).nil?
64
- info[mapper[:MIME]] = type_mimetypes(type_name).first if type_mimetypes(type_name).first
65
- info[mapper[:GROUP]] = type_group(type_name)
66
- end
67
- info
68
- end
69
-
70
- def self.get(type_name, key)
71
- case key
72
- when :MIME
73
- type_mimetypes(type_name).first
74
- when :PUID
75
- type_puids(type_name).first
76
- when :EXTENSION
77
- type_extentions(type_name).first
78
- else
79
- typeinfo(type_name)[key]
80
- end
81
- end
82
-
83
- def self.type_group(ftype)
84
- typeinfo(ftype)[:GROUP]
85
- end
86
-
87
- def self.type_mimetypes(ftype)
88
- typeinfo(ftype)[:MIME] || []
89
- end
90
-
91
- def self.type_puids(ftype)
92
- typeinfo(ftype)[:PUID] || []
93
- end
94
-
95
- def self.type_extentions(ftype)
96
- typeinfo(ftype)[:EXTENSIONS] || []
97
- end
98
-
99
- def self.typeinfo(ftype)
100
- @implementation.typeinfo(ftype)
101
- end
102
-
103
- def self.group_types(group)
104
- @implementation.group_types(group)
105
- end
106
-
107
- def self.puid_infos(puid)
108
- @implementation.puid_infos(puid)
109
- end
110
-
111
- def self.puid_types(puid)
112
- @implementation.puid_types(puid)
113
- end
114
-
115
- def self.puid_groups(puid)
116
- puid_types(puid).map(&method(:type_group))
117
- end
118
-
119
- def self.mime_infos(mime)
120
- @implementation.mime_infos(mime)
121
- end
122
-
123
- def self.mime_types(mime)
124
- @implementation.mime_types(mime)
125
- end
126
-
127
- def self.mime_groups(mime)
128
- mime_types(mime).map(&method(:type_group))
129
- end
130
-
131
- def self.ext_infos(ext)
132
- @implementation.ext_infos(ext)
133
- end
134
-
135
- def self.ext_types(ext)
136
- @implementation.ext_types(ext)
137
- end
138
-
139
- def self.puid_typeinfo(puid)
140
- @implementation.puid_typeinfo(puid)
141
- end
142
-
143
- def self.known_mime?(mime)
144
- @implementation.known_mime?(mime)
145
- end
146
-
147
- def self.groups
148
- @implementation.groups
149
- end
150
-
151
- def self.export_csv(filename, **options)
152
- @implementation.export_csv(filename, **options)
153
- end
154
- end
155
- end
156
- end
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'singleton'
4
- require 'yaml'
5
- require 'csv'
6
-
7
- require 'libis/tools/logger'
8
- require 'libis/tools/extend/hash'
9
- require 'libis/tools/extend/string'
10
- require 'libis/tools/extend/symbol'
11
-
12
- module Libis
13
- module Format
14
- class TypeDatabaseImpl
15
- include Singleton
16
- include ::Libis::Tools::Logger
17
-
18
- def typeinfo(ftype)
19
- @types[ftype.to_sym] || {}
20
- end
21
-
22
- def group_types(group)
23
- @types.select do |_, v|
24
- v[:GROUP] == group.to_sym
25
- end.keys
26
- end
27
-
28
- def puid_infos(puid)
29
- @types.select do |_, v|
30
- v[:PUID].include? puid
31
- rescue StandardError
32
- false
33
- end.values
34
- end
35
-
36
- def puid_types(puid)
37
- @types.select do |_, v|
38
- v[:PUID].include? puid
39
- rescue StandardError
40
- false
41
- end.keys
42
- end
43
-
44
- def mime_infos(mime)
45
- @types.select do |_, v|
46
- v[:MIME].include? mime
47
- rescue StandardError
48
- false
49
- end.values
50
- end
51
-
52
- def mime_types(mime)
53
- @types.select do |_, v|
54
- v[:MIME].include? mime
55
- rescue StandardError
56
- false
57
- end.keys
58
- end
59
-
60
- def ext_infos(ext)
61
- ext = ext.gsub(/^\./, '')
62
- @types.select do |_, v|
63
- v[:EXTENSIONS].include?(ext)
64
- rescue StandardError
65
- false
66
- end.values
67
- end
68
-
69
- def ext_types(ext)
70
- ext = ext.gsub(/^\./, '')
71
- @types.select do |_, v|
72
- v[:EXTENSIONS].include?(ext)
73
- rescue StandardError
74
- false
75
- end.keys
76
- end
77
-
78
- def puid_typeinfo(puid)
79
- @types.each do |_, v|
80
- return v if v[:PUID]&.include?(puid)
81
- end
82
- nil
83
- end
84
-
85
- def known_mime?(mime)
86
- @types.each do |_, v|
87
- return true if v[:MIME]&.include? mime
88
- end
89
- false
90
- end
91
-
92
- def groups
93
- @types.values.map(&:dig.call(:GROUP)).uniq
94
- end
95
-
96
- def export_csv(filename, **options)
97
- headers = @types.values.each_with_object(Set.new) { |v, s| v.each_key { |k| s << k.to_s } }
98
- options[:headers] = headers.to_a
99
- CSV.open(filename, 'w', **options) do |csv|
100
- @types.each_value do |v|
101
- csv << CSV::Row.new(v.keys, v.values.map { |x| x.is_a?(Array) ? x.join(', ') : x })
102
- end
103
- end
104
- end
105
-
106
- def load_types(file_or_hash = {}, append = true)
107
- hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML.load_file(file_or_hash)
108
- # noinspection RubyResolve
109
- hash.each do |group, type_info|
110
- type_info.each do |type_name, info|
111
- type_key = type_name.to_sym
112
- info.symbolize_keys!
113
- info[:TYPE] = type_key
114
- info[:GROUP] = group.to_sym
115
- info[:MIME] = begin
116
- info[:MIME].strip.split(/[\s,]+/).map(&:strip)
117
- rescue StandardError
118
- []
119
- end
120
- info[:EXTENSIONS] = begin
121
- info[:EXTENSIONS].strip.split(/[\s,]+/).map(&:strip)
122
- rescue StandardError
123
- []
124
- end
125
- info[:PUID] = info[:PUID].strip.split(/[\s,]+/).map(&:strip) if info[:PUID]
126
- if @types.key?(type_key)
127
- warn 'Type %s already defined; merging with info from %s.', type_name, file_or_hash
128
- info.merge!(@types[type_key]) do |_, v_new, v_old|
129
- case v_old
130
- when Array
131
- append ? v_old + v_new : v_new + v_old
132
- when Hash
133
- append ? v_new.merge(v_old) : v_old.merge(v_new)
134
- else
135
- append ? v_old : v_new
136
- end
137
- end
138
- end
139
- @types[type_key] = info
140
- end
141
- end
142
- end
143
-
144
- protected
145
-
146
- def initialize
147
- @types = {}
148
- type_database = Libis::Format::Config[:type_database]
149
- load_types(type_database)
150
- end
151
- end
152
- end
153
- end
data/tools/pdf2pdfa DELETED
@@ -1,395 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # PDF2ARCHIVE 0.3.2
4
- # (C) 2018 Matteo Seclì <secli.matteo@gmail.com>
5
- #
6
- # This program is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # This program is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
-
19
-
20
- #=====# INITIALIZE VARIABLES #=====#
21
- VERSION="0.3.2"
22
- INPUT=""
23
- OUTPUT=""
24
- QUALITYOPTS=""
25
- DEBUG=false
26
- VALIDATE=false
27
- MSGOPTS="-dQUIET -sstdout=/dev/null"
28
- VERAMSGOPTS=""
29
- #ERROPTS="2>/dev/null"
30
-
31
-
32
- #=====# HELP FUNCTION #=====#
33
- help()
34
- {
35
- TOTLEN="38" # Adjust this
36
- TITLESTRING="PDF2ARCHIVE, version $VERSION"
37
- SPACEL=$(echo "($TOTLEN-${#TITLESTRING})/2 + (36-${#TITLESTRING})%2 - 1" | bc)
38
- SPACER=$(echo "($TOTLEN-${#TITLESTRING})/2 - 1" | bc)
39
- TITLESTRING=$(printf "|%-${SPACEL}s%s%-${SPACER}s|" "" "$TITLESTRING" "")
40
- DASHSTRING=$(eval printf "%.0s-" {1..$TOTLEN})
41
- echo \
42
- "$DASHSTRING
43
- $TITLESTRING
44
- $DASHSTRING
45
-
46
- OVERVIEW:
47
- A simple Ghostscript-based PDF to PDF/A-1B converter.
48
-
49
- USAGE:
50
- $0 [options] input.pdf [output.pdf]
51
-
52
- EXAMPLES:
53
- Convert 'input.pdf' in PDF/A-1B format; the output is 'input-PDFA.pdf':
54
- $0 input.pdf
55
- Convert 'input.pdf' in PDF/A-1B format; the output is 'output.pdf':
56
- $0 input.pdf output.pdf
57
- Convert 'input.pdf' in PDF/A-1B format and perform a high-quality compression:
58
- $0 --quality=high input.pdf
59
- Convert 'input.pdf' in PDF/A-1B format and specify the document title:
60
- $0 --title=\"Title of your nice document\" input.pdf
61
- Convert 'input.pdf' in PDF/A-1B format and validate the result:
62
- $0 --validate input.pdf
63
-
64
- OPTIONS:
65
- -h, --help Show the help
66
- --quality=<value> Set the quality of the output when downsampling. The
67
- possible values are 'high', 'medium' and 'low', where
68
- 'high' gives the highest output quality. By specifying no
69
- option, no additional downsampling is done.
70
- --title=<value> Title of the resulting PDF/A file
71
- --author=<value> Author of the resulting PDF/A file
72
- --subject=<value> Subject of the resulting PDF/A file
73
- --keywords=<value> Comma-separated keywords of the resulting PDF/A file
74
- --cleanmetadata Clean all the standard metadata fields, except the ones
75
- specified via the command line options.
76
- --validate Validate the resulting file. The validation is done with
77
- VeraPDF, you need a working Java installation.
78
- --validate-only Perform only the validation on the input file, again using
79
- VeraPDF
80
- --debug Write additional debug information on screen
81
- -v, --version Show the program version
82
-
83
- LICENSE:
84
- GPLv3
85
-
86
- AUTHORS:
87
- (C) 2017-2018 Matteo Seclì"
88
- }
89
-
90
-
91
- #=====# RUN HELPER FUNCTION #=====#
92
- run() {
93
- if $DEBUG; then
94
- #v=$(exec 2>&1 && set -x && set -- "$@")
95
- #echo "#${v#*--}"
96
- "$@"
97
- else
98
- "$@" 2>/dev/null #>/dev/null 2>&1
99
- fi
100
- }
101
-
102
-
103
- #=====# CHECKS #=====#
104
- if [ "$(which gs)" == "" ]; then
105
- echo " ERROR: Ghostscript is not installed or it's not in the path"
106
- exit
107
- fi
108
-
109
-
110
- #=====# VALIDATION #=====#
111
-
112
- javaCheck() {
113
- if [ "$(which java)" == "" ]; then
114
- echo " ERROR: Java is not installed or it's not in the path"
115
- echo " Cannot perform validation"
116
- exit 1
117
- fi
118
- }
119
-
120
- validate() {
121
- echo " Validating..."
122
- echo " $(./verapdf/verapdf "$1" --extract --flavour 1b --format text "$2")"
123
- }
124
-
125
-
126
- #=====# INPUT PARSER #=====#
127
- if [ "$1" == "" ]; then
128
- help
129
- exit
130
- fi
131
- while [ "$1" != "" ]; do
132
- PARAM=`echo $1 | awk -F= '{print $1}'`
133
- VALUE=`echo $1 | awk -F= '{print $2}'`
134
- case $PARAM in
135
- -h | --help)
136
- help
137
- exit
138
- ;;
139
- -v | --version)
140
- echo $VERSION
141
- exit
142
- ;;
143
- --debug)
144
- DEBUG=true
145
- MSGOPTS=""
146
- VERAMSGOPTS="--verbose"
147
- #ERROPTS=""
148
- ;;
149
- --quality)
150
- if [ "$VALUE" == "high" ]; then
151
- QUALITYOPTS="-dPDFSETTINGS=/printer"
152
- elif [ "$VALUE" == "medium" ]; then
153
- QUALITYOPTS="-dPDFSETTINGS=/ebook"
154
- elif [ "$VALUE" == "low" ]; then
155
- QUALITYOPTS="-dPDFSETTINGS=/screen"
156
- else
157
- echo " ERROR: unknown quality option '$VALUE'"
158
- help
159
- exit 1
160
- fi
161
- ;;
162
- --cleanmetadata)
163
- [ -z ${PDFTITLE+x} ] && PDFTITLE=""
164
- [ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=""
165
- [ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=""
166
- [ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=""
167
- [ -z ${PDFCREATOR+x} ] && PDFCREATOR=""
168
- [ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=""
169
- [ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=""
170
- [ -z ${PDFMODDATE+x} ] && PDFMODDATE=""
171
- [ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=""
172
- ;;
173
- --title)
174
- PDFTITLE=$VALUE
175
- ;;
176
- --author)
177
- PDFAUTHOR=$VALUE
178
- ;;
179
- --subject)
180
- PDFSUBJECT=$VALUE
181
- ;;
182
- --keywords)
183
- PDFKEYWORDS=$VALUE
184
- ;;
185
- --validate)
186
- javaCheck
187
- VALIDATE=true
188
- ;;
189
- --validate-only)
190
- javaCheck
191
- validate $VERAMSGOPTS $2
192
- exit
193
- ;;
194
- *.pdf)
195
- if [ "$INPUT" == "" ]; then
196
- INPUT=$PARAM
197
- elif [ "$OUTPUT" == "" ]; then
198
- OUTPUT=$PARAM
199
- else
200
- echo " ERROR: too many PDF files as input!"
201
- help
202
- exit 1
203
- fi
204
- ;;
205
- *)
206
- echo " ERROR: unknown parameter \"$PARAM\""
207
- help
208
- exit 1
209
- ;;
210
- esac
211
- shift
212
- done
213
-
214
- #=====# SET UP ALL THE STUFF #=====#
215
- echo "=== Welcome to PDF2ARCHIVE ==="
216
- if [ "$OUTPUT" == "" ]; then
217
- OUTPUT="${INPUT%.pdf}-PDFA.pdf"
218
- fi
219
- TMPFILE=$(mktemp)
220
- TMPDIR=$(mktemp -d)
221
- PSTMPFILE=$TMPDIR/PDFA_def.ps
222
- ICCTMPFILE=$TMPDIR/AdobeRGB1998.icc
223
- INFOTMPFILE=$TMPDIR/pdf_minimal_info.ps
224
- echo \
225
- "%!PS
226
- % Extract PDF info in a minimal way.
227
- % Inspired by 'toolbin/pdf_info.ps'.
228
-
229
- /QUIET true def
230
- File dup (r) file runpdfbegin
231
- Trailer /Info knownoget {
232
- dup /Title knownoget { (__knowninfoTitle: ) print = flush } if
233
- dup /Author knownoget { (__knowninfoAuthor: ) print = flush } if
234
- dup /Subject knownoget { (__knowninfoSubject: ) print = flush } if
235
- dup /Keywords knownoget { (__knowninfoKeywords: ) print = flush } if
236
- dup /Creator knownoget { (__knowninfoCreator: ) print = flush } if
237
- dup /Producer knownoget { (__knowninfoProducer: ) print = flush } if
238
- dup /CreationDate knownoget { (__knowninfoCreationDate: ) print = flush } if
239
- dup /ModDate knownoget { (__knowninfoModDate: ) print = flush } if
240
- dup /Trapped knownoget { (__knowninfoTrapped: ) print = flush } if
241
- } if
242
- quit
243
- " > $INFOTMPFILE
244
-
245
-
246
- #=====# PRESERVE UNSPECIFIED KNOWN STANDARD METADATA #=====#
247
- # Notes:
248
- # 'iconv' is necessary to filter out all the invalid bytes.
249
- # If it's not used, sed (unless it's GNU sed) will fail with
250
- # 'RE error: illegal byte sequence'. A solution to this is to
251
- # use 'LC_CTYPE=C && LANG=C && echo "$METADUMP" ...' in the
252
- # variable assignments; however, this produces bad PDF files.
253
- #
254
- METADUMP=$(gs -dNOSAFER -dNODISPLAY -q -sFile="$INPUT" $INFOTMPFILE | iconv -f utf-8 -t utf-8 -c)
255
- [ -z ${PDFTITLE+x} ] && PDFTITLE=$(echo "$METADUMP" | grep "__knowninfoTitle: " | sed "s/^__knowninfoTitle: //g")
256
- [ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=$(echo "$METADUMP" | grep "__knowninfoAuthor: " | sed "s/^__knowninfoAuthor: //g")
257
- [ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=$(echo "$METADUMP" | grep "__knowninfoSubject: " | sed "s/^__knowninfoSubject: //g")
258
- [ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=$(echo "$METADUMP" | grep "__knowninfoKeywords: " | sed "s/^__knowninfoKeywords: //g")
259
- [ -z ${PDFCREATOR+x} ] && PDFCREATOR=$(echo "$METADUMP" | grep "__knowninfoCreator: " | sed "s/^__knowninfoCreator: //g")
260
- [ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=$(echo "$METADUMP" | grep "__knowninfoProducer: " | sed "s/^__knowninfoProducer: //g")
261
- [ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=$(echo "$METADUMP" | grep "__knowninfoCreationDate: " | sed "s/^__knowninfoCreationDate: //g")
262
- [ -z ${PDFMODDATE+x} ] && PDFMODDATE=$(echo "$METADUMP" | grep "__knowninfoModDate: " | sed "s/^__knowninfoModDate: //g")
263
- [ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=$(echo "$METADUMP" | grep "__knowninfoTrapped: " | sed "s/^__knowninfoTrapped: //g")
264
- # Replace "Trapped" string, if not empty, with an operator. Fixes 3Heights.
265
- if [ "$PDFTRAPPED" != "" ]; then
266
- PDFTRAPPED="/$(tr '[:lower:]' '[:upper:]' <<< ${PDFTRAPPED:0:1})$(tr '[:upper:]' '[:lower:]' <<< ${PDFTRAPPED:1})"
267
- fi
268
- # Check if the operator is allowed, otherwise empty variable.
269
- if [ "$PDFTRAPPED" != "/True" ] && [ "$PDFTRAPPED" != "/False" ]; then
270
- PDFTRAPPED=""
271
- fi
272
-
273
-
274
- #=====# PRINT DEBUG INFO #=====#
275
- if $DEBUG; then
276
- echo " DEBUG: running PDF2ARCHIVE, version $VERSION"
277
- echo " DEBUG: using Ghostscript binary at $(which gs), version $(gs --version)"
278
- echo " DEBUG: the input file is '$INPUT'"
279
- echo " DEBUG: the output file is '$OUTPUT'"
280
- echo " DEBUG: the intermediate processing file is $TMPFILE"
281
- echo " DEBUG: the temporary directory is $TMPDIR"
282
- echo " DEBUG: the current quality options are '$QUALITYOPTS'"
283
- echo " DEBUG: PDF title '$PDFTITLE'"
284
- echo " DEBUG: PDF author '$PDFAUTHOR'"
285
- echo " DEBUG: PDF subject '$PDFSUBJECT'"
286
- echo " DEBUG: PDF keywords '$PDFKEYWORDS'"
287
- echo " DEBUG: PDF creator '$PDFCREATOR'"
288
- echo " DEBUG: PDF producer '$PDFPRODUCER'"
289
- echo " DEBUG: PDF creation date '$PDFCREATIONDATE'"
290
- echo " DEBUG: PDF modification date '$PDFMODDATE'"
291
- echo " DEBUG: PDF trapping '$PDFTRAPPED'"
292
- fi
293
-
294
-
295
- #=====# CREATE THE PS DEFINITION FILE #=====#
296
- echo " Creating the definition file..."
297
- echo \
298
- "%!
299
- % This prefix file for creating a PDF/A document is derived from
300
- % the sample included with Ghostscript 9.07, released under the
301
- % GNU Affero General Public License.
302
- % Modified 4/15/2013 by MCB Systems.
303
-
304
- % Feel free to modify entries marked with \"Customize\".
305
-
306
- % This assumes an ICC profile to reside in the file (AdobeRGB1998.icc),
307
- % unless the user modifies the corresponding line below.
308
-
309
- % The color space described by the ICC profile must correspond to the
310
- % ProcessColorModel specified when using this prefix file (GRAY with
311
- % DeviceGray, RGB with DeviceRGB, and CMYK with DeviceCMYK).
312
-
313
- % Define entries in the document Info dictionary :
314
-
315
- /ICCProfile ($ICCTMPFILE) % Customize.
316
- def
317
-
318
- [ /Title ($PDFTITLE) % Customize." > $PSTMPFILE
319
- if [ "$PDFAUTHOR" != "" ]; then
320
- echo " /Author ($PDFAUTHOR)" >> $PSTMPFILE
321
- fi
322
- if [ "$PDFSUBJECT" != "" ]; then
323
- echo " /Subject ($PDFSUBJECT)" >> $PSTMPFILE
324
- fi
325
- if [ "$PDFKEYWORDS" != "" ]; then
326
- echo " /Keywords ($PDFKEYWORDS)" >> $PSTMPFILE
327
- fi
328
- if [ "$PDFCREATOR" != "" ]; then
329
- echo " /Creator ($PDFCREATOR)" >> $PSTMPFILE
330
- fi
331
- echo \
332
- "% /Producer % Reserved to GS
333
- % /CreationDate % Reserved to GS
334
- % /ModDate % Reserved to GS" >> $PSTMPFILE
335
- if [ "$PDFTRAPPED" != "" ]; then
336
- echo " /Trapped $PDFTRAPPED" >> $PSTMPFILE
337
- fi
338
- echo \
339
- " /DOCINFO pdfmark
340
-
341
- % Define an ICC profile :
342
-
343
- [/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
344
- [{icc_PDFA} <</N systemdict /ProcessColorModel get /DeviceGray eq {1} {systemdict /ProcessColorModel get /DeviceRGB eq {3} {4} ifelse} ifelse >> /PUT pdfmark
345
- [{icc_PDFA} ICCProfile (r) file /PUT pdfmark
346
-
347
- % Define the output intent dictionary :
348
-
349
- [/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
350
- [{OutputIntent_PDFA} <<
351
- /Type /OutputIntent % Must be so (the standard requires).
352
- /S /GTS_PDFA1 % Must be so (the standard requires).
353
- /DestOutputProfile {icc_PDFA} % Must be so (see above).
354
- /OutputConditionIdentifier (AdobeRGB1998) % Customize
355
- >> /PUT pdfmark
356
- [{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
357
- " >> $PSTMPFILE
358
-
359
-
360
- #=====# CREATE THE COLOR PROFILE FILE #=====#
361
- echo -n -e "\\x00\\x00\\x02\\x30\\x41\\x44\\x42\\x45\\x02\\x10\\x00\\x00\\x6d\\x6e\\x74\\x72\\x52\\x47\\x42\\x20\\x58\\x59\\x5a\\x20\\x07\\xd0\\x00\\x08\\x00\\x0b\\x00\\x13\\x00\\x33\\x00\\x3b\\x61\\x63\\x73\\x70\\x41\\x50\\x50\\x4c\\x00\\x00\\x00\\x00\\x6e\\x6f\\x6e\\x65\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf6\\xd6\\x00\\x01\\x00\\x00\\x00\\x00\\xd3\\x2d\\x41\\x44\\x42\\x45\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0a\\x63\\x70\\x72\\x74\\x00\\x00\\x00\\xfc\\x00\\x00\\x00\\x32\\x64\\x65\\x73\\x63\\x00\\x00\\x01\\x30\\x00\\x00\\x00\\x6b\\x77\\x74\\x70\\x74\\x00\\x00\\x01\\x9c\\x00\\x00\\x00\\x14\\x62\\x6b\\x70\\x74\\x00\\x00\\x01\\xb0\\x00\\x00\\x00\\x14\\x72\\x54\\x52\\x43\\x00\\x00\\x01\\xc4\\x00\\x00\\x00\\x0e\\x67\\x54\\x52\\x43\\x00\\x00\\x01\\xd4\\x00\\x00\\x00\\x0e\\x62\\x54\\x52\\x43\\x00\\x00\\x01\\xe4\\x00\\x00\\x00\\x0e\\x72\\x58\\x59\\x5a\\x00\\x00\\x01\\xf4\\x00\\x00\\x00\\x14\\x67\\x58\\x59\\x5a\\x00\\x00\\x02\\x08\\x00\\x00\\x00\\x14\\x62\\x58\\x59\\x5a\\x00\\x00\\x02\\x1c\\x00\\x00\\x00\\x14\\x74\\x65\\x78\\x74\\x00\\x00\\x00\\x00\\x43\\x6f\\x70\\x79\\x72\\x69\\x67\\x68\\x74\\x20\\x32\\x30\\x30\\x30\\x20\\x41\\x64\\x6f\\x62\\x65\\x20\\x53\\x79\\x73\\x74\\x65\\x6d\\x73\\x20\\x49\\x6e\\x63\\x6f\\x72\\x70\\x6f\\x72\\x61\\x74\\x65\\x64\\x00\\x00\\x00\\x64\\x65\\x73\\x63\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x11\\x41\\x64\\x6f\\x62\\x65\\x20\\x52\\x47\\x42\\x20\\x28\\x31\\x39\\x39\\x38\\x29\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\xf3\\x51\\x00\\x01\\x00\\x00\\x00\\x01\\x16\\xcc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x9c\\x18\\x00\\x00\\x4f\\xa5\\x00\\x00\\x04\\xfc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x34\\x8d\\x00\\x00\\xa0\\x2c\\x00\\x00\\x0f\\x95\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x26\\x31\\x00\\x00\\x10\\x2f\\x00\\x00\\xbe\\x9c" > $ICCTMPFILE
362
-
363
-
364
- #=====# DO THE ACTUAL CONVERSION #=====#
365
- echo " Compressing PDF & embedding fonts..."
366
- run gs $MSGOPTS \
367
- -dBATCH -dNOPAUSE -dNOOUTERSAVE \
368
- -dCompatibilityLevel=1.4 \
369
- -dEmbedAllFonts=true -dSubsetFonts=true \
370
- -dCompressFonts=true -dCompressPages=true \
371
- -sColorConversionStrategy=RGB \
372
- -dDownsampleMonoImages=false -dDownsampleGrayImages=false -dDownsampleColorImages=false \
373
- -dAutoFilterColorImages=false -dAutoFilterGrayImages=false \
374
- -sDEVICE=pdfwrite \
375
- -sOutputFile="$TMPFILE" "$INPUT"
376
- echo " Converting to PDF/A-1B..."
377
- run gs $MSGOPTS \
378
- -dPDFA=1 -dBATCH -dNOPAUSE -dNOOUTERSAVE \
379
- $QUALITYOPTS \
380
- -dCompatibilityLevel=1.4 -dPDFACompatibilityPolicy=1 \
381
- -sProcessColorModel=DeviceRGB -sColorConversionStrategy=RGB \
382
- -sOutputICCProfile=$ICCTMPFILE \
383
- -sDEVICE=pdfwrite \
384
- -sOutputFile="$OUTPUT" "$TMPFILE" $PSTMPFILE
385
- echo " Removing temporary files..."
386
- rm $TMPFILE
387
- echo " Done, now ESSE3 is happy! ;)"
388
-
389
-
390
- #=====# VALIDATE THE RESULT #=====#
391
- if $VALIDATE; then
392
- validate $VERAMSGOPTS "$OUTPUT"
393
- else
394
- echo " Suggestion: validate the resulting PDF to be sure it's PDF/A-1B compliant."
395
- fi