libis-format 1.3.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -10
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/data/PDFA_def.ps +3 -3
  13. data/data/eciRGB_v2.icc +0 -0
  14. data/data/types.yml +4 -17
  15. data/docker_cfg.yml +1 -0
  16. data/lib/libis/format/cli/convert.rb +4 -4
  17. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  18. data/lib/libis/format/command_line.rb +3 -2
  19. data/lib/libis/format/config.rb +22 -20
  20. data/lib/libis/format/converter/audio_converter.rb +31 -56
  21. data/lib/libis/format/converter/base.rb +36 -16
  22. data/lib/libis/format/converter/chain.rb +32 -52
  23. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  24. data/lib/libis/format/converter/image_assembler.rb +82 -0
  25. data/lib/libis/format/converter/image_converter.rb +40 -153
  26. data/lib/libis/format/converter/image_splitter.rb +80 -0
  27. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  28. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  29. data/lib/libis/format/converter/office_converter.rb +28 -22
  30. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  31. data/lib/libis/format/converter/pdf_converter.rb +50 -111
  32. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  33. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  34. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  35. data/lib/libis/format/converter/repository.rb +13 -7
  36. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  37. data/lib/libis/format/converter/video_converter.rb +58 -47
  38. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  39. data/lib/libis/format/converter.rb +1 -1
  40. data/lib/libis/format/identifier.rb +46 -44
  41. data/lib/libis/format/info.rb +27 -0
  42. data/lib/libis/format/library.rb +147 -0
  43. data/lib/libis/format/tool/droid.rb +30 -29
  44. data/lib/libis/format/tool/extension_identification.rb +26 -24
  45. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  46. data/lib/libis/format/tool/fido.rb +27 -22
  47. data/lib/libis/format/tool/file_tool.rb +24 -11
  48. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  49. data/lib/libis/format/tool/identification_tool.rb +40 -38
  50. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  51. data/lib/libis/format/tool/pdf_copy.rb +15 -24
  52. data/lib/libis/format/tool/pdf_merge.rb +14 -24
  53. data/lib/libis/format/tool/pdf_optimizer.rb +17 -24
  54. data/lib/libis/format/tool/pdf_split.rb +16 -25
  55. data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -50
  56. data/lib/libis/format/tool/pdfa_validator.rb +30 -25
  57. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  58. data/lib/libis/format/tool.rb +3 -4
  59. data/lib/libis/format/version.rb +1 -3
  60. data/lib/libis/format/yaml_loader.rb +71 -0
  61. data/lib/libis/format.rb +7 -5
  62. data/lib/libis-format.rb +0 -2
  63. data/libis-format.gemspec +18 -24
  64. metadata +78 -120
  65. data/data/AdobeRGB1998.icc +0 -0
  66. data/lib/libis/format/converter/email_converter.rb +0 -35
  67. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  68. data/lib/libis/format/type_database.rb +0 -156
  69. data/lib/libis/format/type_database_impl.rb +0 -153
  70. data/tools/pdf2pdfa +0 -395
  71. /data/bin/{droid_tool → droid} +0 -0
  72. /data/bin/{fido_tool → fido} +0 -0
data/lib/libis/format/type_database.rb DELETED
@@ -1,156 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'yaml'
4
- require 'libis/tools/extend/hash'
5
-
6
- module Libis
7
- module Format
8
- # noinspection RubyClassVariableUsageInspection
9
- class TypeDatabase
10
- @implementation = Libis::Format::TypeDatabaseImpl.instance
11
-
12
- def self.implementation(impl)
13
- @implementation = impl
14
- end
15
-
16
- def self.enrich(info, map_keys = {})
17
- return {} unless info.is_a? Hash
18
-
19
- mapper = Hash.new { |hash, key| hash[key] = key }
20
- mapper.merge! map_keys
21
- unless (puid = info[mapper[:PUID]]).blank?
22
- info[mapper[:TYPE]] ||= begin
23
- puid_infos(puid).first[:TYPE]
24
- rescue StandardError
25
- nil
26
- end
27
- end
28
- unless (mime = info[mapper[:MIME]]).blank?
29
- info[mapper[:TYPE]] ||= begin
30
- mime_infos(mime).first[:TYPE]
31
- rescue StandardError
32
- nil
33
- end
34
- end
35
- unless (type_name = info[mapper[:TYPE]]).nil?
36
- mapper.each_key do |key|
37
- info[mapper[key]] = get(type_name, key) || info[mapper[key]]
38
- end
39
- info[mapper[:GROUP]] = type_group(type_name)
40
- end
41
- info
42
- end
43
-
44
- def self.normalize(info, map_keys = {})
45
- return {} unless info.is_a? Hash
46
-
47
- mapper = Hash.new { |hash, key| hash[key] = key }
48
- mapper.merge! map_keys
49
- unless (puid = info[mapper[:PUID]]).blank?
50
- info[mapper[:TYPE]] ||= begin
51
- puid_infos(puid).first[:TYPE]
52
- rescue StandardError
53
- nil
54
- end
55
- end
56
- unless (mime = info[mapper[:MIME]]).blank?
57
- info[mapper[:TYPE]] ||= begin
58
- mime_infos(mime).first[:TYPE]
59
- rescue StandardError
60
- nil
61
- end
62
- end
63
- unless (type_name = info[mapper[:TYPE]]).nil?
64
- info[mapper[:MIME]] = type_mimetypes(type_name).first if type_mimetypes(type_name).first
65
- info[mapper[:GROUP]] = type_group(type_name)
66
- end
67
- info
68
- end
69
-
70
- def self.get(type_name, key)
71
- case key
72
- when :MIME
73
- type_mimetypes(type_name).first
74
- when :PUID
75
- type_puids(type_name).first
76
- when :EXTENSION
77
- type_extentions(type_name).first
78
- else
79
- typeinfo(type_name)[key]
80
- end
81
- end
82
-
83
- def self.type_group(ftype)
84
- typeinfo(ftype)[:GROUP]
85
- end
86
-
87
- def self.type_mimetypes(ftype)
88
- typeinfo(ftype)[:MIME] || []
89
- end
90
-
91
- def self.type_puids(ftype)
92
- typeinfo(ftype)[:PUID] || []
93
- end
94
-
95
- def self.type_extentions(ftype)
96
- typeinfo(ftype)[:EXTENSIONS] || []
97
- end
98
-
99
- def self.typeinfo(ftype)
100
- @implementation.typeinfo(ftype)
101
- end
102
-
103
- def self.group_types(group)
104
- @implementation.group_types(group)
105
- end
106
-
107
- def self.puid_infos(puid)
108
- @implementation.puid_infos(puid)
109
- end
110
-
111
- def self.puid_types(puid)
112
- @implementation.puid_types(puid)
113
- end
114
-
115
- def self.puid_groups(puid)
116
- puid_types(puid).map(&method(:type_group))
117
- end
118
-
119
- def self.mime_infos(mime)
120
- @implementation.mime_infos(mime)
121
- end
122
-
123
- def self.mime_types(mime)
124
- @implementation.mime_types(mime)
125
- end
126
-
127
- def self.mime_groups(mime)
128
- mime_types(mime).map(&method(:type_group))
129
- end
130
-
131
- def self.ext_infos(ext)
132
- @implementation.ext_infos(ext)
133
- end
134
-
135
- def self.ext_types(ext)
136
- @implementation.ext_types(ext)
137
- end
138
-
139
- def self.puid_typeinfo(puid)
140
- @implementation.puid_typeinfo(puid)
141
- end
142
-
143
- def self.known_mime?(mime)
144
- @implementation.known_mime?(mime)
145
- end
146
-
147
- def self.groups
148
- @implementation.groups
149
- end
150
-
151
- def self.export_csv(filename, **options)
152
- @implementation.export_csv(filename, **options)
153
- end
154
- end
155
- end
156
- end
data/lib/libis/format/type_database_impl.rb DELETED
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'singleton'
4
- require 'yaml'
5
- require 'csv'
6
-
7
- require 'libis/tools/logger'
8
- require 'libis/tools/extend/hash'
9
- require 'libis/tools/extend/string'
10
- require 'libis/tools/extend/symbol'
11
-
12
- module Libis
13
- module Format
14
- class TypeDatabaseImpl
15
- include Singleton
16
- include ::Libis::Tools::Logger
17
-
18
- def typeinfo(ftype)
19
- @types[ftype.to_sym] || {}
20
- end
21
-
22
- def group_types(group)
23
- @types.select do |_, v|
24
- v[:GROUP] == group.to_sym
25
- end.keys
26
- end
27
-
28
- def puid_infos(puid)
29
- @types.select do |_, v|
30
- v[:PUID].include? puid
31
- rescue StandardError
32
- false
33
- end.values
34
- end
35
-
36
- def puid_types(puid)
37
- @types.select do |_, v|
38
- v[:PUID].include? puid
39
- rescue StandardError
40
- false
41
- end.keys
42
- end
43
-
44
- def mime_infos(mime)
45
- @types.select do |_, v|
46
- v[:MIME].include? mime
47
- rescue StandardError
48
- false
49
- end.values
50
- end
51
-
52
- def mime_types(mime)
53
- @types.select do |_, v|
54
- v[:MIME].include? mime
55
- rescue StandardError
56
- false
57
- end.keys
58
- end
59
-
60
- def ext_infos(ext)
61
- ext = ext.gsub(/^\./, '')
62
- @types.select do |_, v|
63
- v[:EXTENSIONS].include?(ext)
64
- rescue StandardError
65
- false
66
- end.values
67
- end
68
-
69
- def ext_types(ext)
70
- ext = ext.gsub(/^\./, '')
71
- @types.select do |_, v|
72
- v[:EXTENSIONS].include?(ext)
73
- rescue StandardError
74
- false
75
- end.keys
76
- end
77
-
78
- def puid_typeinfo(puid)
79
- @types.each do |_, v|
80
- return v if v[:PUID]&.include?(puid)
81
- end
82
- nil
83
- end
84
-
85
- def known_mime?(mime)
86
- @types.each do |_, v|
87
- return true if v[:MIME]&.include? mime
88
- end
89
- false
90
- end
91
-
92
- def groups
93
- @types.values.map(&:dig.call(:GROUP)).uniq
94
- end
95
-
96
- def export_csv(filename, **options)
97
- headers = @types.values.each_with_object(Set.new) { |v, s| v.each_key { |k| s << k.to_s } }
98
- options[:headers] = headers.to_a
99
- CSV.open(filename, 'w', **options) do |csv|
100
- @types.each_value do |v|
101
- csv << CSV::Row.new(v.keys, v.values.map { |x| x.is_a?(Array) ? x.join(', ') : x })
102
- end
103
- end
104
- end
105
-
106
- def load_types(file_or_hash = {}, append = true)
107
- hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML.load_file(file_or_hash)
108
- # noinspection RubyResolve
109
- hash.each do |group, type_info|
110
- type_info.each do |type_name, info|
111
- type_key = type_name.to_sym
112
- info.symbolize_keys!
113
- info[:TYPE] = type_key
114
- info[:GROUP] = group.to_sym
115
- info[:MIME] = begin
116
- info[:MIME].strip.split(/[\s,]+/).map(&:strip)
117
- rescue StandardError
118
- []
119
- end
120
- info[:EXTENSIONS] = begin
121
- info[:EXTENSIONS].strip.split(/[\s,]+/).map(&:strip)
122
- rescue StandardError
123
- []
124
- end
125
- info[:PUID] = info[:PUID].strip.split(/[\s,]+/).map(&:strip) if info[:PUID]
126
- if @types.key?(type_key)
127
- warn 'Type %s already defined; merging with info from %s.', type_name, file_or_hash
128
- info.merge!(@types[type_key]) do |_, v_new, v_old|
129
- case v_old
130
- when Array
131
- append ? v_old + v_new : v_new + v_old
132
- when Hash
133
- append ? v_new.merge(v_old) : v_old.merge(v_new)
134
- else
135
- append ? v_old : v_new
136
- end
137
- end
138
- end
139
- @types[type_key] = info
140
- end
141
- end
142
- end
143
-
144
- protected
145
-
146
- def initialize
147
- @types = {}
148
- type_database = Libis::Format::Config[:type_database]
149
- load_types(type_database)
150
- end
151
- end
152
- end
153
- end
data/tools/pdf2pdfa DELETED
@@ -1,395 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # PDF2ARCHIVE 0.3.2
4
- # (C) 2018 Matteo Seclì <secli.matteo@gmail.com>
5
- #
6
- # This program is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # This program is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
-
19
-
20
- #=====# INITIALIZE VARIABLES #=====#
21
- VERSION="0.3.2"
22
- INPUT=""
23
- OUTPUT=""
24
- QUALITYOPTS=""
25
- DEBUG=false
26
- VALIDATE=false
27
- MSGOPTS="-dQUIET -sstdout=/dev/null"
28
- VERAMSGOPTS=""
29
- #ERROPTS="2>/dev/null"
30
-
31
-
32
- #=====# HELP FUNCTION #=====#
33
- help()
34
- {
35
- TOTLEN="38" # Adjust this
36
- TITLESTRING="PDF2ARCHIVE, version $VERSION"
37
- SPACEL=$(echo "($TOTLEN-${#TITLESTRING})/2 + (36-${#TITLESTRING})%2 - 1" | bc)
38
- SPACER=$(echo "($TOTLEN-${#TITLESTRING})/2 - 1" | bc)
39
- TITLESTRING=$(printf "|%-${SPACEL}s%s%-${SPACER}s|" "" "$TITLESTRING" "")
40
- DASHSTRING=$(eval printf "%.0s-" {1..$TOTLEN})
41
- echo \
42
- "$DASHSTRING
43
- $TITLESTRING
44
- $DASHSTRING
45
-
46
- OVERVIEW:
47
- A simple Ghostscript-based PDF to PDF/A-1B converter.
48
-
49
- USAGE:
50
- $0 [options] input.pdf [output.pdf]
51
-
52
- EXAMPLES:
53
- Convert 'input.pdf' in PDF/A-1B format; the output is 'input-PDFA.pdf':
54
- $0 input.pdf
55
- Convert 'input.pdf' in PDF/A-1B format; the output is 'output.pdf':
56
- $0 input.pdf output.pdf
57
- Convert 'input.pdf' in PDF/A-1B format and perform a high-quality compression:
58
- $0 --quality=high input.pdf
59
- Convert 'input.pdf' in PDF/A-1B format and specify the document title:
60
- $0 --title=\"Title of your nice document\" input.pdf
61
- Convert 'input.pdf' in PDF/A-1B format and validate the result:
62
- $0 --validate input.pdf
63
-
64
- OPTIONS:
65
- -h, --help Show the help
66
- --quality=<value> Set the quality of the output when downsampling. The
67
- possible values are 'high', 'medium' and 'low', where
68
- 'high' gives the highest output quality. By specifying no
69
- option, no additional downsampling is done.
70
- --title=<value> Title of the resulting PDF/A file
71
- --author=<value> Author of the resulting PDF/A file
72
- --subject=<value> Subject of the resulting PDF/A file
73
- --keywords=<value> Comma-separated keywords of the resulting PDF/A file
74
- --cleanmetadata Clean all the standard metadata fields, except the ones
75
- specified via the command line options.
76
- --validate Validate the resulting file. The validation is done with
77
- VeraPDF, you need a working Java installation.
78
- --validate-only Perform only the validation on the input file, again using
79
- VeraPDF
80
- --debug Write additional debug information on screen
81
- -v, --version Show the program version
82
-
83
- LICENSE:
84
- GPLv3
85
-
86
- AUTHORS:
87
- (C) 2017-2018 Matteo Seclì"
88
- }
89
-
90
-
91
- #=====# RUN HELPER FUNCTION #=====#
92
- run() {
93
- if $DEBUG; then
94
- #v=$(exec 2>&1 && set -x && set -- "$@")
95
- #echo "#${v#*--}"
96
- "$@"
97
- else
98
- "$@" 2>/dev/null #>/dev/null 2>&1
99
- fi
100
- }
101
-
102
-
103
- #=====# CHECKS #=====#
104
- if [ "$(which gs)" == "" ]; then
105
- echo " ERROR: Ghostscript is not installed or it's not in the path"
106
- exit
107
- fi
108
-
109
-
110
- #=====# VALIDATION #=====#
111
-
112
- javaCheck() {
113
- if [ "$(which java)" == "" ]; then
114
- echo " ERROR: Java is not installed or it's not in the path"
115
- echo " Cannot perform validation"
116
- exit 1
117
- fi
118
- }
119
-
120
- validate() {
121
- echo " Validating..."
122
- echo " $(./verapdf/verapdf "$1" --extract --flavour 1b --format text "$2")"
123
- }
124
-
125
-
126
- #=====# INPUT PARSER #=====#
127
- if [ "$1" == "" ]; then
128
- help
129
- exit
130
- fi
131
- while [ "$1" != "" ]; do
132
- PARAM=`echo $1 | awk -F= '{print $1}'`
133
- VALUE=`echo $1 | awk -F= '{print $2}'`
134
- case $PARAM in
135
- -h | --help)
136
- help
137
- exit
138
- ;;
139
- -v | --version)
140
- echo $VERSION
141
- exit
142
- ;;
143
- --debug)
144
- DEBUG=true
145
- MSGOPTS=""
146
- VERAMSGOPTS="--verbose"
147
- #ERROPTS=""
148
- ;;
149
- --quality)
150
- if [ "$VALUE" == "high" ]; then
151
- QUALITYOPTS="-dPDFSETTINGS=/printer"
152
- elif [ "$VALUE" == "medium" ]; then
153
- QUALITYOPTS="-dPDFSETTINGS=/ebook"
154
- elif [ "$VALUE" == "low" ]; then
155
- QUALITYOPTS="-dPDFSETTINGS=/screen"
156
- else
157
- echo " ERROR: unknown quality option '$VALUE'"
158
- help
159
- exit 1
160
- fi
161
- ;;
162
- --cleanmetadata)
163
- [ -z ${PDFTITLE+x} ] && PDFTITLE=""
164
- [ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=""
165
- [ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=""
166
- [ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=""
167
- [ -z ${PDFCREATOR+x} ] && PDFCREATOR=""
168
- [ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=""
169
- [ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=""
170
- [ -z ${PDFMODDATE+x} ] && PDFMODDATE=""
171
- [ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=""
172
- ;;
173
- --title)
174
- PDFTITLE=$VALUE
175
- ;;
176
- --author)
177
- PDFAUTHOR=$VALUE
178
- ;;
179
- --subject)
180
- PDFSUBJECT=$VALUE
181
- ;;
182
- --keywords)
183
- PDFKEYWORDS=$VALUE
184
- ;;
185
- --validate)
186
- javaCheck
187
- VALIDATE=true
188
- ;;
189
- --validate-only)
190
- javaCheck
191
- validate $VERAMSGOPTS $2
192
- exit
193
- ;;
194
- *.pdf)
195
- if [ "$INPUT" == "" ]; then
196
- INPUT=$PARAM
197
- elif [ "$OUTPUT" == "" ]; then
198
- OUTPUT=$PARAM
199
- else
200
- echo " ERROR: too many PDF files as input!"
201
- help
202
- exit 1
203
- fi
204
- ;;
205
- *)
206
- echo " ERROR: unknown parameter \"$PARAM\""
207
- help
208
- exit 1
209
- ;;
210
- esac
211
- shift
212
- done
213
-
214
- #=====# SET UP ALL THE STUFF #=====#
215
- echo "=== Welcome to PDF2ARCHIVE ==="
216
- if [ "$OUTPUT" == "" ]; then
217
- OUTPUT="${INPUT%.pdf}-PDFA.pdf"
218
- fi
219
- TMPFILE=$(mktemp)
220
- TMPDIR=$(mktemp -d)
221
- PSTMPFILE=$TMPDIR/PDFA_def.ps
222
- ICCTMPFILE=$TMPDIR/AdobeRGB1998.icc
223
- INFOTMPFILE=$TMPDIR/pdf_minimal_info.ps
224
- echo \
225
- "%!PS
226
- % Extract PDF info in a minimal way.
227
- % Inspired by 'toolbin/pdf_info.ps'.
228
-
229
- /QUIET true def
230
- File dup (r) file runpdfbegin
231
- Trailer /Info knownoget {
232
- dup /Title knownoget { (__knowninfoTitle: ) print = flush } if
233
- dup /Author knownoget { (__knowninfoAuthor: ) print = flush } if
234
- dup /Subject knownoget { (__knowninfoSubject: ) print = flush } if
235
- dup /Keywords knownoget { (__knowninfoKeywords: ) print = flush } if
236
- dup /Creator knownoget { (__knowninfoCreator: ) print = flush } if
237
- dup /Producer knownoget { (__knowninfoProducer: ) print = flush } if
238
- dup /CreationDate knownoget { (__knowninfoCreationDate: ) print = flush } if
239
- dup /ModDate knownoget { (__knowninfoModDate: ) print = flush } if
240
- dup /Trapped knownoget { (__knowninfoTrapped: ) print = flush } if
241
- } if
242
- quit
243
- " > $INFOTMPFILE
244
-
245
-
246
- #=====# PRESERVE UNSPECIFIED KNOWN STANDARD METADATA #=====#
247
- # Notes:
248
- # 'iconv' is necessary to filter out all the invalid bytes.
249
- # If it's not used, sed (unless it's GNU sed) will fail with
250
- # 'RE error: illegal byte sequence'. A solution to this is to
251
- # use 'LC_CTYPE=C && LANG=C && echo "$METADUMP" ...' in the
252
- # variable assignments; however, this produces bad PDF files.
253
- #
254
- METADUMP=$(gs -dNOSAFER -dNODISPLAY -q -sFile="$INPUT" $INFOTMPFILE | iconv -f utf-8 -t utf-8 -c)
255
- [ -z ${PDFTITLE+x} ] && PDFTITLE=$(echo "$METADUMP" | grep "__knowninfoTitle: " | sed "s/^__knowninfoTitle: //g")
256
- [ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=$(echo "$METADUMP" | grep "__knowninfoAuthor: " | sed "s/^__knowninfoAuthor: //g")
257
- [ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=$(echo "$METADUMP" | grep "__knowninfoSubject: " | sed "s/^__knowninfoSubject: //g")
258
- [ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=$(echo "$METADUMP" | grep "__knowninfoKeywords: " | sed "s/^__knowninfoKeywords: //g")
259
- [ -z ${PDFCREATOR+x} ] && PDFCREATOR=$(echo "$METADUMP" | grep "__knowninfoCreator: " | sed "s/^__knowninfoCreator: //g")
260
- [ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=$(echo "$METADUMP" | grep "__knowninfoProducer: " | sed "s/^__knowninfoProducer: //g")
261
- [ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=$(echo "$METADUMP" | grep "__knowninfoCreationDate: " | sed "s/^__knowninfoCreationDate: //g")
262
- [ -z ${PDFMODDATE+x} ] && PDFMODDATE=$(echo "$METADUMP" | grep "__knowninfoModDate: " | sed "s/^__knowninfoModDate: //g")
263
- [ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=$(echo "$METADUMP" | grep "__knowninfoTrapped: " | sed "s/^__knowninfoTrapped: //g")
264
- # Replace "Trapped" string, if not empty, with an operator. Fixes 3Heights.
265
- if [ "$PDFTRAPPED" != "" ]; then
266
- PDFTRAPPED="/$(tr '[:lower:]' '[:upper:]' <<< ${PDFTRAPPED:0:1})$(tr '[:upper:]' '[:lower:]' <<< ${PDFTRAPPED:1})"
267
- fi
268
- # Check if the operator is allowed, otherwise empty variable.
269
- if [ "$PDFTRAPPED" != "/True" ] && [ "$PDFTRAPPED" != "/False" ]; then
270
- PDFTRAPPED=""
271
- fi
272
-
273
-
274
- #=====# PRINT DEBUG INFO #=====#
275
- if $DEBUG; then
276
- echo " DEBUG: running PDF2ARCHIVE, version $VERSION"
277
- echo " DEBUG: using Ghostscript binary at $(which gs), version $(gs --version)"
278
- echo " DEBUG: the input file is '$INPUT'"
279
- echo " DEBUG: the output file is '$OUTPUT'"
280
- echo " DEBUG: the intermediate processing file is $TMPFILE"
281
- echo " DEBUG: the temporary directory is $TMPDIR"
282
- echo " DEBUG: the current quality options are '$QUALITYOPTS'"
283
- echo " DEBUG: PDF title '$PDFTITLE'"
284
- echo " DEBUG: PDF author '$PDFAUTHOR'"
285
- echo " DEBUG: PDF subject '$PDFSUBJECT'"
286
- echo " DEBUG: PDF keywords '$PDFKEYWORDS'"
287
- echo " DEBUG: PDF creator '$PDFCREATOR'"
288
- echo " DEBUG: PDF producer '$PDFPRODUCER'"
289
- echo " DEBUG: PDF creation date '$PDFCREATIONDATE'"
290
- echo " DEBUG: PDF modification date '$PDFMODDATE'"
291
- echo " DEBUG: PDF trapping '$PDFTRAPPED'"
292
- fi
293
-
294
-
295
- #=====# CREATE THE PS DEFINITION FILE #=====#
296
- echo " Creating the definition file..."
297
- echo \
298
- "%!
299
- % This prefix file for creating a PDF/A document is derived from
300
- % the sample included with Ghostscript 9.07, released under the
301
- % GNU Affero General Public License.
302
- % Modified 4/15/2013 by MCB Systems.
303
-
304
- % Feel free to modify entries marked with \"Customize\".
305
-
306
- % This assumes an ICC profile to reside in the file (AdobeRGB1998.icc),
307
- % unless the user modifies the corresponding line below.
308
-
309
- % The color space described by the ICC profile must correspond to the
310
- % ProcessColorModel specified when using this prefix file (GRAY with
311
- % DeviceGray, RGB with DeviceRGB, and CMYK with DeviceCMYK).
312
-
313
- % Define entries in the document Info dictionary :
314
-
315
- /ICCProfile ($ICCTMPFILE) % Customize.
316
- def
317
-
318
- [ /Title ($PDFTITLE) % Customize." > $PSTMPFILE
319
- if [ "$PDFAUTHOR" != "" ]; then
320
- echo " /Author ($PDFAUTHOR)" >> $PSTMPFILE
321
- fi
322
- if [ "$PDFSUBJECT" != "" ]; then
323
- echo " /Subject ($PDFSUBJECT)" >> $PSTMPFILE
324
- fi
325
- if [ "$PDFKEYWORDS" != "" ]; then
326
- echo " /Keywords ($PDFKEYWORDS)" >> $PSTMPFILE
327
- fi
328
- if [ "$PDFCREATOR" != "" ]; then
329
- echo " /Creator ($PDFCREATOR)" >> $PSTMPFILE
330
- fi
331
- echo \
332
- "% /Producer % Reserved to GS
333
- % /CreationDate % Reserved to GS
334
- % /ModDate % Reserved to GS" >> $PSTMPFILE
335
- if [ "$PDFTRAPPED" != "" ]; then
336
- echo " /Trapped $PDFTRAPPED" >> $PSTMPFILE
337
- fi
338
- echo \
339
- " /DOCINFO pdfmark
340
-
341
- % Define an ICC profile :
342
-
343
- [/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
344
- [{icc_PDFA} <</N systemdict /ProcessColorModel get /DeviceGray eq {1} {systemdict /ProcessColorModel get /DeviceRGB eq {3} {4} ifelse} ifelse >> /PUT pdfmark
345
- [{icc_PDFA} ICCProfile (r) file /PUT pdfmark
346
-
347
- % Define the output intent dictionary :
348
-
349
- [/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
350
- [{OutputIntent_PDFA} <<
351
- /Type /OutputIntent % Must be so (the standard requires).
352
- /S /GTS_PDFA1 % Must be so (the standard requires).
353
- /DestOutputProfile {icc_PDFA} % Must be so (see above).
354
- /OutputConditionIdentifier (AdobeRGB1998) % Customize
355
- >> /PUT pdfmark
356
- [{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
357
- " >> $PSTMPFILE
358
-
359
-
360
- #=====# CREATE THE COLOR PROFILE FILE #=====#
361
- echo -n -e "\\x00\\x00\\x02\\x30\\x41\\x44\\x42\\x45\\x02\\x10\\x00\\x00\\x6d\\x6e\\x74\\x72\\x52\\x47\\x42\\x20\\x58\\x59\\x5a\\x20\\x07\\xd0\\x00\\x08\\x00\\x0b\\x00\\x13\\x00\\x33\\x00\\x3b\\x61\\x63\\x73\\x70\\x41\\x50\\x50\\x4c\\x00\\x00\\x00\\x00\\x6e\\x6f\\x6e\\x65\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf6\\xd6\\x00\\x01\\x00\\x00\\x00\\x00\\xd3\\x2d\\x41\\x44\\x42\\x45\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0a\\x63\\x70\\x72\\x74\\x00\\x00\\x00\\xfc\\x00\\x00\\x00\\x32\\x64\\x65\\x73\\x63\\x00\\x00\\x01\\x30\\x00\\x00\\x00\\x6b\\x77\\x74\\x70\\x74\\x00\\x00\\x01\\x9c\\x00\\x00\\x00\\x14\\x62\\x6b\\x70\\x74\\x00\\x00\\x01\\xb0\\x00\\x00\\x00\\x14\\x72\\x54\\x52\\x43\\x00\\x00\\x01\\xc4\\x00\\x00\\x00\\x0e\\x67\\x54\\x52\\x43\\x00\\x00\\x01\\xd4\\x00\\x00\\x00\\x0e\\x62\\x54\\x52\\x43\\x00\\x00\\x01\\xe4\\x00\\x00\\x00\\x0e\\x72\\x58\\x59\\x5a\\x00\\x00\\x01\\xf4\\x00\\x00\\x00\\x14\\x67\\x58\\x59\\x5a\\x00\\x00\\x02\\x08\\x00\\x00\\x00\\x14\\x62\\x58\\x59\\x5a\\x00\\x00\\x02\\x1c\\x00\\x00\\x00\\x14\\x74\\x65\\x78\\x74\\x00\\x00\\x00\\x00\\x43\\x6f\\x70\\x79\\x72\\x69\\x67\\x68\\x74\\x20\\x32\\x30\\x30\\x30\\x20\\x41\\x64\\x6f\\x62\\x65\\x20\\x53\\x79\\x73\\x74\\x65\\x6d\\x73\\x20\\x49\\x6e\\x63\\x6f\\x72\\x70\\x6f\\x72\\x61\\x74\\x65\\x64\\x00\\x00\\x00\\x64\\x65\\x73\\x63\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x11\\x41\\x64\\x6f\\x62\\x65\\x20\\x52\\x47\\x42\\x20\\x28\\x31\\x39\\x39\\x38\\x29\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\
\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\xf3\\x51\\x00\\x01\\x00\\x00\\x00\\x01\\x16\\xcc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x9c\\x18\\x00\\x00\\x4f\\xa5\\x00\\x00\\x04\\xfc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x34\\x8d\\x00\\x00\\xa0\\x2c\\x00\\x00\\x0f\\x95\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x26\\x31\\x00\\x00\\x10\\x2f\\x00\\x00\\xbe\\x9c" > $ICCTMPFILE
362
-
363
-
364
- #=====# DO THE ACTUAL CONVERSION #=====#
365
- echo " Compressing PDF & embedding fonts..."
366
- run gs $MSGOPTS \
367
- -dBATCH -dNOPAUSE -dNOOUTERSAVE \
368
- -dCompatibilityLevel=1.4 \
369
- -dEmbedAllFonts=true -dSubsetFonts=true \
370
- -dCompressFonts=true -dCompressPages=true \
371
- -sColorConversionStrategy=RGB \
372
- -dDownsampleMonoImages=false -dDownsampleGrayImages=false -dDownsampleColorImages=false \
373
- -dAutoFilterColorImages=false -dAutoFilterGrayImages=false \
374
- -sDEVICE=pdfwrite \
375
- -sOutputFile="$TMPFILE" "$INPUT"
376
- echo " Converting to PDF/A-1B..."
377
- run gs $MSGOPTS \
378
- -dPDFA=1 -dBATCH -dNOPAUSE -dNOOUTERSAVE \
379
- $QUALITYOPTS \
380
- -dCompatibilityLevel=1.4 -dPDFACompatibilityPolicy=1 \
381
- -sProcessColorModel=DeviceRGB -sColorConversionStrategy=RGB \
382
- -sOutputICCProfile=$ICCTMPFILE \
383
- -sDEVICE=pdfwrite \
384
- -sOutputFile="$OUTPUT" "$TMPFILE" $PSTMPFILE
385
- echo " Removing temporary files..."
386
- rm $TMPFILE
387
- echo " Done, now ESSE3 is happy! ;)"
388
-
389
-
390
- #=====# VALIDATE THE RESULT #=====#
391
- if $VALIDATE; then
392
- validate $VERAMSGOPTS "$OUTPUT"
393
- else
394
- echo " Suggestion: validate the resulting PDF to be sure it's PDF/A-1B compliant."
395
- fi