libis-format 1.2.9 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -8
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +3 -16
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/config.rb +4 -3
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +21 -8
- data/lib/libis/format/converter/chain.rb +6 -18
- data/lib/libis/format/converter/fop_pdf_converter.rb +2 -0
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +21 -141
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +4 -4
- data/lib/libis/format/converter/office_converter.rb +5 -3
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +31 -98
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/spreadsheet_converter.rb +5 -3
- data/lib/libis/format/converter/video_converter.rb +3 -6
- data/lib/libis/format/converter/xslt_converter.rb +14 -15
- data/lib/libis/format/identifier.rb +4 -4
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +1 -10
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +1 -11
- data/lib/libis/format/tool/pdf_merge.rb +1 -11
- data/lib/libis/format/tool/pdf_optimizer.rb +2 -11
- data/lib/libis/format/tool/pdf_split.rb +16 -25
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -50
- data/lib/libis/format/tool/pdfa_validator.rb +30 -25
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +2 -10
- data/lib/libis/format/tool.rb +1 -2
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +5 -2
- data/libis-format.gemspec +18 -24
- metadata +78 -120
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -38
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/type_database.rb +0 -133
- data/lib/libis/format/type_database_impl.rb +0 -120
- data/tools/pdf2pdfa +0 -395
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,133 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
require 'yaml'
|
4
|
-
require 'libis/tools/extend/hash'
|
5
|
-
|
6
|
-
module Libis
|
7
|
-
module Format
|
8
|
-
|
9
|
-
# noinspection RubyClassVariableUsageInspection
|
10
|
-
class TypeDatabase
|
11
|
-
@implementation = Libis::Format::TypeDatabaseImpl.instance
|
12
|
-
|
13
|
-
def self.implementation(impl)
|
14
|
-
@implementation = impl
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.enrich(info, map_keys = {})
|
18
|
-
return {} unless info.is_a? Hash
|
19
|
-
mapper = Hash.new {|hash,key| hash[key] = key}
|
20
|
-
mapper.merge! map_keys
|
21
|
-
unless (puid = info[mapper[:PUID]]).blank?
|
22
|
-
info[mapper[:TYPE]] ||= puid_infos(puid).first[:TYPE] rescue nil
|
23
|
-
end
|
24
|
-
unless (mime = info[mapper[:MIME]]).blank?
|
25
|
-
info[mapper[:TYPE]] ||= mime_infos(mime).first[:TYPE] rescue nil
|
26
|
-
end
|
27
|
-
unless (type_name = info[mapper[:TYPE]]).nil?
|
28
|
-
mapper.keys.each do |key|
|
29
|
-
info[mapper[key]] = get(type_name, key) || info[mapper[key]]
|
30
|
-
end
|
31
|
-
info[mapper[:GROUP]] = type_group(type_name)
|
32
|
-
end
|
33
|
-
info
|
34
|
-
end
|
35
|
-
|
36
|
-
def self.normalize(info, map_keys = {})
|
37
|
-
return {} unless info.is_a? Hash
|
38
|
-
mapper = Hash.new {|hash,key| hash[key] = key}
|
39
|
-
mapper.merge! map_keys
|
40
|
-
unless (puid = info[mapper[:PUID]]).blank?
|
41
|
-
info[mapper[:TYPE]] ||= puid_infos(puid).first[:TYPE] rescue nil
|
42
|
-
end
|
43
|
-
unless (mime = info[mapper[:MIME]]).blank?
|
44
|
-
info[mapper[:TYPE]] ||= mime_infos(mime).first[:TYPE] rescue nil
|
45
|
-
end
|
46
|
-
unless (type_name = info[mapper[:TYPE]]).nil?
|
47
|
-
info[mapper[:MIME]] = type_mimetypes(type_name).first if type_mimetypes(type_name).first
|
48
|
-
info[mapper[:GROUP]] = type_group(type_name)
|
49
|
-
end
|
50
|
-
info
|
51
|
-
end
|
52
|
-
|
53
|
-
def self.get(type_name, key)
|
54
|
-
case key
|
55
|
-
when :MIME
|
56
|
-
type_mimetypes(type_name).first
|
57
|
-
when :PUID
|
58
|
-
type_puids(type_name).first
|
59
|
-
when :EXTENSION
|
60
|
-
type_extentions(type_name).first
|
61
|
-
else
|
62
|
-
typeinfo(type_name)[key]
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def self.type_group(t)
|
67
|
-
typeinfo(t)[:GROUP]
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.type_mimetypes(t)
|
71
|
-
typeinfo(t)[:MIME] || []
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.type_puids(t)
|
75
|
-
typeinfo(t)[:PUID] || []
|
76
|
-
end
|
77
|
-
|
78
|
-
def self.type_extentions(t)
|
79
|
-
typeinfo(t)[:EXTENSIONS] || []
|
80
|
-
end
|
81
|
-
|
82
|
-
def self.typeinfo(t)
|
83
|
-
@implementation.typeinfo(t)
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.group_types(group)
|
87
|
-
@implementation.group_types(group)
|
88
|
-
end
|
89
|
-
|
90
|
-
def self.puid_infos(puid)
|
91
|
-
@implementation.puid_infos(puid)
|
92
|
-
end
|
93
|
-
|
94
|
-
def self.puid_types(puid)
|
95
|
-
@implementation.puid_types(puid)
|
96
|
-
end
|
97
|
-
|
98
|
-
def self.puid_groups(puid)
|
99
|
-
puid_types(puid).map(&method(:type_group))
|
100
|
-
end
|
101
|
-
|
102
|
-
def self.mime_infos(mime)
|
103
|
-
@implementation.mime_infos(mime)
|
104
|
-
end
|
105
|
-
|
106
|
-
def self.mime_types(mime)
|
107
|
-
@implementation.mime_types(mime)
|
108
|
-
end
|
109
|
-
|
110
|
-
def self.mime_groups(mime)
|
111
|
-
mime_types(mime).map(&method(:type_group))
|
112
|
-
end
|
113
|
-
|
114
|
-
def self.ext_infos(ext)
|
115
|
-
@implementation.ext_infos(ext)
|
116
|
-
end
|
117
|
-
|
118
|
-
def self.ext_types(ext)
|
119
|
-
@implementation.ext_types(ext)
|
120
|
-
end
|
121
|
-
|
122
|
-
def self.puid_typeinfo(puid)
|
123
|
-
@implementation.puid_typeinfo(puid)
|
124
|
-
end
|
125
|
-
|
126
|
-
def self.known_mime?(mime)
|
127
|
-
@implementation.known_mime?(mime)
|
128
|
-
end
|
129
|
-
|
130
|
-
end
|
131
|
-
|
132
|
-
end
|
133
|
-
end
|
@@ -1,120 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
require 'singleton'
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
require 'libis/tools/logger'
|
7
|
-
require 'libis/tools/extend/hash'
|
8
|
-
require 'libis/tools/extend/string'
|
9
|
-
|
10
|
-
module Libis
|
11
|
-
module Format
|
12
|
-
|
13
|
-
class TypeDatabaseImpl
|
14
|
-
include Singleton
|
15
|
-
include ::Libis::Tools::Logger
|
16
|
-
|
17
|
-
def typeinfo(t)
|
18
|
-
@types[t.to_sym] || {}
|
19
|
-
end
|
20
|
-
|
21
|
-
def group_types(group)
|
22
|
-
@types.select do |_, v|
|
23
|
-
v[:GROUP] == group.to_sym
|
24
|
-
end.keys
|
25
|
-
end
|
26
|
-
|
27
|
-
def puid_infos(puid)
|
28
|
-
@types.select do |_, v|
|
29
|
-
v[:PUID].include? puid rescue false
|
30
|
-
end.values
|
31
|
-
end
|
32
|
-
|
33
|
-
def puid_types(puid)
|
34
|
-
@types.select do |_, v|
|
35
|
-
v[:PUID].include? puid rescue false
|
36
|
-
end.keys
|
37
|
-
end
|
38
|
-
|
39
|
-
def mime_infos(mime)
|
40
|
-
@types.select do |_, v|
|
41
|
-
v[:MIME].include? mime rescue false
|
42
|
-
end.values
|
43
|
-
end
|
44
|
-
|
45
|
-
def mime_types(mime)
|
46
|
-
@types.select do |_, v|
|
47
|
-
v[:MIME].include? mime rescue false
|
48
|
-
end.keys
|
49
|
-
end
|
50
|
-
|
51
|
-
def ext_infos(ext)
|
52
|
-
ext = ext.gsub /^\./, ''
|
53
|
-
@types.select do |_, v|
|
54
|
-
v[:EXTENSIONS].include?(ext) rescue false
|
55
|
-
end.values
|
56
|
-
end
|
57
|
-
|
58
|
-
def ext_types(ext)
|
59
|
-
ext = ext.gsub /^\./, ''
|
60
|
-
@types.select do |_, v|
|
61
|
-
v[:EXTENSIONS].include?(ext) rescue false
|
62
|
-
end.keys
|
63
|
-
end
|
64
|
-
|
65
|
-
def puid_typeinfo(puid)
|
66
|
-
@types.each do |_, v|
|
67
|
-
return v if v[:PUID] and v[:PUID].include?(puid)
|
68
|
-
end
|
69
|
-
nil
|
70
|
-
end
|
71
|
-
|
72
|
-
def known_mime?(mime)
|
73
|
-
@types.each do |_, v|
|
74
|
-
return true if v[:MIME].include? mime
|
75
|
-
end
|
76
|
-
false
|
77
|
-
end
|
78
|
-
|
79
|
-
def load_types(file_or_hash = {}, append = true)
|
80
|
-
hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
|
81
|
-
# noinspection RubyResolve
|
82
|
-
hash.each do |group, type_info|
|
83
|
-
type_info.each do |type_name, info|
|
84
|
-
type_key = type_name.to_sym
|
85
|
-
info.symbolize_keys!
|
86
|
-
info[:TYPE] = type_key
|
87
|
-
info[:GROUP] = group.to_sym
|
88
|
-
info[:MIME] = info[:MIME].strip.split(/[\s,]+/).map(&:strip) rescue []
|
89
|
-
info[:EXTENSIONS] = info[:EXTENSIONS].strip.split(/[\s,]+/).map { |v| v.strip } rescue []
|
90
|
-
info[:PUID] = info[:PUID].strip.split(/[\s,]+/).map { |v| v.strip } if info[:PUID]
|
91
|
-
if @types.has_key?(type_key)
|
92
|
-
warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
|
93
|
-
info.merge!(@types[type_key]) do |_,v_new,v_old|
|
94
|
-
case v_old
|
95
|
-
when Array
|
96
|
-
append ? v_old + v_new : v_new + v_old
|
97
|
-
when Hash
|
98
|
-
append ? v_new.merge(v_old) : v_old.merge(v_new)
|
99
|
-
else
|
100
|
-
append ? v_old : v_new
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
@types[type_key] = info
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
protected
|
110
|
-
|
111
|
-
def initialize
|
112
|
-
@types = Hash.new
|
113
|
-
type_database = Libis::Format::Config[:type_database]
|
114
|
-
load_types(type_database)
|
115
|
-
end
|
116
|
-
|
117
|
-
end
|
118
|
-
|
119
|
-
end
|
120
|
-
end
|
data/tools/pdf2pdfa
DELETED
@@ -1,395 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
# PDF2ARCHIVE 0.3.2
|
4
|
-
# (C) 2018 Matteo Seclì <secli.matteo@gmail.com>
|
5
|
-
#
|
6
|
-
# This program is free software: you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation, either version 3 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# This program is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
|
19
|
-
|
20
|
-
#=====# INITIALIZE VARIABLES #=====#
|
21
|
-
VERSION="0.3.2"
|
22
|
-
INPUT=""
|
23
|
-
OUTPUT=""
|
24
|
-
QUALITYOPTS=""
|
25
|
-
DEBUG=false
|
26
|
-
VALIDATE=false
|
27
|
-
MSGOPTS="-dQUIET -sstdout=/dev/null"
|
28
|
-
VERAMSGOPTS=""
|
29
|
-
#ERROPTS="2>/dev/null"
|
30
|
-
|
31
|
-
|
32
|
-
#=====# HELP FUNCTION #=====#
|
33
|
-
help()
|
34
|
-
{
|
35
|
-
TOTLEN="38" # Adjust this
|
36
|
-
TITLESTRING="PDF2ARCHIVE, version $VERSION"
|
37
|
-
SPACEL=$(echo "($TOTLEN-${#TITLESTRING})/2 + (36-${#TITLESTRING})%2 - 1" | bc)
|
38
|
-
SPACER=$(echo "($TOTLEN-${#TITLESTRING})/2 - 1" | bc)
|
39
|
-
TITLESTRING=$(printf "|%-${SPACEL}s%s%-${SPACER}s|" "" "$TITLESTRING" "")
|
40
|
-
DASHSTRING=$(eval printf "%.0s-" {1..$TOTLEN})
|
41
|
-
echo \
|
42
|
-
"$DASHSTRING
|
43
|
-
$TITLESTRING
|
44
|
-
$DASHSTRING
|
45
|
-
|
46
|
-
OVERVIEW:
|
47
|
-
A simple Ghostscript-based PDF to PDF/A-1B converter.
|
48
|
-
|
49
|
-
USAGE:
|
50
|
-
$0 [options] input.pdf [output.pdf]
|
51
|
-
|
52
|
-
EXAMPLES:
|
53
|
-
Convert 'input.pdf' in PDF/A-1B format; the output is 'input-PDFA.pdf':
|
54
|
-
$0 input.pdf
|
55
|
-
Convert 'input.pdf' in PDF/A-1B format; the output is 'output.pdf':
|
56
|
-
$0 input.pdf output.pdf
|
57
|
-
Convert 'input.pdf' in PDF/A-1B format and perform a high-quality compression:
|
58
|
-
$0 --quality=high input.pdf
|
59
|
-
Convert 'input.pdf' in PDF/A-1B format and specify the document title:
|
60
|
-
$0 --title=\"Title of your nice document\" input.pdf
|
61
|
-
Convert 'input.pdf' in PDF/A-1B format and validate the result:
|
62
|
-
$0 --validate input.pdf
|
63
|
-
|
64
|
-
OPTIONS:
|
65
|
-
-h, --help Show the help
|
66
|
-
--quality=<value> Set the quality of the output when downsampling. The
|
67
|
-
possible values are 'high', 'medium' and 'low', where
|
68
|
-
'high' gives the highest output quality. By specifying no
|
69
|
-
option, no additional downsampling is done.
|
70
|
-
--title=<value> Title of the resulting PDF/A file
|
71
|
-
--author=<value> Author of the resulting PDF/A file
|
72
|
-
--subject=<value> Subject of the resulting PDF/A file
|
73
|
-
--keywords=<value> Comma-separated keywords of the resulting PDF/A file
|
74
|
-
--cleanmetadata Clean all the standard metadata fields, except the ones
|
75
|
-
specified via the command line options.
|
76
|
-
--validate Validate the resulting file. The validation is done with
|
77
|
-
VeraPDF, you need a working Java installation.
|
78
|
-
--validate-only Perform only the validation on the input file, again using
|
79
|
-
VeraPDF
|
80
|
-
--debug Write additional debug information on screen
|
81
|
-
-v, --version Show the program version
|
82
|
-
|
83
|
-
LICENSE:
|
84
|
-
GPLv3
|
85
|
-
|
86
|
-
AUTHORS:
|
87
|
-
(C) 2017-2018 Matteo Seclì"
|
88
|
-
}
|
89
|
-
|
90
|
-
|
91
|
-
#=====# RUN HELPER FUNCTION #=====#
|
92
|
-
run() {
|
93
|
-
if $DEBUG; then
|
94
|
-
#v=$(exec 2>&1 && set -x && set -- "$@")
|
95
|
-
#echo "#${v#*--}"
|
96
|
-
"$@"
|
97
|
-
else
|
98
|
-
"$@" 2>/dev/null #>/dev/null 2>&1
|
99
|
-
fi
|
100
|
-
}
|
101
|
-
|
102
|
-
|
103
|
-
#=====# CHECKS #=====#
|
104
|
-
if [ "$(which gs)" == "" ]; then
|
105
|
-
echo " ERROR: Ghostscript is not installed or it's not in the path"
|
106
|
-
exit
|
107
|
-
fi
|
108
|
-
|
109
|
-
|
110
|
-
#=====# VALIDATION #=====#
|
111
|
-
|
112
|
-
javaCheck() {
|
113
|
-
if [ "$(which java)" == "" ]; then
|
114
|
-
echo " ERROR: Java is not installed or it's not in the path"
|
115
|
-
echo " Cannot perform validation"
|
116
|
-
exit 1
|
117
|
-
fi
|
118
|
-
}
|
119
|
-
|
120
|
-
validate() {
|
121
|
-
echo " Validating..."
|
122
|
-
echo " $(./verapdf/verapdf "$1" --extract --flavour 1b --format text "$2")"
|
123
|
-
}
|
124
|
-
|
125
|
-
|
126
|
-
#=====# INPUT PARSER #=====#
|
127
|
-
if [ "$1" == "" ]; then
|
128
|
-
help
|
129
|
-
exit
|
130
|
-
fi
|
131
|
-
while [ "$1" != "" ]; do
|
132
|
-
PARAM=`echo $1 | awk -F= '{print $1}'`
|
133
|
-
VALUE=`echo $1 | awk -F= '{print $2}'`
|
134
|
-
case $PARAM in
|
135
|
-
-h | --help)
|
136
|
-
help
|
137
|
-
exit
|
138
|
-
;;
|
139
|
-
-v | --version)
|
140
|
-
echo $VERSION
|
141
|
-
exit
|
142
|
-
;;
|
143
|
-
--debug)
|
144
|
-
DEBUG=true
|
145
|
-
MSGOPTS=""
|
146
|
-
VERAMSGOPTS="--verbose"
|
147
|
-
#ERROPTS=""
|
148
|
-
;;
|
149
|
-
--quality)
|
150
|
-
if [ "$VALUE" == "high" ]; then
|
151
|
-
QUALITYOPTS="-dPDFSETTINGS=/printer"
|
152
|
-
elif [ "$VALUE" == "medium" ]; then
|
153
|
-
QUALITYOPTS="-dPDFSETTINGS=/ebook"
|
154
|
-
elif [ "$VALUE" == "low" ]; then
|
155
|
-
QUALITYOPTS="-dPDFSETTINGS=/screen"
|
156
|
-
else
|
157
|
-
echo " ERROR: unknown quality option '$VALUE'"
|
158
|
-
help
|
159
|
-
exit 1
|
160
|
-
fi
|
161
|
-
;;
|
162
|
-
--cleanmetadata)
|
163
|
-
[ -z ${PDFTITLE+x} ] && PDFTITLE=""
|
164
|
-
[ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=""
|
165
|
-
[ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=""
|
166
|
-
[ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=""
|
167
|
-
[ -z ${PDFCREATOR+x} ] && PDFCREATOR=""
|
168
|
-
[ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=""
|
169
|
-
[ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=""
|
170
|
-
[ -z ${PDFMODDATE+x} ] && PDFMODDATE=""
|
171
|
-
[ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=""
|
172
|
-
;;
|
173
|
-
--title)
|
174
|
-
PDFTITLE=$VALUE
|
175
|
-
;;
|
176
|
-
--author)
|
177
|
-
PDFAUTHOR=$VALUE
|
178
|
-
;;
|
179
|
-
--subject)
|
180
|
-
PDFSUBJECT=$VALUE
|
181
|
-
;;
|
182
|
-
--keywords)
|
183
|
-
PDFKEYWORDS=$VALUE
|
184
|
-
;;
|
185
|
-
--validate)
|
186
|
-
javaCheck
|
187
|
-
VALIDATE=true
|
188
|
-
;;
|
189
|
-
--validate-only)
|
190
|
-
javaCheck
|
191
|
-
validate $VERAMSGOPTS $2
|
192
|
-
exit
|
193
|
-
;;
|
194
|
-
*.pdf)
|
195
|
-
if [ "$INPUT" == "" ]; then
|
196
|
-
INPUT=$PARAM
|
197
|
-
elif [ "$OUTPUT" == "" ]; then
|
198
|
-
OUTPUT=$PARAM
|
199
|
-
else
|
200
|
-
echo " ERROR: too many PDF files as input!"
|
201
|
-
help
|
202
|
-
exit 1
|
203
|
-
fi
|
204
|
-
;;
|
205
|
-
*)
|
206
|
-
echo " ERROR: unknown parameter \"$PARAM\""
|
207
|
-
help
|
208
|
-
exit 1
|
209
|
-
;;
|
210
|
-
esac
|
211
|
-
shift
|
212
|
-
done
|
213
|
-
|
214
|
-
#=====# SET UP ALL THE STUFF #=====#
|
215
|
-
echo "=== Welcome to PDF2ARCHIVE ==="
|
216
|
-
if [ "$OUTPUT" == "" ]; then
|
217
|
-
OUTPUT="${INPUT%.pdf}-PDFA.pdf"
|
218
|
-
fi
|
219
|
-
TMPFILE=$(mktemp)
|
220
|
-
TMPDIR=$(mktemp -d)
|
221
|
-
PSTMPFILE=$TMPDIR/PDFA_def.ps
|
222
|
-
ICCTMPFILE=$TMPDIR/AdobeRGB1998.icc
|
223
|
-
INFOTMPFILE=$TMPDIR/pdf_minimal_info.ps
|
224
|
-
echo \
|
225
|
-
"%!PS
|
226
|
-
% Extract PDF info in a minimal way.
|
227
|
-
% Inspired by 'toolbin/pdf_info.ps'.
|
228
|
-
|
229
|
-
/QUIET true def
|
230
|
-
File dup (r) file runpdfbegin
|
231
|
-
Trailer /Info knownoget {
|
232
|
-
dup /Title knownoget { (__knowninfoTitle: ) print = flush } if
|
233
|
-
dup /Author knownoget { (__knowninfoAuthor: ) print = flush } if
|
234
|
-
dup /Subject knownoget { (__knowninfoSubject: ) print = flush } if
|
235
|
-
dup /Keywords knownoget { (__knowninfoKeywords: ) print = flush } if
|
236
|
-
dup /Creator knownoget { (__knowninfoCreator: ) print = flush } if
|
237
|
-
dup /Producer knownoget { (__knowninfoProducer: ) print = flush } if
|
238
|
-
dup /CreationDate knownoget { (__knowninfoCreationDate: ) print = flush } if
|
239
|
-
dup /ModDate knownoget { (__knowninfoModDate: ) print = flush } if
|
240
|
-
dup /Trapped knownoget { (__knowninfoTrapped: ) print = flush } if
|
241
|
-
} if
|
242
|
-
quit
|
243
|
-
" > $INFOTMPFILE
|
244
|
-
|
245
|
-
|
246
|
-
#=====# PRESERVE UNSPECIFIED KNOWN STANDARD METADATA #=====#
|
247
|
-
# Notes:
|
248
|
-
# 'iconv' is necessary to filter out all the invalid bytes.
|
249
|
-
# If it's not used, sed (unless it's GNU sed) will fail with
|
250
|
-
# 'RE error: illegal byte sequence'. A solution to this is to
|
251
|
-
# use 'LC_CTYPE=C && LANG=C && echo "$METADUMP" ...' in the
|
252
|
-
# variable assignments; however, this produces bad PDF files.
|
253
|
-
#
|
254
|
-
METADUMP=$(gs -dNOSAFER -dNODISPLAY -q -sFile="$INPUT" $INFOTMPFILE | iconv -f utf-8 -t utf-8 -c)
|
255
|
-
[ -z ${PDFTITLE+x} ] && PDFTITLE=$(echo "$METADUMP" | grep "__knowninfoTitle: " | sed "s/^__knowninfoTitle: //g")
|
256
|
-
[ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=$(echo "$METADUMP" | grep "__knowninfoAuthor: " | sed "s/^__knowninfoAuthor: //g")
|
257
|
-
[ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=$(echo "$METADUMP" | grep "__knowninfoSubject: " | sed "s/^__knowninfoSubject: //g")
|
258
|
-
[ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=$(echo "$METADUMP" | grep "__knowninfoKeywords: " | sed "s/^__knowninfoKeywords: //g")
|
259
|
-
[ -z ${PDFCREATOR+x} ] && PDFCREATOR=$(echo "$METADUMP" | grep "__knowninfoCreator: " | sed "s/^__knowninfoCreator: //g")
|
260
|
-
[ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=$(echo "$METADUMP" | grep "__knowninfoProducer: " | sed "s/^__knowninfoProducer: //g")
|
261
|
-
[ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=$(echo "$METADUMP" | grep "__knowninfoCreationDate: " | sed "s/^__knowninfoCreationDate: //g")
|
262
|
-
[ -z ${PDFMODDATE+x} ] && PDFMODDATE=$(echo "$METADUMP" | grep "__knowninfoModDate: " | sed "s/^__knowninfoModDate: //g")
|
263
|
-
[ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=$(echo "$METADUMP" | grep "__knowninfoTrapped: " | sed "s/^__knowninfoTrapped: //g")
|
264
|
-
# Replace "Trapped" string, if not empty, with an operator. Fixes 3Heights.
|
265
|
-
if [ "$PDFTRAPPED" != "" ]; then
|
266
|
-
PDFTRAPPED="/$(tr '[:lower:]' '[:upper:]' <<< ${PDFTRAPPED:0:1})$(tr '[:upper:]' '[:lower:]' <<< ${PDFTRAPPED:1})"
|
267
|
-
fi
|
268
|
-
# Check if the operator is allowed, otherwise empty variable.
|
269
|
-
if [ "$PDFTRAPPED" != "/True" ] && [ "$PDFTRAPPED" != "/False" ]; then
|
270
|
-
PDFTRAPPED=""
|
271
|
-
fi
|
272
|
-
|
273
|
-
|
274
|
-
#=====# PRINT DEBUG INFO #=====#
|
275
|
-
if $DEBUG; then
|
276
|
-
echo " DEBUG: running PDF2ARCHIVE, version $VERSION"
|
277
|
-
echo " DEBUG: using Ghostscript binary at $(which gs), version $(gs --version)"
|
278
|
-
echo " DEBUG: the input file is '$INPUT'"
|
279
|
-
echo " DEBUG: the output file is '$OUTPUT'"
|
280
|
-
echo " DEBUG: the intermediate processing file is $TMPFILE"
|
281
|
-
echo " DEBUG: the temporary directory is $TMPDIR"
|
282
|
-
echo " DEBUG: the current quality options are '$QUALITYOPTS'"
|
283
|
-
echo " DEBUG: PDF title '$PDFTITLE'"
|
284
|
-
echo " DEBUG: PDF author '$PDFAUTHOR'"
|
285
|
-
echo " DEBUG: PDF subject '$PDFSUBJECT'"
|
286
|
-
echo " DEBUG: PDF keywords '$PDFKEYWORDS'"
|
287
|
-
echo " DEBUG: PDF creator '$PDFCREATOR'"
|
288
|
-
echo " DEBUG: PDF producer '$PDFPRODUCER'"
|
289
|
-
echo " DEBUG: PDF creation date '$PDFCREATIONDATE'"
|
290
|
-
echo " DEBUG: PDF modification date '$PDFMODDATE'"
|
291
|
-
echo " DEBUG: PDF trapping '$PDFTRAPPED'"
|
292
|
-
fi
|
293
|
-
|
294
|
-
|
295
|
-
#=====# CREATE THE PS DEFINITION FILE #=====#
|
296
|
-
echo " Creating the definition file..."
|
297
|
-
echo \
|
298
|
-
"%!
|
299
|
-
% This prefix file for creating a PDF/A document is derived from
|
300
|
-
% the sample included with Ghostscript 9.07, released under the
|
301
|
-
% GNU Affero General Public License.
|
302
|
-
% Modified 4/15/2013 by MCB Systems.
|
303
|
-
|
304
|
-
% Feel free to modify entries marked with \"Customize\".
|
305
|
-
|
306
|
-
% This assumes an ICC profile to reside in the file (AdobeRGB1998.icc),
|
307
|
-
% unless the user modifies the corresponding line below.
|
308
|
-
|
309
|
-
% The color space described by the ICC profile must correspond to the
|
310
|
-
% ProcessColorModel specified when using this prefix file (GRAY with
|
311
|
-
% DeviceGray, RGB with DeviceRGB, and CMYK with DeviceCMYK).
|
312
|
-
|
313
|
-
% Define entries in the document Info dictionary :
|
314
|
-
|
315
|
-
/ICCProfile ($ICCTMPFILE) % Customize.
|
316
|
-
def
|
317
|
-
|
318
|
-
[ /Title ($PDFTITLE) % Customize." > $PSTMPFILE
|
319
|
-
if [ "$PDFAUTHOR" != "" ]; then
|
320
|
-
echo " /Author ($PDFAUTHOR)" >> $PSTMPFILE
|
321
|
-
fi
|
322
|
-
if [ "$PDFSUBJECT" != "" ]; then
|
323
|
-
echo " /Subject ($PDFSUBJECT)" >> $PSTMPFILE
|
324
|
-
fi
|
325
|
-
if [ "$PDFKEYWORDS" != "" ]; then
|
326
|
-
echo " /Keywords ($PDFKEYWORDS)" >> $PSTMPFILE
|
327
|
-
fi
|
328
|
-
if [ "$PDFCREATOR" != "" ]; then
|
329
|
-
echo " /Creator ($PDFCREATOR)" >> $PSTMPFILE
|
330
|
-
fi
|
331
|
-
echo \
|
332
|
-
"% /Producer % Reserved to GS
|
333
|
-
% /CreationDate % Reserved to GS
|
334
|
-
% /ModDate % Reserved to GS" >> $PSTMPFILE
|
335
|
-
if [ "$PDFTRAPPED" != "" ]; then
|
336
|
-
echo " /Trapped $PDFTRAPPED" >> $PSTMPFILE
|
337
|
-
fi
|
338
|
-
echo \
|
339
|
-
" /DOCINFO pdfmark
|
340
|
-
|
341
|
-
% Define an ICC profile :
|
342
|
-
|
343
|
-
[/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
|
344
|
-
[{icc_PDFA} <</N systemdict /ProcessColorModel get /DeviceGray eq {1} {systemdict /ProcessColorModel get /DeviceRGB eq {3} {4} ifelse} ifelse >> /PUT pdfmark
|
345
|
-
[{icc_PDFA} ICCProfile (r) file /PUT pdfmark
|
346
|
-
|
347
|
-
% Define the output intent dictionary :
|
348
|
-
|
349
|
-
[/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
|
350
|
-
[{OutputIntent_PDFA} <<
|
351
|
-
/Type /OutputIntent % Must be so (the standard requires).
|
352
|
-
/S /GTS_PDFA1 % Must be so (the standard requires).
|
353
|
-
/DestOutputProfile {icc_PDFA} % Must be so (see above).
|
354
|
-
/OutputConditionIdentifier (AdobeRGB1998) % Customize
|
355
|
-
>> /PUT pdfmark
|
356
|
-
[{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
|
357
|
-
" >> $PSTMPFILE
|
358
|
-
|
359
|
-
|
360
|
-
#=====# CREATE THE COLOR PROFILE FILE #=====#
|
361
|
-
echo -n -e "\\x00\\x00\\x02\\x30\\x41\\x44\\x42\\x45\\x02\\x10\\x00\\x00\\x6d\\x6e\\x74\\x72\\x52\\x47\\x42\\x20\\x58\\x59\\x5a\\x20\\x07\\xd0\\x00\\x08\\x00\\x0b\\x00\\x13\\x00\\x33\\x00\\x3b\\x61\\x63\\x73\\x70\\x41\\x50\\x50\\x4c\\x00\\x00\\x00\\x00\\x6e\\x6f\\x6e\\x65\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf6\\xd6\\x00\\x01\\x00\\x00\\x00\\x00\\xd3\\x2d\\x41\\x44\\x42\\x45\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0a\\x63\\x70\\x72\\x74\\x00\\x00\\x00\\xfc\\x00\\x00\\x00\\x32\\x64\\x65\\x73\\x63\\x00\\x00\\x01\\x30\\x00\\x00\\x00\\x6b\\x77\\x74\\x70\\x74\\x00\\x00\\x01\\x9c\\x00\\x00\\x00\\x14\\x62\\x6b\\x70\\x74\\x00\\x00\\x01\\xb0\\x00\\x00\\x00\\x14\\x72\\x54\\x52\\x43\\x00\\x00\\x01\\xc4\\x00\\x00\\x00\\x0e\\x67\\x54\\x52\\x43\\x00\\x00\\x01\\xd4\\x00\\x00\\x00\\x0e\\x62\\x54\\x52\\x43\\x00\\x00\\x01\\xe4\\x00\\x00\\x00\\x0e\\x72\\x58\\x59\\x5a\\x00\\x00\\x01\\xf4\\x00\\x00\\x00\\x14\\x67\\x58\\x59\\x5a\\x00\\x00\\x02\\x08\\x00\\x00\\x00\\x14\\x62\\x58\\x59\\x5a\\x00\\x00\\x02\\x1c\\x00\\x00\\x00\\x14\\x74\\x65\\x78\\x74\\x00\\x00\\x00\\x00\\x43\\x6f\\x70\\x79\\x72\\x69\\x67\\x68\\x74\\x20\\x32\\x30\\x30\\x30\\x20\\x41\\x64\\x6f\\x62\\x65\\x20\\x53\\x79\\x73\\x74\\x65\\x6d\\x73\\x20\\x49\\x6e\\x63\\x6f\\x72\\x70\\x6f\\x72\\x61\\x74\\x65\\x64\\x00\\x00\\x00\\x64\\x65\\x73\\x63\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x11\\x41\\x64\\x6f\\x62\\x65\\x20\\x52\\x47\\x42\\x20\\x28\\x31\\x39\\x39\\x38\\x29\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\xf3\\x51\\x00\\x01\\x00\\x00\\x00\\x01\\x16\\xcc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x9c\\x18\\x00\\x00\\x4f\\xa5\\x00\\x00\\x04\\xfc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x34\\x8d\\x00\\x00\\xa0\\x2c\\x00\\x00\\x0f\\x95\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x26\\x31\\x00\\x00\\x10\\x2f\\x00\\x00\\xbe\\x9c" > $ICCTMPFILE
|
362
|
-
|
363
|
-
|
364
|
-
#=====# DO THE ACTUAL CONVERSION #=====#
|
365
|
-
echo " Compressing PDF & embedding fonts..."
|
366
|
-
run gs $MSGOPTS \
|
367
|
-
-dBATCH -dNOPAUSE -dNOOUTERSAVE \
|
368
|
-
-dCompatibilityLevel=1.4 \
|
369
|
-
-dEmbedAllFonts=true -dSubsetFonts=true \
|
370
|
-
-dCompressFonts=true -dCompressPages=true \
|
371
|
-
-sColorConversionStrategy=RGB \
|
372
|
-
-dDownsampleMonoImages=false -dDownsampleGrayImages=false -dDownsampleColorImages=false \
|
373
|
-
-dAutoFilterColorImages=false -dAutoFilterGrayImages=false \
|
374
|
-
-sDEVICE=pdfwrite \
|
375
|
-
-sOutputFile="$TMPFILE" "$INPUT"
|
376
|
-
echo " Converting to PDF/A-1B..."
|
377
|
-
run gs $MSGOPTS \
|
378
|
-
-dPDFA=1 -dBATCH -dNOPAUSE -dNOOUTERSAVE \
|
379
|
-
$QUALITYOPTS \
|
380
|
-
-dCompatibilityLevel=1.4 -dPDFACompatibilityPolicy=1 \
|
381
|
-
-sProcessColorModel=DeviceRGB -sColorConversionStrategy=RGB \
|
382
|
-
-sOutputICCProfile=$ICCTMPFILE \
|
383
|
-
-sDEVICE=pdfwrite \
|
384
|
-
-sOutputFile="$OUTPUT" "$TMPFILE" $PSTMPFILE
|
385
|
-
echo " Removing temporary files..."
|
386
|
-
rm $TMPFILE
|
387
|
-
echo " Done, now ESSE3 is happy! ;)"
|
388
|
-
|
389
|
-
|
390
|
-
#=====# VALIDATE THE RESULT #=====#
|
391
|
-
if $VALIDATE; then
|
392
|
-
validate $VERAMSGOPTS "$OUTPUT"
|
393
|
-
else
|
394
|
-
echo " Suggestion: validate the resulting PDF to be sure it's PDF/A-1B compliant."
|
395
|
-
fi
|
/data/bin/{droid_tool → droid}
RENAMED
File without changes
|
/data/bin/{fido_tool → fido}
RENAMED
File without changes
|