libis-format 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/data/AdobeRGB1998.icc +0 -0
- data/data/PDFA_def.ps +3 -3
- data/lib/libis/format/config.rb +1 -1
- data/lib/libis/format/converter/pdf_converter.rb +10 -4
- data/lib/libis/format/converter/xslt_converter.rb +15 -14
- data/lib/libis/format/tool/email_to_pdf.rb +4 -4
- data/lib/libis/format/tool/fop_pdf.rb +1 -0
- data/lib/libis/format/tool/office_to_pdf.rb +3 -3
- data/lib/libis/format/tool/pdf_to_pdfa.rb +51 -44
- data/lib/libis/format/tool/pdfa_validator.rb +28 -35
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +3 -3
- data/lib/libis/format/version.rb +1 -1
- data/libis-format.gemspec +2 -0
- data/tools/pdf2pdfa +395 -0
- metadata +18 -3
- data/data/eciRGB_v2.icc +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c8f2304a5f189140ad867cb1d7d9d41f280f796992c84514316b7e584628a64b
|
4
|
+
data.tar.gz: ee097826d8e73fa7fa72167b4a3eaebef5992bba8fd78a6e0d454fe552730d6c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9fdb85d3847ba3ed08c96cc409b5e7f2c471d80edf61ab6581b28cb46380e1b48e8c65daf182717dd35cf216d91e50f1680d31a352d9aab557d54f7f3af61dc8
|
7
|
+
data.tar.gz: 0d8d4f7c29c0fa5da26a85eb82554796cb1ac18f98e00e3f1e700bd4eb79f647b8311f56e2eb7a46cc1679a2809fc140b1cc702856da1baacafd5127a65ea66f
|
data/.gitignore
CHANGED
Binary file
|
data/data/PDFA_def.ps
CHANGED
@@ -5,10 +5,10 @@
|
|
5
5
|
% unless the user modifies the corresponding line below.
|
6
6
|
|
7
7
|
% Define entries in the document Info dictionary :
|
8
|
-
/ICCProfile ([**
|
8
|
+
/ICCProfile ([**ICC profile**])
|
9
9
|
def
|
10
10
|
|
11
|
-
[
|
11
|
+
[ [**METADATA**]
|
12
12
|
/DOCINFO pdfmark
|
13
13
|
|
14
14
|
% Define an ICC profile :
|
@@ -35,6 +35,6 @@ def
|
|
35
35
|
/Type /OutputIntent % Must be so (the standard requires).
|
36
36
|
/S /GTS_PDFA1 % Must be so (the standard requires).
|
37
37
|
/DestOutputProfile {icc_PDFA} % Must be so (see above).
|
38
|
-
/OutputConditionIdentifier ([**
|
38
|
+
/OutputConditionIdentifier ([**ICC reference**])
|
39
39
|
>> /PUT pdfmark
|
40
40
|
[{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
|
data/lib/libis/format/config.rb
CHANGED
@@ -21,7 +21,7 @@ module Libis
|
|
21
21
|
Config[:fido_formats] = [(File.join(Libis::Format::DATA_DIR, 'lias_formats.xml'))]
|
22
22
|
Config[:pdf_tool] = File.join(Libis::Format::TOOL_DIR, 'PdfTool.jar')
|
23
23
|
Config[:preflight_jar] = File.join(Libis::Format::TOOL_DIR, 'pdfbox', 'preflight-app-2.0.13.jar')
|
24
|
-
Config[:
|
24
|
+
Config[:email2pdf_jar] = File.join(Libis::Format::TOOL_DIR, 'emailconverter.jar')
|
25
25
|
# noinspection RubyStringKeysInHashInspection
|
26
26
|
Config[:xml_validations] = [['archive/ead', File.join(Libis::Format::DATA_DIR, 'ead.xsd')]]
|
27
27
|
Config[:type_database] = File.join(Libis::Format::DATA_DIR, 'types.yml')
|
@@ -5,6 +5,7 @@ require_relative 'base'
|
|
5
5
|
require 'libis/tools/extend/hash'
|
6
6
|
require 'libis/format/tool/pdf_copy'
|
7
7
|
require 'libis/format/tool/pdf_to_pdfa'
|
8
|
+
require 'libis/format/tool/pdfa_validator'
|
8
9
|
require 'libis/format/tool/pdf_optimizer'
|
9
10
|
|
10
11
|
module Libis
|
@@ -120,7 +121,7 @@ module Libis
|
|
120
121
|
end
|
121
122
|
|
122
123
|
{
|
123
|
-
files: [result]
|
124
|
+
files: [result],
|
124
125
|
converter: self.class.name
|
125
126
|
}
|
126
127
|
|
@@ -163,11 +164,16 @@ module Libis
|
|
163
164
|
|
164
165
|
using_temp(target) do |tmpname|
|
165
166
|
result = Libis::Format::Tool::PdfToPdfa.run source, tmpname
|
166
|
-
|
167
|
-
|
167
|
+
|
168
|
+
if result[:command][:status] != 0
|
169
|
+
error("Pdf/A conversion encountered errors:\n%s", (result[:command][:out] + result[:command][:err]).join("\n"))
|
168
170
|
next nil
|
169
171
|
else
|
170
|
-
|
172
|
+
r = Libis::Format::Tool::PdfaValidator.run tmpname
|
173
|
+
if r[:status] != 0
|
174
|
+
error "Pdf/A file failed to validate with following errors:\n%s", (r[:err] || r[:out] || []).join("\n")
|
175
|
+
next nil
|
176
|
+
end
|
171
177
|
end
|
172
178
|
tmpname
|
173
179
|
end
|
@@ -1,17 +1,17 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative "base"
|
2
2
|
|
3
3
|
module Libis
|
4
4
|
module Format
|
5
5
|
module Converter
|
6
|
-
|
7
6
|
class XsltConverter < Libis::Format::Converter::Base
|
8
|
-
|
9
7
|
def self.input_types
|
10
8
|
[:XML]
|
11
9
|
end
|
12
10
|
|
13
11
|
def self.output_types(format = nil)
|
14
|
-
|
12
|
+
if format
|
13
|
+
return [] unless input_types.include?(format)
|
14
|
+
end
|
15
15
|
[:XML, :HTML, :TXT]
|
16
16
|
end
|
17
17
|
|
@@ -28,20 +28,20 @@ module Libis
|
|
28
28
|
end
|
29
29
|
|
30
30
|
unless @options[:xsl_file]
|
31
|
-
error
|
31
|
+
error "No xsl_file supplied"
|
32
32
|
return nil
|
33
33
|
end
|
34
34
|
|
35
35
|
FileUtils.mkpath(File.dirname(target))
|
36
36
|
|
37
37
|
if RUBY_PLATFORM == "java"
|
38
|
-
require
|
38
|
+
require "saxon-xslt"
|
39
39
|
xsl = Saxon.XSLT(File.open(@options[:xsl_file]))
|
40
40
|
xml = Saxon.XML(File.open(source))
|
41
41
|
result = xsl.transform(xml)
|
42
|
-
File.
|
42
|
+
File.write(target, result.to_s)
|
43
43
|
else
|
44
|
-
require
|
44
|
+
require "nokogiri"
|
45
45
|
|
46
46
|
doc = nil
|
47
47
|
begin
|
@@ -65,7 +65,7 @@ module Libis
|
|
65
65
|
xsl = nil
|
66
66
|
|
67
67
|
begin
|
68
|
-
fp = File.open(file,
|
68
|
+
fp = File.open(file, "r")
|
69
69
|
xsl = Nokogiri::XSLT(fp) do |config|
|
70
70
|
config.options = Nokogiri::XML::ParseOptions::STRICT | Nokogiri::XML::ParseOptions::NOBLANKS
|
71
71
|
end
|
@@ -80,7 +80,7 @@ module Libis
|
|
80
80
|
|
81
81
|
begin
|
82
82
|
target_xml = xsl.transform(doc)
|
83
|
-
fp = File.open(target,
|
83
|
+
fp = File.open(target, "w")
|
84
84
|
fp.write(target_xml)
|
85
85
|
rescue Exception => e
|
86
86
|
error "Error transforming '#{source}' with '#{file}': #{e.message} @ #{e.backtrace[0]}"
|
@@ -89,13 +89,14 @@ module Libis
|
|
89
89
|
fp.close unless fp.nil? or fp.closed?
|
90
90
|
end
|
91
91
|
|
92
|
-
|
93
|
-
|
92
|
+
{
|
93
|
+
command: {status: 0},
|
94
|
+
files: [target]
|
95
|
+
}
|
94
96
|
|
97
|
+
end
|
95
98
|
end
|
96
|
-
|
97
99
|
end
|
98
|
-
|
99
100
|
end
|
100
101
|
end
|
101
102
|
end
|
@@ -26,8 +26,8 @@ module Libis
|
|
26
26
|
timeout = Libis::Format::Config[:timeouts][:email2pdf] || 120
|
27
27
|
result = Libis::Tools::Command.run(
|
28
28
|
Libis::Format::Config[:java_cmd],
|
29
|
-
"-Duser.timezone=Europe/Brussels -Duser.language=nl -Duser.country=BE",
|
30
|
-
"jar", Libis::Format::Config[:
|
29
|
+
"-Duser.timezone=Europe/Brussels", "-Duser.language=nl", "-Duser.country=BE",
|
30
|
+
"-jar", Libis::Format::Config[:email2pdf_jar],
|
31
31
|
"-e", "-hd", "-a",
|
32
32
|
"-o", target,
|
33
33
|
source,
|
@@ -35,7 +35,7 @@ module Libis
|
|
35
35
|
kill_after: timeout * 2
|
36
36
|
)
|
37
37
|
|
38
|
-
warn "EmailToPdf conversion messages: \n\t#{result[:
|
38
|
+
warn "EmailToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
39
39
|
|
40
40
|
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
41
41
|
raise "#{self.class} failed to generate target file #{target}" unless File.exist?(target)
|
@@ -59,7 +59,7 @@ module Libis
|
|
59
59
|
return headers unless File.exist?(headers_file)
|
60
60
|
doc = REXML::Document.new(File.new(headers_file))
|
61
61
|
root = doc.root
|
62
|
-
root.
|
62
|
+
root.children.each do |element|
|
63
63
|
case element.name
|
64
64
|
when "attachments"
|
65
65
|
headers[:attachments] = element.elements.map { |e| e.text }
|
@@ -33,6 +33,7 @@ module Libis
|
|
33
33
|
result = Libis::Tools::Command.run(
|
34
34
|
Libis::Format::Config[:java_cmd],
|
35
35
|
"-Dfop.home=#{File.dirname(Libis::Format::Config[:fop_jar])}",
|
36
|
+
'-Djava.awt.headless=true',
|
36
37
|
'-jar', Libis::Format::Config[:fop_jar],
|
37
38
|
'-fo', xml,
|
38
39
|
'-pdf', target,
|
@@ -52,14 +52,14 @@ module Libis
|
|
52
52
|
|
53
53
|
FileUtils.copy tgt_file, target, preserve: true
|
54
54
|
|
55
|
-
ensure
|
56
|
-
FileUtils.rmtree workdir rescue nil
|
57
|
-
|
58
55
|
{
|
59
56
|
command: result,
|
60
57
|
files: [ target ]
|
61
58
|
}
|
62
59
|
|
60
|
+
ensure
|
61
|
+
FileUtils.rmtree workdir rescue nil
|
62
|
+
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
@@ -1,18 +1,18 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require "tempfile"
|
2
|
+
require "csv"
|
3
|
+
require "fileutils"
|
4
|
+
require 'pdfinfo'
|
4
5
|
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
6
|
+
require "libis/tools/extend/string"
|
7
|
+
require "libis/tools/logger"
|
8
|
+
require "libis/tools/command"
|
9
|
+
require "libis/tools/temp_file"
|
9
10
|
|
10
|
-
require
|
11
|
+
require "libis/format"
|
11
12
|
|
12
13
|
module Libis
|
13
14
|
module Format
|
14
15
|
module Tool
|
15
|
-
|
16
16
|
class PdfToPdfa
|
17
17
|
include ::Libis::Tools::Logger
|
18
18
|
|
@@ -22,71 +22,78 @@ module Libis
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def self.run(source, target = nil, options = {})
|
25
|
-
|
25
|
+
new.run source, target, options
|
26
26
|
end
|
27
27
|
|
28
28
|
def run(source, target = nil, options = nil)
|
29
|
-
|
30
|
-
tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
|
29
|
+
tmp_target = Tools::TempFile.name(File.basename(source, ".*"), ".pdf")
|
31
30
|
target ||= tmp_target
|
32
31
|
|
32
|
+
metadata = get_metadata(source)
|
33
|
+
|
33
34
|
icc_info = icc_options(options[:colorspace])
|
34
35
|
|
35
|
-
icc_file = Tools::TempFile.name(icc_info[:icc_name],
|
36
|
+
icc_file = Tools::TempFile.name(icc_info[:icc_name], ".icc")
|
36
37
|
FileUtils.cp(File.join(Libis::Format::DATA_DIR, "#{icc_info[:icc_name]}.icc"), icc_file)
|
37
38
|
|
38
|
-
def_filename = Tools::TempFile.name(
|
39
|
-
File.open(def_filename,
|
40
|
-
f.puts File.read(File.join(Libis::Format::DATA_DIR,
|
41
|
-
|
42
|
-
|
39
|
+
def_filename = Tools::TempFile.name("PDFA_def", ".ps")
|
40
|
+
File.open(def_filename, "w") do |f|
|
41
|
+
f.puts File.read(File.join(Libis::Format::DATA_DIR, "PDFA_def.ps"))
|
42
|
+
.gsub("[**ICC profile**]", icc_file)
|
43
|
+
.gsub("[**ICC reference**]", icc_info[:icc_ref])
|
44
|
+
.gsub("[**METADATA**]", metadata)
|
43
45
|
end
|
44
46
|
|
45
47
|
timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
|
46
48
|
result = Libis::Tools::Command.run(
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
49
|
+
Libis::Format::Config[:ghostscript_cmd],
|
50
|
+
"-q",
|
51
|
+
"-dBATCH", "-dNOPAUSE", "-dNOOUTERSAVE", "-dNOSAFER",
|
52
|
+
# "-dNOPLATFONTS", "-dUseCIEColor=true",
|
53
|
+
# "-sColorConversionStrategy=/UseDeviceIndependentColor",
|
54
|
+
"-sProcessColorModel=#{icc_info[:device]}",
|
55
|
+
"-sOutputICCProfile=#{icc_file}",
|
56
|
+
"-dCompatibilityLevel=1.4",
|
57
|
+
"-sDEVICE=pdfwrite", "-dPDFA=1", "-dPDFACompatibilityPolicy=1",
|
58
|
+
"-o", File.absolute_path(target),
|
59
|
+
def_filename,
|
60
|
+
source,
|
61
|
+
timeout: timeout,
|
62
|
+
kill_after: timeout * 2
|
58
63
|
)
|
59
64
|
|
60
|
-
raise
|
65
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
66
|
+
raise "#{self.class} failed with error #{result[:status]}: \n#{(result[:out] + result[:err]).join("\n")}" if result[:status] != 0
|
61
67
|
|
62
68
|
FileUtils.rm [icc_file, def_filename].compact, force: true
|
63
69
|
|
64
|
-
unless Format::Tool::PdfaValidator.run(target)
|
65
|
-
result[:status] = -999
|
66
|
-
result[:err] << 'Failed to validate generated PDF/A file.'
|
67
|
-
end
|
68
|
-
|
69
70
|
{
|
70
71
|
command: result,
|
71
|
-
files: [
|
72
|
+
files: [target]
|
72
73
|
}
|
73
|
-
|
74
74
|
end
|
75
75
|
|
76
|
-
|
77
76
|
private
|
78
77
|
|
78
|
+
def get_metadata(source)
|
79
|
+
info = Pdfinfo.new(source)
|
80
|
+
metadata = "/Title (#{info.title})"
|
81
|
+
metadata += "\n /Author (#{info.author})" if info.author
|
82
|
+
metadata += "\n /Subject (#{info.subject})" if info.subject
|
83
|
+
metadata += "\n /Keywords (#{info.keywords})" if info.keywords
|
84
|
+
metadata += "\n /Creator (#{info.creator})" if info.creator
|
85
|
+
metadata
|
86
|
+
end
|
87
|
+
|
79
88
|
def icc_options(colorspace)
|
80
89
|
case colorspace.to_s.downcase
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
90
|
+
when "cmyk"
|
91
|
+
{icc_name: "ISOcoated_v2_eci", icc_ref: "FOGRA39L", device: "DeviceCMYK"}
|
92
|
+
else
|
93
|
+
{icc_name: "AdobeRGB1998", icc_ref: "sRGB", device: "DeviceRGB"}
|
85
94
|
end
|
86
95
|
end
|
87
|
-
|
88
96
|
end
|
89
|
-
|
90
97
|
end
|
91
98
|
end
|
92
99
|
end
|
@@ -1,24 +1,22 @@
|
|
1
|
-
require
|
1
|
+
require "fileutils"
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
3
|
+
require "libis/tools/extend/string"
|
4
|
+
require "libis/tools/logger"
|
5
|
+
require "libis/tools/command"
|
6
6
|
|
7
|
-
require
|
7
|
+
require "libis/format/config"
|
8
8
|
|
9
9
|
module Libis
|
10
10
|
module Format
|
11
11
|
module Tool
|
12
|
-
|
13
12
|
class PdfaValidator
|
14
13
|
include ::Libis::Tools::Logger
|
15
14
|
|
16
15
|
def self.run(source)
|
17
|
-
|
16
|
+
new.run source
|
18
17
|
end
|
19
18
|
|
20
19
|
def run(source)
|
21
|
-
|
22
20
|
src_file = File.absolute_path(source)
|
23
21
|
|
24
22
|
timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
|
@@ -28,46 +26,41 @@ module Libis
|
|
28
26
|
Dir.chdir(Dir.tmpdir)
|
29
27
|
|
30
28
|
result = Libis::Tools::Command.run(
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
29
|
+
pdfa,
|
30
|
+
"--noxml",
|
31
|
+
"--level", "B",
|
32
|
+
"--verb", "0",
|
33
|
+
src_file,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
38
36
|
)
|
39
37
|
|
40
|
-
raise
|
41
|
-
raise
|
38
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
42
40
|
|
43
41
|
Dir.chdir(previous_wd)
|
44
42
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
43
|
+
out, err = result[:out].partition { |line| line =~ /^VLD-\[PASS\]/ }
|
44
|
+
result[:out] = out
|
45
|
+
result[:err] += err
|
46
|
+
|
47
|
+
result
|
50
48
|
else
|
51
49
|
jar = Libis::Format::Config[:preflight_jar]
|
52
50
|
result = Libis::Tools::Command.run(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
51
|
+
Libis::Format::Config[:java_cmd],
|
52
|
+
"-jar", jar,
|
53
|
+
src_file,
|
54
|
+
timeout: timeout,
|
55
|
+
kill_after: timeout * 2
|
58
56
|
)
|
59
|
-
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
60
57
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
return false
|
65
|
-
end
|
58
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
59
|
+
|
60
|
+
result
|
66
61
|
end
|
67
|
-
true
|
68
62
|
end
|
69
63
|
end
|
70
|
-
|
71
64
|
end
|
72
65
|
end
|
73
66
|
end
|
@@ -53,14 +53,14 @@ module Libis
|
|
53
53
|
|
54
54
|
FileUtils.copy tgt_file, target, preserve: true
|
55
55
|
|
56
|
-
ensure
|
57
|
-
FileUtils.rmtree workdir rescue nil
|
58
|
-
|
59
56
|
{
|
60
57
|
command: result,
|
61
58
|
files: [ target ]
|
62
59
|
}
|
63
60
|
|
61
|
+
ensure
|
62
|
+
FileUtils.rmtree workdir rescue nil
|
63
|
+
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
data/lib/libis/format/version.rb
CHANGED
data/libis-format.gemspec
CHANGED
data/tools/pdf2pdfa
ADDED
@@ -0,0 +1,395 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
# PDF2ARCHIVE 0.3.2
|
4
|
+
# (C) 2018 Matteo Seclì <secli.matteo@gmail.com>
|
5
|
+
#
|
6
|
+
# This program is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation, either version 3 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This program is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
|
19
|
+
|
20
|
+
#=====# INITIALIZE VARIABLES #=====#
|
21
|
+
VERSION="0.3.2"
|
22
|
+
INPUT=""
|
23
|
+
OUTPUT=""
|
24
|
+
QUALITYOPTS=""
|
25
|
+
DEBUG=false
|
26
|
+
VALIDATE=false
|
27
|
+
MSGOPTS="-dQUIET -sstdout=/dev/null"
|
28
|
+
VERAMSGOPTS=""
|
29
|
+
#ERROPTS="2>/dev/null"
|
30
|
+
|
31
|
+
|
32
|
+
#=====# HELP FUNCTION #=====#
|
33
|
+
help()
|
34
|
+
{
|
35
|
+
TOTLEN="38" # Adjust this
|
36
|
+
TITLESTRING="PDF2ARCHIVE, version $VERSION"
|
37
|
+
SPACEL=$(echo "($TOTLEN-${#TITLESTRING})/2 + (36-${#TITLESTRING})%2 - 1" | bc)
|
38
|
+
SPACER=$(echo "($TOTLEN-${#TITLESTRING})/2 - 1" | bc)
|
39
|
+
TITLESTRING=$(printf "|%-${SPACEL}s%s%-${SPACER}s|" "" "$TITLESTRING" "")
|
40
|
+
DASHSTRING=$(eval printf "%.0s-" {1..$TOTLEN})
|
41
|
+
echo \
|
42
|
+
"$DASHSTRING
|
43
|
+
$TITLESTRING
|
44
|
+
$DASHSTRING
|
45
|
+
|
46
|
+
OVERVIEW:
|
47
|
+
A simple Ghostscript-based PDF to PDF/A-1B converter.
|
48
|
+
|
49
|
+
USAGE:
|
50
|
+
$0 [options] input.pdf [output.pdf]
|
51
|
+
|
52
|
+
EXAMPLES:
|
53
|
+
Convert 'input.pdf' in PDF/A-1B format; the output is 'input-PDFA.pdf':
|
54
|
+
$0 input.pdf
|
55
|
+
Convert 'input.pdf' in PDF/A-1B format; the output is 'output.pdf':
|
56
|
+
$0 input.pdf output.pdf
|
57
|
+
Convert 'input.pdf' in PDF/A-1B format and perform a high-quality compression:
|
58
|
+
$0 --quality=high input.pdf
|
59
|
+
Convert 'input.pdf' in PDF/A-1B format and specify the document title:
|
60
|
+
$0 --title=\"Title of your nice document\" input.pdf
|
61
|
+
Convert 'input.pdf' in PDF/A-1B format and validate the result:
|
62
|
+
$0 --validate input.pdf
|
63
|
+
|
64
|
+
OPTIONS:
|
65
|
+
-h, --help Show the help
|
66
|
+
--quality=<value> Set the quality of the output when downsampling. The
|
67
|
+
possible values are 'high', 'medium' and 'low', where
|
68
|
+
'high' gives the highest output quality. By specifying no
|
69
|
+
option, no additional downsampling is done.
|
70
|
+
--title=<value> Title of the resulting PDF/A file
|
71
|
+
--author=<value> Author of the resulting PDF/A file
|
72
|
+
--subject=<value> Subject of the resulting PDF/A file
|
73
|
+
--keywords=<value> Comma-separated keywords of the resulting PDF/A file
|
74
|
+
--cleanmetadata Clean all the standard metadata fields, except the ones
|
75
|
+
specified via the command line options.
|
76
|
+
--validate Validate the resulting file. The validation is done with
|
77
|
+
VeraPDF, you need a working Java installation.
|
78
|
+
--validate-only Perform only the validation on the input file, again using
|
79
|
+
VeraPDF
|
80
|
+
--debug Write additional debug information on screen
|
81
|
+
-v, --version Show the program version
|
82
|
+
|
83
|
+
LICENSE:
|
84
|
+
GPLv3
|
85
|
+
|
86
|
+
AUTHORS:
|
87
|
+
(C) 2017-2018 Matteo Seclì"
|
88
|
+
}
|
89
|
+
|
90
|
+
|
91
|
+
#=====# RUN HELPER FUNCTION #=====#
|
92
|
+
run() {
|
93
|
+
if $DEBUG; then
|
94
|
+
#v=$(exec 2>&1 && set -x && set -- "$@")
|
95
|
+
#echo "#${v#*--}"
|
96
|
+
"$@"
|
97
|
+
else
|
98
|
+
"$@" 2>/dev/null #>/dev/null 2>&1
|
99
|
+
fi
|
100
|
+
}
|
101
|
+
|
102
|
+
|
103
|
+
#=====# CHECKS #=====#
|
104
|
+
if [ "$(which gs)" == "" ]; then
|
105
|
+
echo " ERROR: Ghostscript is not installed or it's not in the path"
|
106
|
+
exit
|
107
|
+
fi
|
108
|
+
|
109
|
+
|
110
|
+
#=====# VALIDATION #=====#
|
111
|
+
|
112
|
+
javaCheck() {
|
113
|
+
if [ "$(which java)" == "" ]; then
|
114
|
+
echo " ERROR: Java is not installed or it's not in the path"
|
115
|
+
echo " Cannot perform validation"
|
116
|
+
exit 1
|
117
|
+
fi
|
118
|
+
}
|
119
|
+
|
120
|
+
validate() {
|
121
|
+
echo " Validating..."
|
122
|
+
echo " $(./verapdf/verapdf "$1" --extract --flavour 1b --format text "$2")"
|
123
|
+
}
|
124
|
+
|
125
|
+
|
126
|
+
#=====# INPUT PARSER #=====#
|
127
|
+
if [ "$1" == "" ]; then
|
128
|
+
help
|
129
|
+
exit
|
130
|
+
fi
|
131
|
+
while [ "$1" != "" ]; do
|
132
|
+
PARAM=`echo $1 | awk -F= '{print $1}'`
|
133
|
+
VALUE=`echo $1 | awk -F= '{print $2}'`
|
134
|
+
case $PARAM in
|
135
|
+
-h | --help)
|
136
|
+
help
|
137
|
+
exit
|
138
|
+
;;
|
139
|
+
-v | --version)
|
140
|
+
echo $VERSION
|
141
|
+
exit
|
142
|
+
;;
|
143
|
+
--debug)
|
144
|
+
DEBUG=true
|
145
|
+
MSGOPTS=""
|
146
|
+
VERAMSGOPTS="--verbose"
|
147
|
+
#ERROPTS=""
|
148
|
+
;;
|
149
|
+
--quality)
|
150
|
+
if [ "$VALUE" == "high" ]; then
|
151
|
+
QUALITYOPTS="-dPDFSETTINGS=/printer"
|
152
|
+
elif [ "$VALUE" == "medium" ]; then
|
153
|
+
QUALITYOPTS="-dPDFSETTINGS=/ebook"
|
154
|
+
elif [ "$VALUE" == "low" ]; then
|
155
|
+
QUALITYOPTS="-dPDFSETTINGS=/screen"
|
156
|
+
else
|
157
|
+
echo " ERROR: unknown quality option '$VALUE'"
|
158
|
+
help
|
159
|
+
exit 1
|
160
|
+
fi
|
161
|
+
;;
|
162
|
+
--cleanmetadata)
|
163
|
+
[ -z ${PDFTITLE+x} ] && PDFTITLE=""
|
164
|
+
[ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=""
|
165
|
+
[ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=""
|
166
|
+
[ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=""
|
167
|
+
[ -z ${PDFCREATOR+x} ] && PDFCREATOR=""
|
168
|
+
[ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=""
|
169
|
+
[ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=""
|
170
|
+
[ -z ${PDFMODDATE+x} ] && PDFMODDATE=""
|
171
|
+
[ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=""
|
172
|
+
;;
|
173
|
+
--title)
|
174
|
+
PDFTITLE=$VALUE
|
175
|
+
;;
|
176
|
+
--author)
|
177
|
+
PDFAUTHOR=$VALUE
|
178
|
+
;;
|
179
|
+
--subject)
|
180
|
+
PDFSUBJECT=$VALUE
|
181
|
+
;;
|
182
|
+
--keywords)
|
183
|
+
PDFKEYWORDS=$VALUE
|
184
|
+
;;
|
185
|
+
--validate)
|
186
|
+
javaCheck
|
187
|
+
VALIDATE=true
|
188
|
+
;;
|
189
|
+
--validate-only)
|
190
|
+
javaCheck
|
191
|
+
validate $VERAMSGOPTS $2
|
192
|
+
exit
|
193
|
+
;;
|
194
|
+
*.pdf)
|
195
|
+
if [ "$INPUT" == "" ]; then
|
196
|
+
INPUT=$PARAM
|
197
|
+
elif [ "$OUTPUT" == "" ]; then
|
198
|
+
OUTPUT=$PARAM
|
199
|
+
else
|
200
|
+
echo " ERROR: too many PDF files as input!"
|
201
|
+
help
|
202
|
+
exit 1
|
203
|
+
fi
|
204
|
+
;;
|
205
|
+
*)
|
206
|
+
echo " ERROR: unknown parameter \"$PARAM\""
|
207
|
+
help
|
208
|
+
exit 1
|
209
|
+
;;
|
210
|
+
esac
|
211
|
+
shift
|
212
|
+
done
|
213
|
+
|
214
|
+
#=====# SET UP ALL THE STUFF #=====#
|
215
|
+
echo "=== Welcome to PDF2ARCHIVE ==="
|
216
|
+
if [ "$OUTPUT" == "" ]; then
|
217
|
+
OUTPUT="${INPUT%.pdf}-PDFA.pdf"
|
218
|
+
fi
|
219
|
+
TMPFILE=$(mktemp)
|
220
|
+
TMPDIR=$(mktemp -d)
|
221
|
+
PSTMPFILE=$TMPDIR/PDFA_def.ps
|
222
|
+
ICCTMPFILE=$TMPDIR/AdobeRGB1998.icc
|
223
|
+
INFOTMPFILE=$TMPDIR/pdf_minimal_info.ps
|
224
|
+
echo \
|
225
|
+
"%!PS
|
226
|
+
% Extract PDF info in a minimal way.
|
227
|
+
% Inspired by 'toolbin/pdf_info.ps'.
|
228
|
+
|
229
|
+
/QUIET true def
|
230
|
+
File dup (r) file runpdfbegin
|
231
|
+
Trailer /Info knownoget {
|
232
|
+
dup /Title knownoget { (__knowninfoTitle: ) print = flush } if
|
233
|
+
dup /Author knownoget { (__knowninfoAuthor: ) print = flush } if
|
234
|
+
dup /Subject knownoget { (__knowninfoSubject: ) print = flush } if
|
235
|
+
dup /Keywords knownoget { (__knowninfoKeywords: ) print = flush } if
|
236
|
+
dup /Creator knownoget { (__knowninfoCreator: ) print = flush } if
|
237
|
+
dup /Producer knownoget { (__knowninfoProducer: ) print = flush } if
|
238
|
+
dup /CreationDate knownoget { (__knowninfoCreationDate: ) print = flush } if
|
239
|
+
dup /ModDate knownoget { (__knowninfoModDate: ) print = flush } if
|
240
|
+
dup /Trapped knownoget { (__knowninfoTrapped: ) print = flush } if
|
241
|
+
} if
|
242
|
+
quit
|
243
|
+
" > $INFOTMPFILE
|
244
|
+
|
245
|
+
|
246
|
+
#=====# PRESERVE UNSPECIFIED KNOWN STANDARD METADATA #=====#
|
247
|
+
# Notes:
|
248
|
+
# 'iconv' is necessary to filter out all the invalid bytes.
|
249
|
+
# If it's not used, sed (unless it's GNU sed) will fail with
|
250
|
+
# 'RE error: illegal byte sequence'. A solution to this is to
|
251
|
+
# use 'LC_CTYPE=C && LANG=C && echo "$METADUMP" ...' in the
|
252
|
+
# variable assignments; however, this produces bad PDF files.
|
253
|
+
#
|
254
|
+
METADUMP=$(gs -dNOSAFER -dNODISPLAY -q -sFile="$INPUT" $INFOTMPFILE | iconv -f utf-8 -t utf-8 -c)
|
255
|
+
[ -z ${PDFTITLE+x} ] && PDFTITLE=$(echo "$METADUMP" | grep "__knowninfoTitle: " | sed "s/^__knowninfoTitle: //g")
|
256
|
+
[ -z ${PDFAUTHOR+x} ] && PDFAUTHOR=$(echo "$METADUMP" | grep "__knowninfoAuthor: " | sed "s/^__knowninfoAuthor: //g")
|
257
|
+
[ -z ${PDFSUBJECT+x} ] && PDFSUBJECT=$(echo "$METADUMP" | grep "__knowninfoSubject: " | sed "s/^__knowninfoSubject: //g")
|
258
|
+
[ -z ${PDFKEYWORDS+x} ] && PDFKEYWORDS=$(echo "$METADUMP" | grep "__knowninfoKeywords: " | sed "s/^__knowninfoKeywords: //g")
|
259
|
+
[ -z ${PDFCREATOR+x} ] && PDFCREATOR=$(echo "$METADUMP" | grep "__knowninfoCreator: " | sed "s/^__knowninfoCreator: //g")
|
260
|
+
[ -z ${PDFPRODUCER+x} ] && PDFPRODUCER=$(echo "$METADUMP" | grep "__knowninfoProducer: " | sed "s/^__knowninfoProducer: //g")
|
261
|
+
[ -z ${PDFCREATIONDATE+x} ] && PDFCREATIONDATE=$(echo "$METADUMP" | grep "__knowninfoCreationDate: " | sed "s/^__knowninfoCreationDate: //g")
|
262
|
+
[ -z ${PDFMODDATE+x} ] && PDFMODDATE=$(echo "$METADUMP" | grep "__knowninfoModDate: " | sed "s/^__knowninfoModDate: //g")
|
263
|
+
[ -z ${PDFTRAPPED+x} ] && PDFTRAPPED=$(echo "$METADUMP" | grep "__knowninfoTrapped: " | sed "s/^__knowninfoTrapped: //g")
|
264
|
+
# Replace "Trapped" string, if not empty, with an operator. Fixes 3Heights.
|
265
|
+
if [ "$PDFTRAPPED" != "" ]; then
|
266
|
+
PDFTRAPPED="/$(tr '[:lower:]' '[:upper:]' <<< ${PDFTRAPPED:0:1})$(tr '[:upper:]' '[:lower:]' <<< ${PDFTRAPPED:1})"
|
267
|
+
fi
|
268
|
+
# Check if the operator is allowed, otherwise empty variable.
|
269
|
+
if [ "$PDFTRAPPED" != "/True" ] && [ "$PDFTRAPPED" != "/False" ]; then
|
270
|
+
PDFTRAPPED=""
|
271
|
+
fi
|
272
|
+
|
273
|
+
|
274
|
+
#=====# PRINT DEBUG INFO #=====#
|
275
|
+
if $DEBUG; then
|
276
|
+
echo " DEBUG: running PDF2ARCHIVE, version $VERSION"
|
277
|
+
echo " DEBUG: using Ghostscript binary at $(which gs), version $(gs --version)"
|
278
|
+
echo " DEBUG: the input file is '$INPUT'"
|
279
|
+
echo " DEBUG: the output file is '$OUTPUT'"
|
280
|
+
echo " DEBUG: the intermediate processing file is $TMPFILE"
|
281
|
+
echo " DEBUG: the temporary directory is $TMPDIR"
|
282
|
+
echo " DEBUG: the current quality options are '$QUALITYOPTS'"
|
283
|
+
echo " DEBUG: PDF title '$PDFTITLE'"
|
284
|
+
echo " DEBUG: PDF author '$PDFAUTHOR'"
|
285
|
+
echo " DEBUG: PDF subject '$PDFSUBJECT'"
|
286
|
+
echo " DEBUG: PDF keywords '$PDFKEYWORDS'"
|
287
|
+
echo " DEBUG: PDF creator '$PDFCREATOR'"
|
288
|
+
echo " DEBUG: PDF producer '$PDFPRODUCER'"
|
289
|
+
echo " DEBUG: PDF creation date '$PDFCREATIONDATE'"
|
290
|
+
echo " DEBUG: PDF modification date '$PDFMODDATE'"
|
291
|
+
echo " DEBUG: PDF trapping '$PDFTRAPPED'"
|
292
|
+
fi
|
293
|
+
|
294
|
+
|
295
|
+
#=====# CREATE THE PS DEFINITION FILE #=====#
|
296
|
+
echo " Creating the definition file..."
|
297
|
+
echo \
|
298
|
+
"%!
|
299
|
+
% This prefix file for creating a PDF/A document is derived from
|
300
|
+
% the sample included with Ghostscript 9.07, released under the
|
301
|
+
% GNU Affero General Public License.
|
302
|
+
% Modified 4/15/2013 by MCB Systems.
|
303
|
+
|
304
|
+
% Feel free to modify entries marked with \"Customize\".
|
305
|
+
|
306
|
+
% This assumes an ICC profile to reside in the file (AdobeRGB1998.icc),
|
307
|
+
% unless the user modifies the corresponding line below.
|
308
|
+
|
309
|
+
% The color space described by the ICC profile must correspond to the
|
310
|
+
% ProcessColorModel specified when using this prefix file (GRAY with
|
311
|
+
% DeviceGray, RGB with DeviceRGB, and CMYK with DeviceCMYK).
|
312
|
+
|
313
|
+
% Define entries in the document Info dictionary :
|
314
|
+
|
315
|
+
/ICCProfile ($ICCTMPFILE) % Customize.
|
316
|
+
def
|
317
|
+
|
318
|
+
[ /Title ($PDFTITLE) % Customize." > $PSTMPFILE
|
319
|
+
if [ "$PDFAUTHOR" != "" ]; then
|
320
|
+
echo " /Author ($PDFAUTHOR)" >> $PSTMPFILE
|
321
|
+
fi
|
322
|
+
if [ "$PDFSUBJECT" != "" ]; then
|
323
|
+
echo " /Subject ($PDFSUBJECT)" >> $PSTMPFILE
|
324
|
+
fi
|
325
|
+
if [ "$PDFKEYWORDS" != "" ]; then
|
326
|
+
echo " /Keywords ($PDFKEYWORDS)" >> $PSTMPFILE
|
327
|
+
fi
|
328
|
+
if [ "$PDFCREATOR" != "" ]; then
|
329
|
+
echo " /Creator ($PDFCREATOR)" >> $PSTMPFILE
|
330
|
+
fi
|
331
|
+
echo \
|
332
|
+
"% /Producer % Reserved to GS
|
333
|
+
% /CreationDate % Reserved to GS
|
334
|
+
% /ModDate % Reserved to GS" >> $PSTMPFILE
|
335
|
+
if [ "$PDFTRAPPED" != "" ]; then
|
336
|
+
echo " /Trapped $PDFTRAPPED" >> $PSTMPFILE
|
337
|
+
fi
|
338
|
+
echo \
|
339
|
+
" /DOCINFO pdfmark
|
340
|
+
|
341
|
+
% Define an ICC profile :
|
342
|
+
|
343
|
+
[/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
|
344
|
+
[{icc_PDFA} <</N systemdict /ProcessColorModel get /DeviceGray eq {1} {systemdict /ProcessColorModel get /DeviceRGB eq {3} {4} ifelse} ifelse >> /PUT pdfmark
|
345
|
+
[{icc_PDFA} ICCProfile (r) file /PUT pdfmark
|
346
|
+
|
347
|
+
% Define the output intent dictionary :
|
348
|
+
|
349
|
+
[/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
|
350
|
+
[{OutputIntent_PDFA} <<
|
351
|
+
/Type /OutputIntent % Must be so (the standard requires).
|
352
|
+
/S /GTS_PDFA1 % Must be so (the standard requires).
|
353
|
+
/DestOutputProfile {icc_PDFA} % Must be so (see above).
|
354
|
+
/OutputConditionIdentifier (AdobeRGB1998) % Customize
|
355
|
+
>> /PUT pdfmark
|
356
|
+
[{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
|
357
|
+
" >> $PSTMPFILE
|
358
|
+
|
359
|
+
|
360
|
+
#=====# CREATE THE COLOR PROFILE FILE #=====#
|
361
|
+
echo -n -e "\\x00\\x00\\x02\\x30\\x41\\x44\\x42\\x45\\x02\\x10\\x00\\x00\\x6d\\x6e\\x74\\x72\\x52\\x47\\x42\\x20\\x58\\x59\\x5a\\x20\\x07\\xd0\\x00\\x08\\x00\\x0b\\x00\\x13\\x00\\x33\\x00\\x3b\\x61\\x63\\x73\\x70\\x41\\x50\\x50\\x4c\\x00\\x00\\x00\\x00\\x6e\\x6f\\x6e\\x65\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xf6\\xd6\\x00\\x01\\x00\\x00\\x00\\x00\\xd3\\x2d\\x41\\x44\\x42\\x45\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0a\\x63\\x70\\x72\\x74\\x00\\x00\\x00\\xfc\\x00\\x00\\x00\\x32\\x64\\x65\\x73\\x63\\x00\\x00\\x01\\x30\\x00\\x00\\x00\\x6b\\x77\\x74\\x70\\x74\\x00\\x00\\x01\\x9c\\x00\\x00\\x00\\x14\\x62\\x6b\\x70\\x74\\x00\\x00\\x01\\xb0\\x00\\x00\\x00\\x14\\x72\\x54\\x52\\x43\\x00\\x00\\x01\\xc4\\x00\\x00\\x00\\x0e\\x67\\x54\\x52\\x43\\x00\\x00\\x01\\xd4\\x00\\x00\\x00\\x0e\\x62\\x54\\x52\\x43\\x00\\x00\\x01\\xe4\\x00\\x00\\x00\\x0e\\x72\\x58\\x59\\x5a\\x00\\x00\\x01\\xf4\\x00\\x00\\x00\\x14\\x67\\x58\\x59\\x5a\\x00\\x00\\x02\\x08\\x00\\x00\\x00\\x14\\x62\\x58\\x59\\x5a\\x00\\x00\\x02\\x1c\\x00\\x00\\x00\\x14\\x74\\x65\\x78\\x74\\x00\\x00\\x00\\x00\\x43\\x6f\\x70\\x79\\x72\\x69\\x67\\x68\\x74\\x20\\x32\\x30\\x30\\x30\\x20\\x41\\x64\\x6f\\x62\\x65\\x20\\x53\\x79\\x73\\x74\\x65\\x6d\\x73\\x20\\x49\\x6e\\x63\\x6f\\x72\\x70\\x6f\\x72\\x61\\x74\\x65\\x64\\x00\\x00\\x00\\x64\\x65\\x73\\x63\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x11\\x41\\x64\\x6f\\x62\\x65\\x20\\x52\\x47\\x42\\x20\\x28\\x31\\x39\\x39\\x38\\x29\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x
00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\xf3\\x51\\x00\\x01\\x00\\x00\\x00\\x01\\x16\\xcc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x63\\x75\\x72\\x76\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x01\\x02\\x33\\x00\\x00\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x9c\\x18\\x00\\x00\\x4f\\xa5\\x00\\x00\\x04\\xfc\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x34\\x8d\\x00\\x00\\xa0\\x2c\\x00\\x00\\x0f\\x95\\x58\\x59\\x5a\\x20\\x00\\x00\\x00\\x00\\x00\\x00\\x26\\x31\\x00\\x00\\x10\\x2f\\x00\\x00\\xbe\\x9c" > $ICCTMPFILE
|
362
|
+
|
363
|
+
|
364
|
+
#=====# DO THE ACTUAL CONVERSION #=====#
|
365
|
+
echo " Compressing PDF & embedding fonts..."
|
366
|
+
run gs $MSGOPTS \
|
367
|
+
-dBATCH -dNOPAUSE -dNOOUTERSAVE \
|
368
|
+
-dCompatibilityLevel=1.4 \
|
369
|
+
-dEmbedAllFonts=true -dSubsetFonts=true \
|
370
|
+
-dCompressFonts=true -dCompressPages=true \
|
371
|
+
-sColorConversionStrategy=RGB \
|
372
|
+
-dDownsampleMonoImages=false -dDownsampleGrayImages=false -dDownsampleColorImages=false \
|
373
|
+
-dAutoFilterColorImages=false -dAutoFilterGrayImages=false \
|
374
|
+
-sDEVICE=pdfwrite \
|
375
|
+
-sOutputFile="$TMPFILE" "$INPUT"
|
376
|
+
echo " Converting to PDF/A-1B..."
|
377
|
+
run gs $MSGOPTS \
|
378
|
+
-dPDFA=1 -dBATCH -dNOPAUSE -dNOOUTERSAVE \
|
379
|
+
$QUALITYOPTS \
|
380
|
+
-dCompatibilityLevel=1.4 -dPDFACompatibilityPolicy=1 \
|
381
|
+
-sProcessColorModel=DeviceRGB -sColorConversionStrategy=RGB \
|
382
|
+
-sOutputICCProfile=$ICCTMPFILE \
|
383
|
+
-sDEVICE=pdfwrite \
|
384
|
+
-sOutputFile="$OUTPUT" "$TMPFILE" $PSTMPFILE
|
385
|
+
echo " Removing temporary files..."
|
386
|
+
rm $TMPFILE
|
387
|
+
echo " Done, now ESSE3 is happy! ;)"
|
388
|
+
|
389
|
+
|
390
|
+
#=====# VALIDATE THE RESULT #=====#
|
391
|
+
if $VALIDATE; then
|
392
|
+
validate $VERAMSGOPTS "$OUTPUT"
|
393
|
+
else
|
394
|
+
echo " Suggestion: validate the resulting PDF to be sure it's PDF/A-1B compliant."
|
395
|
+
fi
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libis-format
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kris Dekeyser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-
|
11
|
+
date: 2023-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -164,6 +164,20 @@ dependencies:
|
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: '2.1'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: pdfinfo
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '1.4'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '1.4'
|
167
181
|
description: Collection of tools and classes that help to identify formats of binary
|
168
182
|
files and create derivative copies (e.g. PDF from Word).
|
169
183
|
email:
|
@@ -193,10 +207,10 @@ files:
|
|
193
207
|
- bin/formatinfo
|
194
208
|
- bin/libis_format
|
195
209
|
- bin/pdf_copy
|
210
|
+
- data/AdobeRGB1998.icc
|
196
211
|
- data/ISOcoated_v2_eci.icc
|
197
212
|
- data/PDFA_def.ps
|
198
213
|
- data/ead.xsd
|
199
|
-
- data/eciRGB_v2.icc
|
200
214
|
- data/lias_formats.xml
|
201
215
|
- data/types.yml
|
202
216
|
- data/xlink.xsd
|
@@ -299,6 +313,7 @@ files:
|
|
299
313
|
- tools/fop/lib/xmlgraphics-commons-2.3.jar
|
300
314
|
- tools/fop/lib/xmlgraphics-commons.LICENSE.txt
|
301
315
|
- tools/fop/lib/xmlgraphics-commons.NOTICE.txt
|
316
|
+
- tools/pdf2pdfa
|
302
317
|
- tools/pdfbox/pdfbox-app-2.0.13.jar
|
303
318
|
- tools/pdfbox/preflight-app-2.0.13.jar
|
304
319
|
homepage: ''
|
data/data/eciRGB_v2.icc
DELETED
Binary file
|