libis-format 1.2.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.travis.yml +19 -23
- data/Gemfile +0 -2
- data/README.md +2 -2
- data/base/Dockerfile +5 -3
- data/base/rework_path +5 -10
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/config.rb +3 -3
- data/lib/libis/format/converter/audio_converter.rb +6 -38
- data/lib/libis/format/converter/base.rb +21 -8
- data/lib/libis/format/converter/chain.rb +7 -19
- data/lib/libis/format/converter/fop_pdf_converter.rb +2 -0
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +21 -141
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +4 -4
- data/lib/libis/format/converter/office_converter.rb +5 -3
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +31 -98
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/spreadsheet_converter.rb +5 -3
- data/lib/libis/format/converter/video_converter.rb +3 -6
- data/lib/libis/format/converter/xslt_converter.rb +14 -15
- data/lib/libis/format/identifier.rb +3 -3
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/extension_identification.rb +4 -4
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +1 -10
- data/lib/libis/format/tool/fop_pdf.rb +0 -12
- data/lib/libis/format/tool/identification_tool.rb +6 -6
- data/lib/libis/format/tool/office_to_pdf.rb +1 -10
- data/lib/libis/format/tool/pdf_copy.rb +1 -11
- data/lib/libis/format/tool/pdf_merge.rb +1 -11
- data/lib/libis/format/tool/pdf_optimizer.rb +2 -11
- data/lib/libis/format/tool/pdf_split.rb +1 -11
- data/lib/libis/format/tool/pdf_to_pdfa.rb +43 -59
- data/lib/libis/format/tool/pdfa_validator.rb +35 -28
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +2 -10
- data/lib/libis/format/tool.rb +1 -1
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +5 -2
- data/libis-format.gemspec +3 -5
- data/tools/fop/fop.bat +75 -75
- data/tools/fop/fop.cmd +31 -31
- data/tools/fop/fop.js +341 -341
- data/tools/fop/lib/avalon-framework.NOTICE.TXT +11 -11
- data/tools/fop/lib/xml-apis.LICENSE-SAX.html +17 -17
- data/tools/fop/lib/xml-apis.LICENSE.DOM-documentation.html +74 -74
- data/tools/fop/lib/xml-apis.LICENSE.DOM-software.html +66 -66
- metadata +19 -29
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -38
- data/lib/libis/format/tool/email_to_pdf.rb +0 -75
- data/lib/libis/format/type_database.rb +0 -133
- data/lib/libis/format/type_database_impl.rb +0 -120
- data/tools/emailconverter.jar +0 -0
- data/tools/pdf2pdfa +0 -395
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# coding: utf-8
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Format
|
7
|
+
|
8
|
+
class Library
|
9
|
+
include Singleton
|
10
|
+
|
11
|
+
class << self
|
12
|
+
def implementation=(impl)
|
13
|
+
instance.implementation = impl
|
14
|
+
end
|
15
|
+
|
16
|
+
def get_info(format)
|
17
|
+
instance.get_info(format)
|
18
|
+
end
|
19
|
+
|
20
|
+
def get_info_by(key, value)
|
21
|
+
instance.get_info_by(key, value)
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_infos_by(key, value)
|
25
|
+
instance.get_infos_by(key, value)
|
26
|
+
end
|
27
|
+
|
28
|
+
def get_field(format, field)
|
29
|
+
instance.get_field(format, field)
|
30
|
+
end
|
31
|
+
|
32
|
+
def get_field_by(key, value, field)
|
33
|
+
instance.get_field_by(key, value, field)
|
34
|
+
end
|
35
|
+
|
36
|
+
def get_fields_by(key, value, field)
|
37
|
+
instance.get_fields_by(key, value, field)
|
38
|
+
end
|
39
|
+
|
40
|
+
def known?(key, value)
|
41
|
+
instance.known?(key, value)
|
42
|
+
end
|
43
|
+
|
44
|
+
def enrich(info, map_keys = {})
|
45
|
+
instance.enrich(info, map_keys)
|
46
|
+
end
|
47
|
+
|
48
|
+
def normalize(info, map_keys = {})
|
49
|
+
instance.normalize(info, map_keys)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def implementation=(impl)
|
54
|
+
@implementation = impl
|
55
|
+
end
|
56
|
+
|
57
|
+
def get_field(format, field)
|
58
|
+
get_field_by(:name, format, field)
|
59
|
+
end
|
60
|
+
|
61
|
+
def get_field_by(key, value, field)
|
62
|
+
info = get_info_by(key, value)
|
63
|
+
return nil unless info
|
64
|
+
case field
|
65
|
+
when :mimetype
|
66
|
+
info[:mimetypes]&.first
|
67
|
+
when :puid
|
68
|
+
info[:puids]&.first
|
69
|
+
when :extension
|
70
|
+
info[:extensions]&.first
|
71
|
+
else
|
72
|
+
info[field]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def get_fields_by(key, value, field)
|
77
|
+
get_infos_by(key, value)&.map { |info| info[field] }.compact
|
78
|
+
end
|
79
|
+
|
80
|
+
def get_info(format)
|
81
|
+
get_info_by(:name, format)
|
82
|
+
end
|
83
|
+
|
84
|
+
def get_info_by(key, value)
|
85
|
+
get_infos_by(key, value)&.first
|
86
|
+
end
|
87
|
+
|
88
|
+
def get_infos_by(key, value)
|
89
|
+
result = @implementation.query(key, value)
|
90
|
+
result.map(&:to_hash)
|
91
|
+
end
|
92
|
+
|
93
|
+
def known?(key, value)
|
94
|
+
!get_info_by(key, value).nil?
|
95
|
+
end
|
96
|
+
|
97
|
+
def enrich(info, map_keys = {})
|
98
|
+
info = normalize(info, map_keys)
|
99
|
+
mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
|
100
|
+
unless (format = info[mapper[:name]]).nil?
|
101
|
+
lib_info = get_info(format)
|
102
|
+
mapper.keys.each do |key|
|
103
|
+
case key
|
104
|
+
when :mimetype
|
105
|
+
info[mapper[key]] = lib_info[:mimetypes].first if lib_info[:mimetypes].first
|
106
|
+
when :puid
|
107
|
+
info[mapper[key]] = lib_info[:puids].first if lib_info[:puids].first
|
108
|
+
when :extension
|
109
|
+
info[mapper[key]] = lib_info[:extensions].first if lib_info[:extensions].first
|
110
|
+
else
|
111
|
+
info[mapper[key]] = lib_info[key] if lib_info[key]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
info
|
116
|
+
end
|
117
|
+
|
118
|
+
# Derive name from the available info
|
119
|
+
def normalize(info, map_keys = {})
|
120
|
+
return {} unless info.is_a? Hash
|
121
|
+
mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
|
122
|
+
# fill format from looking up by puid
|
123
|
+
unless (puid = info[mapper[:puid]]).blank?
|
124
|
+
info[mapper[:name]] ||= get_field_by(:puid, puid, :name)
|
125
|
+
end
|
126
|
+
# fill format from looking up by mimetype
|
127
|
+
unless (mime = info[mapper[:mimetype]]).blank?
|
128
|
+
info[mapper[:name]] ||= get_field_by(:mimetype, mime, :name)
|
129
|
+
end
|
130
|
+
# finally complete the information from looking up by format name
|
131
|
+
unless (format = info[mapper[:name]]).nil?
|
132
|
+
info[mapper[:mimetype]] = get_field(format, :mimetype)
|
133
|
+
info[mapper[:category]] = get_field(format, :category)
|
134
|
+
end
|
135
|
+
info
|
136
|
+
end
|
137
|
+
|
138
|
+
private
|
139
|
+
|
140
|
+
def initialize
|
141
|
+
@implementation = eval(Libis::Format::Config[:format_library_implementation])
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
|
146
|
+
end
|
147
|
+
end
|
@@ -37,14 +37,14 @@ module Libis
|
|
37
37
|
def runner(*args)
|
38
38
|
|
39
39
|
args.map do |file|
|
40
|
-
info = ::Libis::Format::
|
40
|
+
info = ::Libis::Format::Library.get_info_by(:extension, File.extname(file))
|
41
41
|
if info
|
42
42
|
{
|
43
43
|
filepath: file,
|
44
|
-
mimetype: (info[:
|
45
|
-
puid: (info[:
|
44
|
+
mimetype: (info[:mimetypes].first rescue nil),
|
45
|
+
puid: (info[:puids].first rescue nil),
|
46
46
|
matchtype: 'extension',
|
47
|
-
tool: :
|
47
|
+
tool: :format_library
|
48
48
|
}
|
49
49
|
end
|
50
50
|
end.cleanup
|
@@ -13,11 +13,6 @@ module Libis
|
|
13
13
|
include Singleton
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
|
-
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ffmpeg_cmd], "-h")
|
18
|
-
result[:status] == 0
|
19
|
-
end
|
20
|
-
|
21
16
|
def self.run(source, target, options = {})
|
22
17
|
self.instance.run source, target, options
|
23
18
|
end
|
@@ -43,11 +38,7 @@ module Libis
|
|
43
38
|
|
44
39
|
warn "FFMpeg warnings: #{(result[:err] + result[:out]).join("\n")}" unless result[:err].empty?
|
45
40
|
|
46
|
-
|
47
|
-
command: result,
|
48
|
-
files: [ target ]
|
49
|
-
}
|
50
|
-
|
41
|
+
result[:out]
|
51
42
|
end
|
52
43
|
|
53
44
|
end
|
@@ -13,12 +13,6 @@ module Libis
|
|
13
13
|
class FopPdf
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
|
-
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], "-version")
|
18
|
-
return false unless result[:status] == 0
|
19
|
-
File.exist?(Libis::Format::Config[:fop_jar])
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(xml, target, options = [])
|
23
17
|
self.new.run xml, target, options
|
24
18
|
end
|
@@ -33,7 +27,6 @@ module Libis
|
|
33
27
|
result = Libis::Tools::Command.run(
|
34
28
|
Libis::Format::Config[:java_cmd],
|
35
29
|
"-Dfop.home=#{File.dirname(Libis::Format::Config[:fop_jar])}",
|
36
|
-
'-Djava.awt.headless=true',
|
37
30
|
'-jar', Libis::Format::Config[:fop_jar],
|
38
31
|
'-fo', xml,
|
39
32
|
'-pdf', target,
|
@@ -44,11 +37,6 @@ module Libis
|
|
44
37
|
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
45
38
|
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
46
39
|
|
47
|
-
{
|
48
|
-
command: result,
|
49
|
-
files: [ target ]
|
50
|
-
}
|
51
|
-
|
52
40
|
end
|
53
41
|
end
|
54
42
|
|
@@ -7,7 +7,7 @@ require 'libis/tools/logger'
|
|
7
7
|
require 'libis/tools/command'
|
8
8
|
|
9
9
|
require 'libis/format/config'
|
10
|
-
require 'libis/format/
|
10
|
+
require 'libis/format/library'
|
11
11
|
|
12
12
|
module Libis
|
13
13
|
module Format
|
@@ -99,7 +99,7 @@ module Libis
|
|
99
99
|
end
|
100
100
|
|
101
101
|
# Normalize the mimetype
|
102
|
-
Libis::Format::
|
102
|
+
Libis::Format::Library.normalize(result)
|
103
103
|
|
104
104
|
# Default score is 5
|
105
105
|
result[:score] = 5
|
@@ -117,14 +117,14 @@ module Libis
|
|
117
117
|
# Signature match increases score with 2
|
118
118
|
when 'signature'
|
119
119
|
result[:score] += 2
|
120
|
-
# typeinfo = ::Libis::Format::
|
120
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
121
121
|
# ext = File.extname(result[:filename])
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
123
123
|
|
124
124
|
# Container match increases score with 4
|
125
125
|
when 'container'
|
126
126
|
result[:score] += 4
|
127
|
-
# typeinfo = ::Libis::Format::
|
127
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
128
128
|
# ext = File.extname(result[:filename])
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
130
130
|
|
@@ -152,11 +152,11 @@ module Libis
|
|
152
152
|
end
|
153
153
|
|
154
154
|
def get_mimetype(puid)
|
155
|
-
::Libis::Format::
|
155
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
|
156
156
|
end
|
157
157
|
|
158
158
|
def get_puid(mimetype)
|
159
|
-
::Libis::Format::
|
159
|
+
::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
|
160
160
|
end
|
161
161
|
|
162
162
|
attr_accessor :bad_mimetypes, :bad_puids
|
@@ -13,11 +13,6 @@ module Libis
|
|
13
13
|
class OfficeToPdf
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
|
-
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], "--version")
|
18
|
-
result[:status] == 0
|
19
|
-
end
|
20
|
-
|
21
16
|
def self.run(source, target, options = {})
|
22
17
|
self.new.run source, target, options
|
23
18
|
end
|
@@ -52,14 +47,10 @@ module Libis
|
|
52
47
|
|
53
48
|
FileUtils.copy tgt_file, target, preserve: true
|
54
49
|
|
55
|
-
{
|
56
|
-
command: result,
|
57
|
-
files: [ target ]
|
58
|
-
}
|
59
|
-
|
60
50
|
ensure
|
61
51
|
FileUtils.rmtree workdir rescue nil
|
62
52
|
|
53
|
+
result[:out]
|
63
54
|
end
|
64
55
|
end
|
65
56
|
|
@@ -13,12 +13,6 @@ module Libis
|
|
13
13
|
class PdfCopy
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
|
-
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], "-version")
|
18
|
-
return false unless result[:status] == 0
|
19
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, options = [])
|
23
17
|
self.new.run source, target, options
|
24
18
|
end
|
@@ -44,11 +38,7 @@ module Libis
|
|
44
38
|
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
45
39
|
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
46
40
|
|
47
|
-
|
48
|
-
command: result,
|
49
|
-
files: [ target ]
|
50
|
-
}
|
51
|
-
|
41
|
+
result
|
52
42
|
end
|
53
43
|
end
|
54
44
|
|
@@ -13,12 +13,6 @@ module Libis
|
|
13
13
|
class PdfMerge
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
|
-
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], "-version")
|
18
|
-
return false unless result[:status] == 0
|
19
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, options = [])
|
23
17
|
self.new.run source, target, options
|
24
18
|
end
|
@@ -45,11 +39,7 @@ module Libis
|
|
45
39
|
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
40
|
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
47
41
|
|
48
|
-
|
49
|
-
command: result,
|
50
|
-
files: [ target ]
|
51
|
-
}
|
52
|
-
|
42
|
+
result
|
53
43
|
end
|
54
44
|
end
|
55
45
|
|
@@ -13,11 +13,6 @@ module Libis
|
|
13
13
|
class PdfOptimizer
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
|
-
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], "--version")
|
18
|
-
result[:status] == 0
|
19
|
-
end
|
20
|
-
|
21
16
|
def self.run(source, target, quality)
|
22
17
|
self.new.run source, target, quality
|
23
18
|
end
|
@@ -26,7 +21,7 @@ module Libis
|
|
26
21
|
|
27
22
|
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
28
23
|
result = Libis::Tools::Command.run(
|
29
|
-
|
24
|
+
'gs',
|
30
25
|
'-sDEVICE=pdfwrite',
|
31
26
|
'-dCompatibilityLevel=1.4',
|
32
27
|
"-dPDFSETTINGS=/#{quality}",
|
@@ -41,11 +36,7 @@ module Libis
|
|
41
36
|
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
42
37
|
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
43
38
|
|
44
|
-
|
45
|
-
command: result,
|
46
|
-
files: [ target ]
|
47
|
-
}
|
48
|
-
|
39
|
+
result
|
49
40
|
end
|
50
41
|
end
|
51
42
|
|
@@ -13,12 +13,6 @@ module Libis
|
|
13
13
|
class PdfSplit
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
|
-
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], "-version")
|
18
|
-
return false unless result[:status] == 0
|
19
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, options = [])
|
23
17
|
self.new.run source, target, options
|
24
18
|
end
|
@@ -44,11 +38,7 @@ module Libis
|
|
44
38
|
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
45
39
|
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
46
40
|
|
47
|
-
|
48
|
-
command: result,
|
49
|
-
files: [ target ] # TODO: collect the files
|
50
|
-
}
|
51
|
-
|
41
|
+
result
|
52
42
|
end
|
53
43
|
end
|
54
44
|
|
@@ -1,99 +1,83 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require 'pdfinfo'
|
1
|
+
require 'tempfile'
|
2
|
+
require 'csv'
|
3
|
+
require 'fileutils'
|
5
4
|
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
5
|
+
require 'libis/tools/extend/string'
|
6
|
+
require 'libis/tools/logger'
|
7
|
+
require 'libis/tools/command'
|
8
|
+
require 'libis/tools/temp_file'
|
10
9
|
|
11
|
-
require
|
10
|
+
require 'libis/format'
|
12
11
|
|
13
12
|
module Libis
|
14
13
|
module Format
|
15
14
|
module Tool
|
15
|
+
|
16
16
|
class PdfToPdfa
|
17
17
|
include ::Libis::Tools::Logger
|
18
18
|
|
19
|
-
def self.installed?
|
20
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd])
|
21
|
-
result == 0
|
22
|
-
end
|
23
|
-
|
24
19
|
def self.run(source, target = nil, options = {})
|
25
|
-
new.run source, target, options
|
20
|
+
self.new.run source, target, options
|
26
21
|
end
|
27
22
|
|
28
23
|
def run(source, target = nil, options = nil)
|
29
|
-
tmp_target = Tools::TempFile.name(File.basename(source, ".*"), ".pdf")
|
30
|
-
target ||= tmp_target
|
31
24
|
|
32
|
-
|
25
|
+
tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
|
26
|
+
target ||= tmp_target
|
33
27
|
|
34
28
|
icc_info = icc_options(options[:colorspace])
|
35
29
|
|
36
|
-
icc_file = Tools::TempFile.name(icc_info[:icc_name],
|
30
|
+
icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
|
37
31
|
FileUtils.cp(File.join(Libis::Format::DATA_DIR, "#{icc_info[:icc_name]}.icc"), icc_file)
|
38
32
|
|
39
|
-
def_filename = Tools::TempFile.name(
|
40
|
-
File.open(def_filename,
|
41
|
-
f.puts File.read(File.join(Libis::Format::DATA_DIR,
|
42
|
-
|
43
|
-
|
44
|
-
.gsub("[**METADATA**]", metadata)
|
33
|
+
def_filename = Tools::TempFile.name('PDFA_def', '.ps')
|
34
|
+
File.open(def_filename, 'w') do |f|
|
35
|
+
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
|
36
|
+
gsub('[** Fill in ICC profile location **]', icc_file).
|
37
|
+
gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
|
45
38
|
end
|
46
39
|
|
47
40
|
timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
|
48
41
|
result = Libis::Tools::Command.run(
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
source,
|
61
|
-
timeout: timeout,
|
62
|
-
kill_after: timeout * 2
|
42
|
+
Libis::Format::Config[:ghostscript_cmd],
|
43
|
+
'-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
|
44
|
+
'-sColorConversionStrategy=/UseDeviceIndependentColor',
|
45
|
+
"-sProcessColorModel=#{icc_info[:device]}",
|
46
|
+
'-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
|
47
|
+
"-sOutputICCProfile=#{icc_file}",
|
48
|
+
'-o', File.absolute_path(target),
|
49
|
+
def_filename,
|
50
|
+
source,
|
51
|
+
timeout: timeout,
|
52
|
+
kill_after: timeout * 2
|
63
53
|
)
|
64
54
|
|
65
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
66
|
-
raise "#{self.class} failed with error #{result[:status]}: \n#{(result[:out] + result[:err]).join("\n")}" if result[:status] != 0
|
55
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
67
56
|
|
68
57
|
FileUtils.rm [icc_file, def_filename].compact, force: true
|
69
58
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
59
|
+
unless Format::Tool::PdfaValidator.run(target)
|
60
|
+
result[:status] = -999
|
61
|
+
result[:err] << 'Failed to validate generated PDF/A file.'
|
62
|
+
end
|
63
|
+
|
64
|
+
result
|
74
65
|
end
|
75
66
|
|
67
|
+
|
76
68
|
private
|
77
69
|
|
78
|
-
def get_metadata(source)
|
79
|
-
info = Pdfinfo.new(source)
|
80
|
-
metadata = "/Title (#{info.title})"
|
81
|
-
metadata += "\n /Author (#{info.author})" if info.author
|
82
|
-
metadata += "\n /Subject (#{info.subject})" if info.subject
|
83
|
-
metadata += "\n /Keywords (#{info.keywords})" if info.keywords
|
84
|
-
metadata += "\n /Creator (#{info.creator})" if info.creator
|
85
|
-
metadata
|
86
|
-
end
|
87
|
-
|
88
70
|
def icc_options(colorspace)
|
89
71
|
case colorspace.to_s.downcase
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
72
|
+
when 'cmyk'
|
73
|
+
{icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
|
74
|
+
else
|
75
|
+
{icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
|
94
76
|
end
|
95
77
|
end
|
78
|
+
|
96
79
|
end
|
80
|
+
|
97
81
|
end
|
98
82
|
end
|
99
83
|
end
|
@@ -1,22 +1,24 @@
|
|
1
|
-
require
|
1
|
+
require 'fileutils'
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
6
|
|
7
|
-
require
|
7
|
+
require 'libis/format/config'
|
8
8
|
|
9
9
|
module Libis
|
10
10
|
module Format
|
11
11
|
module Tool
|
12
|
+
|
12
13
|
class PdfaValidator
|
13
14
|
include ::Libis::Tools::Logger
|
14
15
|
|
15
16
|
def self.run(source)
|
16
|
-
new.run source
|
17
|
+
self.new.run source
|
17
18
|
end
|
18
19
|
|
19
20
|
def run(source)
|
21
|
+
|
20
22
|
src_file = File.absolute_path(source)
|
21
23
|
|
22
24
|
timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
|
@@ -26,41 +28,46 @@ module Libis
|
|
26
28
|
Dir.chdir(Dir.tmpdir)
|
27
29
|
|
28
30
|
result = Libis::Tools::Command.run(
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
pdfa,
|
32
|
+
'--noxml',
|
33
|
+
'--level', 'B',
|
34
|
+
'--verb', '0',
|
35
|
+
src_file,
|
36
|
+
timeout: timeout,
|
37
|
+
kill_after: timeout * 2
|
36
38
|
)
|
37
39
|
|
38
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
41
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
42
|
|
41
43
|
Dir.chdir(previous_wd)
|
42
44
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
|
46
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
47
|
+
result[:out].join("\n")
|
48
|
+
return false
|
49
|
+
end
|
48
50
|
else
|
49
51
|
jar = Libis::Format::Config[:preflight_jar]
|
50
52
|
result = Libis::Tools::Command.run(
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
Libis::Format::Config[:java_cmd],
|
54
|
+
'-jar', jar,
|
55
|
+
src_file,
|
56
|
+
timeout: timeout,
|
57
|
+
kill_after: timeout * 2
|
56
58
|
)
|
59
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
57
60
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
+
unless result[:status] == 0
|
62
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
63
|
+
result[:out].join("\n")
|
64
|
+
return false
|
65
|
+
end
|
61
66
|
end
|
67
|
+
true
|
62
68
|
end
|
63
69
|
end
|
70
|
+
|
64
71
|
end
|
65
72
|
end
|
66
73
|
end
|