libis-format 1.3.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -250
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'csv'
|
4
2
|
require 'tmpdir'
|
5
3
|
|
@@ -9,46 +7,47 @@ require 'libis/tools/logger'
|
|
9
7
|
require 'libis/tools/command'
|
10
8
|
|
11
9
|
require 'libis/format/config'
|
12
|
-
require 'libis/format/
|
10
|
+
require 'libis/format/library'
|
13
11
|
|
14
12
|
module Libis
|
15
13
|
module Format
|
16
14
|
module Tool
|
17
|
-
|
15
|
+
|
16
|
+
class IdentificationTool
|
18
17
|
include Singleton
|
19
18
|
include ::Libis::Tools::Logger
|
20
19
|
|
21
20
|
def self.bad_mimetype(mimetype)
|
22
|
-
instance.bad_mimetype(mimetype)
|
21
|
+
self.instance.bad_mimetype(mimetype)
|
23
22
|
end
|
24
23
|
|
25
|
-
def self.run(file, recursive = false,
|
24
|
+
def self.run(file, recursive = false, options = {})
|
25
|
+
options ||= {}
|
26
26
|
if file.is_a?(Array)
|
27
|
-
return run_list file,
|
28
|
-
elsif file.is_a?(String) && File.
|
27
|
+
return run_list file, options
|
28
|
+
elsif file.is_a?(String) && File.exists?(file) && File.readable?(file)
|
29
29
|
if File.directory?(file)
|
30
|
-
return run_dir(file, recursive,
|
30
|
+
return run_dir(file, recursive, options)
|
31
31
|
elsif File.file?(file)
|
32
|
-
return instance.run(file,
|
32
|
+
return self.instance.run(file, options)
|
33
33
|
end
|
34
34
|
end
|
35
|
-
|
36
35
|
raise ArgumentError,
|
37
36
|
'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
|
38
37
|
end
|
39
38
|
|
40
|
-
def self.run_dir(file, recursive = true,
|
41
|
-
instance.run_dir file, recursive,
|
39
|
+
def self.run_dir(file, recursive = true, options = {})
|
40
|
+
self.instance.run_dir file, recursive, options
|
42
41
|
end
|
43
42
|
|
44
|
-
def self.run_list(filelist,
|
45
|
-
instance.run_list filelist,
|
43
|
+
def self.run_list(filelist , options = {})
|
44
|
+
self.instance.run_list filelist, options
|
46
45
|
end
|
47
46
|
|
48
47
|
protected
|
49
48
|
|
50
49
|
def create_list_file(filelist)
|
51
|
-
list_file = Tempfile.new(%w
|
50
|
+
list_file = Tempfile.new(%w'file .list')
|
52
51
|
filelist.each do |fname|
|
53
52
|
list_file.write "#{fname}\n"
|
54
53
|
end
|
@@ -84,22 +83,23 @@ module Libis
|
|
84
83
|
# { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
|
85
84
|
#
|
86
85
|
def process_output(output)
|
87
|
-
output.
|
86
|
+
output.reduce({}) do |results, x|
|
88
87
|
filepath = File.absolute_path(x.delete(:filepath)).freeze
|
89
88
|
results[filepath] ||= []
|
90
89
|
results[filepath] << annotate(x)
|
90
|
+
results
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
94
|
# Enhance the output with mimetype and score
|
95
95
|
def annotate(result)
|
96
96
|
# Enhance result with mimetype if needed
|
97
|
-
bad_mimetypes.include?(result[:mimetype]) &&
|
98
|
-
!bad_puids.include?(result[:puid]) &&
|
97
|
+
if bad_mimetypes.include?(result[:mimetype]) && !bad_puids.include?(result[:puid])
|
99
98
|
result[:mimetype] = get_mimetype(result[:puid])
|
99
|
+
end
|
100
100
|
|
101
101
|
# Normalize the mimetype
|
102
|
-
Libis::Format::
|
102
|
+
Libis::Format::Library.normalize(result)
|
103
103
|
|
104
104
|
# Default score is 5
|
105
105
|
result[:score] = 5
|
@@ -108,54 +108,55 @@ module Libis
|
|
108
108
|
result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
|
109
109
|
|
110
110
|
# freeze all strings
|
111
|
-
result.each {
|
111
|
+
result.each {|_, v| v.freeze if v.is_a?(String)}
|
112
112
|
|
113
113
|
# Adapt score based on matchtype
|
114
114
|
result[:matchtype] = result[:matchtype].to_s.downcase
|
115
115
|
case result[:matchtype]
|
116
116
|
|
117
117
|
# Signature match increases score by 2
|
118
|
-
|
119
|
-
|
120
|
-
# typeinfo = ::Libis::Format::
|
118
|
+
when 'signature'
|
119
|
+
result[:score] += 2
|
120
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
121
121
|
# ext = File.extname(result[:filename])
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
123
123
|
|
124
124
|
# Container match increases score by 4
|
125
|
-
|
126
|
-
|
127
|
-
# typeinfo = ::Libis::Format::
|
125
|
+
when 'container'
|
126
|
+
result[:score] += 4
|
127
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
128
128
|
# ext = File.extname(result[:filename])
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
130
130
|
|
131
131
|
# Extension match is the weakest identification; score is lowered by 2 points
|
132
|
-
|
133
|
-
|
132
|
+
when 'extension'
|
133
|
+
result[:score] -= 2
|
134
134
|
|
135
135
|
# Magic code (file tool) is to be trusted even less
|
136
|
-
|
137
|
-
|
136
|
+
when 'magic'
|
137
|
+
result[:score] -= 3
|
138
138
|
|
139
|
+
# Or no change otherwise
|
140
|
+
else
|
141
|
+
# do nothing
|
139
142
|
end
|
140
143
|
|
141
144
|
# Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
|
142
145
|
# Office OpenXML, OpenDocument, jar, maff, svx)
|
143
|
-
|
146
|
+
if result[:mimetype] == 'application/zip'
|
147
|
+
result[:score] -= 2
|
148
|
+
end
|
144
149
|
|
145
150
|
# Return result enhanced with mimetype and score fields
|
146
151
|
result
|
147
152
|
end
|
148
153
|
|
149
154
|
def get_mimetype(puid)
|
150
|
-
::Libis::Format::
|
151
|
-
rescue StandardError
|
152
|
-
nil
|
155
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
|
153
156
|
end
|
154
157
|
|
155
158
|
def get_puid(mimetype)
|
156
|
-
::Libis::Format::
|
157
|
-
rescue StandardError
|
158
|
-
nil
|
159
|
+
::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
|
159
160
|
end
|
160
161
|
|
161
162
|
attr_accessor :bad_mimetypes, :bad_puids
|
@@ -169,6 +170,7 @@ module Libis
|
|
169
170
|
@bad_mimetypes << mimetype
|
170
171
|
end
|
171
172
|
end
|
173
|
+
|
172
174
|
end
|
173
175
|
end
|
174
176
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'fileutils'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,61 +9,51 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class OfficeToPdf
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.run(source, target, **options)
|
23
|
-
new.run source, target, **options
|
16
|
+
def self.run(source, target, options = {})
|
17
|
+
self.new.run source, target, options
|
24
18
|
end
|
25
19
|
|
26
|
-
def run(source, target,
|
20
|
+
def run(source, target, options = {})
|
27
21
|
workdir = '/...'
|
28
22
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
29
23
|
|
30
|
-
workdir = File.join(workdir, rand(
|
24
|
+
workdir = File.join(workdir, rand(1000000).to_s)
|
31
25
|
FileUtils.mkpath(workdir)
|
32
26
|
|
33
27
|
src_file = File.join(workdir, File.basename(source))
|
34
28
|
FileUtils.symlink source, src_file
|
35
29
|
|
36
|
-
tgt_file = File.join(workdir,
|
30
|
+
tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')
|
37
31
|
|
38
32
|
export_filter = options[:export_filter] || 'pdf'
|
39
33
|
|
40
34
|
timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
|
41
35
|
result = Libis::Tools::Command.run(
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
36
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
37
|
+
"-env:UserInstallation=file://#{workdir}",
|
38
|
+
'--convert-to', export_filter,
|
39
|
+
'--outdir', workdir, src_file,
|
40
|
+
timeout: timeout,
|
41
|
+
kill_after: timeout * 2
|
48
42
|
)
|
49
43
|
|
50
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
51
|
-
|
44
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
52
45
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
53
|
-
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
46
|
+
raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
54
47
|
|
55
48
|
FileUtils.copy tgt_file, target, preserve: true
|
56
49
|
|
57
|
-
{
|
58
|
-
command: result,
|
59
|
-
files: [target]
|
60
|
-
}
|
61
50
|
ensure
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
nil
|
66
|
-
end
|
51
|
+
FileUtils.rmtree workdir rescue nil
|
52
|
+
|
53
|
+
result[:out]
|
67
54
|
end
|
68
55
|
end
|
56
|
+
|
69
57
|
end
|
70
58
|
end
|
71
59
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
|
10
|
+
module Format
|
11
|
+
module Tool
|
12
|
+
|
13
|
+
class PdfCopy
|
14
|
+
include ::Libis::Tools::Logger
|
15
|
+
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
18
|
+
end
|
19
|
+
|
20
|
+
def run(source, target, options = [])
|
21
|
+
|
22
|
+
if OS.java?
|
23
|
+
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
24
|
+
end
|
25
|
+
|
26
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_copy]
|
27
|
+
result = Libis::Tools::Command.run(
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'CopyPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
36
|
+
)
|
37
|
+
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
|
+
|
41
|
+
result
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,44 +9,40 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfMerge
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.run(source, target, *options)
|
25
|
-
new.run source, target, options
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
26
18
|
end
|
27
19
|
|
28
|
-
def run(source, target,
|
20
|
+
def run(source, target, options = [])
|
29
21
|
source = [source] unless source.is_a?(Array)
|
30
22
|
|
31
23
|
if OS.java?
|
32
24
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
33
25
|
end
|
34
26
|
|
35
|
-
timeout = Libis::Format::Config[:timeouts][:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
27
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_merge]
|
28
|
+
result = Libis::Tools::Command.run(
|
29
|
+
Libis::Format::Config[:java_cmd],
|
30
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
31
|
+
'MergePdf',
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
*source,
|
35
|
+
timeout: timeout,
|
36
|
+
kill_after: timeout * 2
|
37
|
+
)
|
38
|
+
|
39
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
40
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
48
41
|
|
49
42
|
result
|
50
43
|
end
|
51
44
|
end
|
45
|
+
|
52
46
|
end
|
53
47
|
end
|
54
48
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,38 +9,37 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfOptimizer
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, quality)
|
23
|
-
new.run source, target, quality
|
17
|
+
self.new.run source, target, quality
|
24
18
|
end
|
25
19
|
|
26
20
|
def run(source, target, quality)
|
27
|
-
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
28
|
-
args = [
|
29
|
-
Libis::Format::Config[:ghostscript_cmd],
|
30
|
-
'-sDEVICE=pdfwrite',
|
31
|
-
'-dCompatibilityLevel=1.4',
|
32
|
-
"-dPDFSETTINGS=/#{quality}",
|
33
|
-
'-dNOPAUSE',
|
34
|
-
'-dBATCH',
|
35
|
-
"-sOutputFile=#{target}",
|
36
|
-
source.to_s
|
37
|
-
]
|
38
|
-
|
39
|
-
result = Libis::Tools::Command.run(*args, timeout:, kill_after: timeout * 2)
|
40
21
|
|
41
|
-
|
22
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
23
|
+
result = Libis::Tools::Command.run(
|
24
|
+
'gs',
|
25
|
+
'-sDEVICE=pdfwrite',
|
26
|
+
'-dCompatibilityLevel=1.4',
|
27
|
+
"-dPDFSETTINGS=/#{quality}",
|
28
|
+
'-dNOPAUSE',
|
29
|
+
'-dBATCH',
|
30
|
+
"-sOutputFile=#{target}",
|
31
|
+
"#{source}",
|
32
|
+
timeout: timeout,
|
33
|
+
kill_after: timeout * 2
|
34
|
+
)
|
35
|
+
|
36
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
37
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
42
38
|
|
43
39
|
result
|
44
40
|
end
|
45
41
|
end
|
42
|
+
|
46
43
|
end
|
47
44
|
end
|
48
45
|
end
|
@@ -1,20 +1,47 @@
|
|
1
|
-
|
1
|
+
require 'os'
|
2
2
|
|
3
|
-
require 'libis/
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
4
8
|
|
5
9
|
module Libis
|
6
10
|
module Format
|
7
11
|
module Tool
|
12
|
+
|
8
13
|
class PdfSplit
|
14
|
+
include ::Libis::Tools::Logger
|
9
15
|
|
10
|
-
def self.run(source, target,
|
11
|
-
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
12
18
|
end
|
13
19
|
|
14
|
-
def run(source, target,
|
15
|
-
|
20
|
+
def run(source, target, options = [])
|
21
|
+
|
22
|
+
if OS.java?
|
23
|
+
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
24
|
+
end
|
25
|
+
|
26
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
27
|
+
result = Libis::Tools::Command.run(
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'SplitPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
36
|
+
)
|
37
|
+
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
|
+
|
41
|
+
result
|
16
42
|
end
|
17
43
|
end
|
44
|
+
|
18
45
|
end
|
19
46
|
end
|
20
47
|
end
|
@@ -1,9 +1,6 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'tempfile'
|
4
2
|
require 'csv'
|
5
3
|
require 'fileutils'
|
6
|
-
require 'pdfinfo'
|
7
4
|
|
8
5
|
require 'libis/tools/extend/string'
|
9
6
|
require 'libis/tools/logger'
|
@@ -15,24 +12,19 @@ require 'libis/format'
|
|
15
12
|
module Libis
|
16
13
|
module Format
|
17
14
|
module Tool
|
15
|
+
|
18
16
|
class PdfToPdfa
|
19
17
|
include ::Libis::Tools::Logger
|
20
18
|
|
21
|
-
def self.
|
22
|
-
|
23
|
-
result.zero?
|
19
|
+
def self.run(source, target = nil, options = {})
|
20
|
+
self.new.run source, target, options
|
24
21
|
end
|
25
22
|
|
26
|
-
def
|
27
|
-
new.run source, target, **options
|
28
|
-
end
|
23
|
+
def run(source, target = nil, options = nil)
|
29
24
|
|
30
|
-
def run(source, target = nil, **options)
|
31
25
|
tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
|
32
26
|
target ||= tmp_target
|
33
27
|
|
34
|
-
metadata = get_metadata(source)
|
35
|
-
|
36
28
|
icc_info = icc_options(options[:colorspace])
|
37
29
|
|
38
30
|
icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
|
@@ -40,58 +32,52 @@ module Libis
|
|
40
32
|
|
41
33
|
def_filename = Tools::TempFile.name('PDFA_def', '.ps')
|
42
34
|
File.open(def_filename, 'w') do |f|
|
43
|
-
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
|
44
|
-
|
45
|
-
|
46
|
-
.gsub('[**METADATA**]', metadata)
|
35
|
+
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
|
36
|
+
gsub('[** Fill in ICC profile location **]', icc_file).
|
37
|
+
gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
|
47
38
|
end
|
48
39
|
|
49
40
|
timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
|
50
41
|
result = Libis::Tools::Command.run(
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
source,
|
63
|
-
timeout:,
|
64
|
-
kill_after: timeout * 2
|
42
|
+
Libis::Format::Config[:ghostscript_cmd],
|
43
|
+
'-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
|
44
|
+
'-sColorConversionStrategy=/UseDeviceIndependentColor',
|
45
|
+
"-sProcessColorModel=#{icc_info[:device]}",
|
46
|
+
'-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
|
47
|
+
"-sOutputICCProfile=#{icc_file}",
|
48
|
+
'-o', File.absolute_path(target),
|
49
|
+
def_filename,
|
50
|
+
source,
|
51
|
+
timeout: timeout,
|
52
|
+
kill_after: timeout * 2
|
65
53
|
)
|
66
54
|
|
67
|
-
|
55
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
68
56
|
|
69
57
|
FileUtils.rm [icc_file, def_filename].compact, force: true
|
70
58
|
|
59
|
+
unless Format::Tool::PdfaValidator.run(target)
|
60
|
+
result[:status] = -999
|
61
|
+
result[:err] << 'Failed to validate generated PDF/A file.'
|
62
|
+
end
|
63
|
+
|
71
64
|
result
|
72
65
|
end
|
73
66
|
|
74
|
-
private
|
75
67
|
|
76
|
-
|
77
|
-
info = Pdfinfo.new(source)
|
78
|
-
metadata = "/Title (#{info.title})"
|
79
|
-
metadata += "\n /Author (#{info.author})" if info.author
|
80
|
-
metadata += "\n /Subject (#{info.subject})" if info.subject
|
81
|
-
metadata += "\n /Keywords (#{info.keywords})" if info.keywords
|
82
|
-
metadata += "\n /Creator (#{info.creator})" if info.creator
|
83
|
-
metadata
|
84
|
-
end
|
68
|
+
private
|
85
69
|
|
86
70
|
def icc_options(colorspace)
|
87
71
|
case colorspace.to_s.downcase
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
72
|
+
when 'cmyk'
|
73
|
+
{icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
|
74
|
+
else
|
75
|
+
{icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
|
92
76
|
end
|
93
77
|
end
|
78
|
+
|
94
79
|
end
|
80
|
+
|
95
81
|
end
|
96
82
|
end
|
97
83
|
end
|