libis-format 1.3.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -250
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'csv'
|
4
2
|
require 'tmpdir'
|
5
3
|
|
@@ -9,46 +7,47 @@ require 'libis/tools/logger'
|
|
9
7
|
require 'libis/tools/command'
|
10
8
|
|
11
9
|
require 'libis/format/config'
|
12
|
-
require 'libis/format/
|
10
|
+
require 'libis/format/library'
|
13
11
|
|
14
12
|
module Libis
|
15
13
|
module Format
|
16
14
|
module Tool
|
17
|
-
|
15
|
+
|
16
|
+
class IdentificationTool
|
18
17
|
include Singleton
|
19
18
|
include ::Libis::Tools::Logger
|
20
19
|
|
21
20
|
def self.bad_mimetype(mimetype)
|
22
|
-
instance.bad_mimetype(mimetype)
|
21
|
+
self.instance.bad_mimetype(mimetype)
|
23
22
|
end
|
24
23
|
|
25
|
-
def self.run(file, recursive = false,
|
24
|
+
def self.run(file, recursive = false, options = {})
|
25
|
+
options ||= {}
|
26
26
|
if file.is_a?(Array)
|
27
|
-
return run_list file,
|
28
|
-
elsif file.is_a?(String) && File.
|
27
|
+
return run_list file, options
|
28
|
+
elsif file.is_a?(String) && File.exists?(file) && File.readable?(file)
|
29
29
|
if File.directory?(file)
|
30
|
-
return run_dir(file, recursive,
|
30
|
+
return run_dir(file, recursive, options)
|
31
31
|
elsif File.file?(file)
|
32
|
-
return instance.run(file,
|
32
|
+
return self.instance.run(file, options)
|
33
33
|
end
|
34
34
|
end
|
35
|
-
|
36
35
|
raise ArgumentError,
|
37
36
|
'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
|
38
37
|
end
|
39
38
|
|
40
|
-
def self.run_dir(file, recursive = true,
|
41
|
-
instance.run_dir file, recursive,
|
39
|
+
def self.run_dir(file, recursive = true, options = {})
|
40
|
+
self.instance.run_dir file, recursive, options
|
42
41
|
end
|
43
42
|
|
44
|
-
def self.run_list(filelist,
|
45
|
-
instance.run_list filelist,
|
43
|
+
def self.run_list(filelist , options = {})
|
44
|
+
self.instance.run_list filelist, options
|
46
45
|
end
|
47
46
|
|
48
47
|
protected
|
49
48
|
|
50
49
|
def create_list_file(filelist)
|
51
|
-
list_file = Tempfile.new(%w
|
50
|
+
list_file = Tempfile.new(%w'file .list')
|
52
51
|
filelist.each do |fname|
|
53
52
|
list_file.write "#{fname}\n"
|
54
53
|
end
|
@@ -84,22 +83,23 @@ module Libis
|
|
84
83
|
# { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
|
85
84
|
#
|
86
85
|
def process_output(output)
|
87
|
-
output.
|
86
|
+
output.reduce({}) do |results, x|
|
88
87
|
filepath = File.absolute_path(x.delete(:filepath)).freeze
|
89
88
|
results[filepath] ||= []
|
90
89
|
results[filepath] << annotate(x)
|
90
|
+
results
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
94
|
# Enhance the output with mimetype and score
|
95
95
|
def annotate(result)
|
96
96
|
# Enhance result with mimetype if needed
|
97
|
-
bad_mimetypes.include?(result[:mimetype]) &&
|
98
|
-
!bad_puids.include?(result[:puid]) &&
|
97
|
+
if bad_mimetypes.include?(result[:mimetype]) && !bad_puids.include?(result[:puid])
|
99
98
|
result[:mimetype] = get_mimetype(result[:puid])
|
99
|
+
end
|
100
100
|
|
101
101
|
# Normalize the mimetype
|
102
|
-
Libis::Format::
|
102
|
+
Libis::Format::Library.normalize(result)
|
103
103
|
|
104
104
|
# Default score is 5
|
105
105
|
result[:score] = 5
|
@@ -108,54 +108,55 @@ module Libis
|
|
108
108
|
result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
|
109
109
|
|
110
110
|
# freeze all strings
|
111
|
-
result.each {
|
111
|
+
result.each {|_, v| v.freeze if v.is_a?(String)}
|
112
112
|
|
113
113
|
# Adapt score based on matchtype
|
114
114
|
result[:matchtype] = result[:matchtype].to_s.downcase
|
115
115
|
case result[:matchtype]
|
116
116
|
|
117
117
|
# Signature match increases the score by 2
|
118
|
-
|
119
|
-
|
120
|
-
# typeinfo = ::Libis::Format::
|
118
|
+
when 'signature'
|
119
|
+
result[:score] += 2
|
120
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
121
121
|
# ext = File.extname(result[:filename])
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
123
123
|
|
124
124
|
# Container match increases the score by 4
|
125
|
-
|
126
|
-
|
127
|
-
# typeinfo = ::Libis::Format::
|
125
|
+
when 'container'
|
126
|
+
result[:score] += 4
|
127
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
128
128
|
# ext = File.extname(result[:filename])
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
130
130
|
|
131
131
|
# Extension match is the weakest identification; score is lowered by 2 points
|
132
|
-
|
133
|
-
|
132
|
+
when 'extension'
|
133
|
+
result[:score] -= 2
|
134
134
|
|
135
135
|
# Magic code (file tool) is to be trusted even less
|
136
|
-
|
137
|
-
|
136
|
+
when 'magic'
|
137
|
+
result[:score] -= 3
|
138
138
|
|
139
|
+
# Or no change otherwise
|
140
|
+
else
|
141
|
+
# do nothing
|
139
142
|
end
|
140
143
|
|
141
144
|
# Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
|
142
145
|
# Office OpenXML, OpenDocument, jar, maff, svx)
|
143
|
-
|
146
|
+
if result[:mimetype] == 'application/zip'
|
147
|
+
result[:score] -= 2
|
148
|
+
end
|
144
149
|
|
145
150
|
# Return result enhanced with mimetype and score fields
|
146
151
|
result
|
147
152
|
end
|
148
153
|
|
149
154
|
def get_mimetype(puid)
|
150
|
-
::Libis::Format::
|
151
|
-
rescue StandardError
|
152
|
-
nil
|
155
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
|
153
156
|
end
|
154
157
|
|
155
158
|
def get_puid(mimetype)
|
156
|
-
::Libis::Format::
|
157
|
-
rescue StandardError
|
158
|
-
nil
|
159
|
+
::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
|
159
160
|
end
|
160
161
|
|
161
162
|
attr_accessor :bad_mimetypes, :bad_puids
|
@@ -169,6 +170,7 @@ module Libis
|
|
169
170
|
@bad_mimetypes << mimetype
|
170
171
|
end
|
171
172
|
end
|
173
|
+
|
172
174
|
end
|
173
175
|
end
|
174
176
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'fileutils'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,61 +9,51 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class OfficeToPdf
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.run(source, target, **options)
|
23
|
-
new.run source, target, **options
|
16
|
+
def self.run(source, target, options = {})
|
17
|
+
self.new.run source, target, options
|
24
18
|
end
|
25
19
|
|
26
|
-
def run(source, target,
|
20
|
+
def run(source, target, options = {})
|
27
21
|
workdir = '/...'
|
28
22
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
29
23
|
|
30
|
-
workdir = File.join(workdir, rand(
|
24
|
+
workdir = File.join(workdir, rand(1000000).to_s)
|
31
25
|
FileUtils.mkpath(workdir)
|
32
26
|
|
33
27
|
src_file = File.join(workdir, File.basename(source))
|
34
28
|
FileUtils.symlink source, src_file
|
35
29
|
|
36
|
-
tgt_file = File.join(workdir,
|
30
|
+
tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')
|
37
31
|
|
38
32
|
export_filter = options[:export_filter] || 'pdf'
|
39
33
|
|
40
34
|
timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
|
41
35
|
result = Libis::Tools::Command.run(
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
36
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
37
|
+
"-env:UserInstallation=file://#{workdir}",
|
38
|
+
'--convert-to', export_filter,
|
39
|
+
'--outdir', workdir, src_file,
|
40
|
+
timeout: timeout,
|
41
|
+
kill_after: timeout * 2
|
48
42
|
)
|
49
43
|
|
50
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
51
|
-
|
44
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
52
45
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
53
|
-
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
46
|
+
raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
54
47
|
|
55
48
|
FileUtils.copy tgt_file, target, preserve: true
|
56
49
|
|
57
|
-
{
|
58
|
-
command: result,
|
59
|
-
files: [target]
|
60
|
-
}
|
61
50
|
ensure
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
nil
|
66
|
-
end
|
51
|
+
FileUtils.rmtree workdir rescue nil
|
52
|
+
|
53
|
+
result[:out]
|
67
54
|
end
|
68
55
|
end
|
56
|
+
|
69
57
|
end
|
70
58
|
end
|
71
59
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
|
10
|
+
module Format
|
11
|
+
module Tool
|
12
|
+
|
13
|
+
class PdfCopy
|
14
|
+
include ::Libis::Tools::Logger
|
15
|
+
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
18
|
+
end
|
19
|
+
|
20
|
+
def run(source, target, options = [])
|
21
|
+
|
22
|
+
if OS.java?
|
23
|
+
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
24
|
+
end
|
25
|
+
|
26
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_copy]
|
27
|
+
result = Libis::Tools::Command.run(
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'CopyPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
36
|
+
)
|
37
|
+
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
|
+
|
41
|
+
result
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,44 +9,40 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfMerge
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.run(source, target, *options)
|
25
|
-
new.run source, target, options
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
26
18
|
end
|
27
19
|
|
28
|
-
def run(source, target,
|
20
|
+
def run(source, target, options = [])
|
29
21
|
source = [source] unless source.is_a?(Array)
|
30
22
|
|
31
23
|
if OS.java?
|
32
24
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
33
25
|
end
|
34
26
|
|
35
|
-
timeout = Libis::Format::Config[:timeouts][:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
27
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_merge]
|
28
|
+
result = Libis::Tools::Command.run(
|
29
|
+
Libis::Format::Config[:java_cmd],
|
30
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
31
|
+
'MergePdf',
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
*source,
|
35
|
+
timeout: timeout,
|
36
|
+
kill_after: timeout * 2
|
37
|
+
)
|
38
|
+
|
39
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
40
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
48
41
|
|
49
42
|
result
|
50
43
|
end
|
51
44
|
end
|
45
|
+
|
52
46
|
end
|
53
47
|
end
|
54
48
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,38 +9,37 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfOptimizer
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, quality)
|
23
|
-
new.run source, target, quality
|
17
|
+
self.new.run source, target, quality
|
24
18
|
end
|
25
19
|
|
26
20
|
def run(source, target, quality)
|
27
|
-
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
28
|
-
args = [
|
29
|
-
Libis::Format::Config[:ghostscript_cmd],
|
30
|
-
'-sDEVICE=pdfwrite',
|
31
|
-
'-dCompatibilityLevel=1.4',
|
32
|
-
"-dPDFSETTINGS=/#{quality}",
|
33
|
-
'-dNOPAUSE',
|
34
|
-
'-dBATCH',
|
35
|
-
"-sOutputFile=#{target}",
|
36
|
-
source.to_s
|
37
|
-
]
|
38
|
-
|
39
|
-
result = Libis::Tools::Command.run(*args, timeout:, kill_after: timeout * 2)
|
40
21
|
|
41
|
-
|
22
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
23
|
+
result = Libis::Tools::Command.run(
|
24
|
+
'gs',
|
25
|
+
'-sDEVICE=pdfwrite',
|
26
|
+
'-dCompatibilityLevel=1.4',
|
27
|
+
"-dPDFSETTINGS=/#{quality}",
|
28
|
+
'-dNOPAUSE',
|
29
|
+
'-dBATCH',
|
30
|
+
"-sOutputFile=#{target}",
|
31
|
+
"#{source}",
|
32
|
+
timeout: timeout,
|
33
|
+
kill_after: timeout * 2
|
34
|
+
)
|
35
|
+
|
36
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
37
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
42
38
|
|
43
39
|
result
|
44
40
|
end
|
45
41
|
end
|
42
|
+
|
46
43
|
end
|
47
44
|
end
|
48
45
|
end
|
@@ -1,20 +1,47 @@
|
|
1
|
-
|
1
|
+
require 'os'
|
2
2
|
|
3
|
-
require 'libis/
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
4
8
|
|
5
9
|
module Libis
|
6
10
|
module Format
|
7
11
|
module Tool
|
12
|
+
|
8
13
|
class PdfSplit
|
14
|
+
include ::Libis::Tools::Logger
|
9
15
|
|
10
|
-
def self.run(source, target,
|
11
|
-
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
12
18
|
end
|
13
19
|
|
14
|
-
def run(source, target,
|
15
|
-
|
20
|
+
def run(source, target, options = [])
|
21
|
+
|
22
|
+
if OS.java?
|
23
|
+
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
24
|
+
end
|
25
|
+
|
26
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
27
|
+
result = Libis::Tools::Command.run(
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'SplitPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
36
|
+
)
|
37
|
+
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
|
+
|
41
|
+
result
|
16
42
|
end
|
17
43
|
end
|
44
|
+
|
18
45
|
end
|
19
46
|
end
|
20
47
|
end
|
@@ -1,9 +1,6 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'tempfile'
|
4
2
|
require 'csv'
|
5
3
|
require 'fileutils'
|
6
|
-
require 'pdfinfo'
|
7
4
|
|
8
5
|
require 'libis/tools/extend/string'
|
9
6
|
require 'libis/tools/logger'
|
@@ -15,24 +12,19 @@ require 'libis/format'
|
|
15
12
|
module Libis
|
16
13
|
module Format
|
17
14
|
module Tool
|
15
|
+
|
18
16
|
class PdfToPdfa
|
19
17
|
include ::Libis::Tools::Logger
|
20
18
|
|
21
|
-
def self.
|
22
|
-
|
23
|
-
result.zero?
|
19
|
+
def self.run(source, target = nil, options = {})
|
20
|
+
self.new.run source, target, options
|
24
21
|
end
|
25
22
|
|
26
|
-
def
|
27
|
-
new.run source, target, **options
|
28
|
-
end
|
23
|
+
def run(source, target = nil, options = nil)
|
29
24
|
|
30
|
-
def run(source, target = nil, **options)
|
31
25
|
tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
|
32
26
|
target ||= tmp_target
|
33
27
|
|
34
|
-
metadata = get_metadata(source)
|
35
|
-
|
36
28
|
icc_info = icc_options(options[:colorspace])
|
37
29
|
|
38
30
|
icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
|
@@ -40,58 +32,52 @@ module Libis
|
|
40
32
|
|
41
33
|
def_filename = Tools::TempFile.name('PDFA_def', '.ps')
|
42
34
|
File.open(def_filename, 'w') do |f|
|
43
|
-
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
|
44
|
-
|
45
|
-
|
46
|
-
.gsub('[**METADATA**]', metadata)
|
35
|
+
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
|
36
|
+
gsub('[** Fill in ICC profile location **]', icc_file).
|
37
|
+
gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
|
47
38
|
end
|
48
39
|
|
49
40
|
timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
|
50
41
|
result = Libis::Tools::Command.run(
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
source,
|
63
|
-
timeout:,
|
64
|
-
kill_after: timeout * 2
|
42
|
+
Libis::Format::Config[:ghostscript_cmd],
|
43
|
+
'-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
|
44
|
+
'-sColorConversionStrategy=/UseDeviceIndependentColor',
|
45
|
+
"-sProcessColorModel=#{icc_info[:device]}",
|
46
|
+
'-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
|
47
|
+
"-sOutputICCProfile=#{icc_file}",
|
48
|
+
'-o', File.absolute_path(target),
|
49
|
+
def_filename,
|
50
|
+
source,
|
51
|
+
timeout: timeout,
|
52
|
+
kill_after: timeout * 2
|
65
53
|
)
|
66
54
|
|
67
|
-
|
55
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
68
56
|
|
69
57
|
FileUtils.rm [icc_file, def_filename].compact, force: true
|
70
58
|
|
59
|
+
unless Format::Tool::PdfaValidator.run(target)
|
60
|
+
result[:status] = -999
|
61
|
+
result[:err] << 'Failed to validate generated PDF/A file.'
|
62
|
+
end
|
63
|
+
|
71
64
|
result
|
72
65
|
end
|
73
66
|
|
74
|
-
private
|
75
67
|
|
76
|
-
|
77
|
-
info = Pdfinfo.new(source)
|
78
|
-
metadata = "/Title (#{info.title})"
|
79
|
-
metadata += "\n /Author (#{info.author})" if info.author
|
80
|
-
metadata += "\n /Subject (#{info.subject})" if info.subject
|
81
|
-
metadata += "\n /Keywords (#{info.keywords})" if info.keywords
|
82
|
-
metadata += "\n /Creator (#{info.creator})" if info.creator
|
83
|
-
metadata
|
84
|
-
end
|
68
|
+
private
|
85
69
|
|
86
70
|
def icc_options(colorspace)
|
87
71
|
case colorspace.to_s.downcase
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
72
|
+
when 'cmyk'
|
73
|
+
{icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
|
74
|
+
else
|
75
|
+
{icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
|
92
76
|
end
|
93
77
|
end
|
78
|
+
|
94
79
|
end
|
80
|
+
|
95
81
|
end
|
96
82
|
end
|
97
83
|
end
|