libis-format 1.3.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +40 -153
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +83 -125
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/tools/extend/string'
|
4
2
|
require 'libis/tools/command'
|
5
3
|
|
@@ -11,33 +9,36 @@ require_relative 'identification_tool'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class Fido < Libis::Format::Tool::IdentificationTool
|
14
|
+
|
15
15
|
def self.add_formats(formats_file)
|
16
|
-
instance.formats << formats_file unless instance.formats.include?(formats_file)
|
16
|
+
self.instance.formats << formats_file unless self.instance.formats.include?(formats_file)
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.del_formats(formats_file)
|
20
|
-
instance.formats.delete(formats_file)
|
20
|
+
self.instance.formats.delete(formats_file)
|
21
21
|
end
|
22
22
|
|
23
23
|
attr_reader :formats
|
24
24
|
|
25
|
-
def run_list(filelist,
|
25
|
+
def run_list(filelist, options = {})
|
26
26
|
create_list_file(filelist) do |list_file|
|
27
|
-
output = runner(nil, '-input', list_file.escape_for_string,
|
27
|
+
output = runner(nil, '-input', list_file.escape_for_string, options)
|
28
28
|
process_output(output)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
def run_dir(dir, recursive = true,
|
32
|
+
def run_dir(dir, recursive = true, options = {})
|
33
33
|
args = []
|
34
34
|
args << '-recurse' if recursive
|
35
|
-
|
35
|
+
args << options
|
36
|
+
output = runner(dir, *args)
|
36
37
|
process_output(output)
|
37
38
|
end
|
38
39
|
|
39
|
-
def run(file,
|
40
|
-
output = runner(file,
|
40
|
+
def run(file, options = {})
|
41
|
+
output = runner(file, options)
|
41
42
|
process_output(output)
|
42
43
|
end
|
43
44
|
|
@@ -52,9 +53,11 @@ module Libis
|
|
52
53
|
|
53
54
|
attr_writer :formats
|
54
55
|
|
55
|
-
def runner(filename, *args
|
56
|
-
|
57
|
-
|
56
|
+
def runner(filename, *args)
|
57
|
+
options = {}
|
58
|
+
options = args.pop if args.last.is_a?(Hash)
|
59
|
+
# Load custom format definitions if present
|
60
|
+
args << '-loadformats' << "#{formats.join(',')}" unless formats.empty?
|
58
61
|
|
59
62
|
# Workaround for Fido performance bug
|
60
63
|
args << '-bufsize' << (options[:bufsize] || 1000).to_s
|
@@ -65,7 +68,7 @@ module Libis
|
|
65
68
|
args << '-nocontainer' if options[:nocontainer]
|
66
69
|
|
67
70
|
# Add filename to argument list (optional)
|
68
|
-
args << filename.escape_for_string
|
71
|
+
args << "#{filename.escape_for_string}" if filename
|
69
72
|
|
70
73
|
# No header output
|
71
74
|
args << '-q'
|
@@ -73,20 +76,20 @@ module Libis
|
|
73
76
|
# Run command and capture results
|
74
77
|
timeout = Libis::Format::Config[:timeouts][:fido]
|
75
78
|
result = ::Libis::Tools::Command.run(
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
+
Libis::Format::Config[:fido_cmd], *args,
|
80
|
+
timeout: timeout,
|
81
|
+
kill_after: timeout * 2
|
79
82
|
)
|
80
83
|
|
81
84
|
# Log warning if needed
|
82
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
83
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
85
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
86
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
84
87
|
|
85
88
|
# Parse output (CSV) text into array and return result
|
86
|
-
keys =
|
89
|
+
keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype]
|
87
90
|
data = CSV.parse(result[:out].join("\n"))
|
88
|
-
|
89
|
-
|
91
|
+
.map {|a| Hash[keys.zip(a)]}
|
92
|
+
.select {|a| a[:status] == 'OK'}
|
90
93
|
data.each do |r|
|
91
94
|
r.delete(:time)
|
92
95
|
r.delete(:status)
|
@@ -94,7 +97,9 @@ module Libis
|
|
94
97
|
r[:tool] = :fido
|
95
98
|
end
|
96
99
|
end
|
100
|
+
|
97
101
|
end
|
102
|
+
|
98
103
|
end
|
99
104
|
end
|
100
105
|
end
|
@@ -1,38 +1,49 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'identification_tool'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
class FileTool < Libis::Format::Tool::IdentificationTool
|
9
|
-
|
8
|
+
|
9
|
+
def run_list(filelist, _options = {})
|
10
|
+
|
10
11
|
create_list_file(filelist) do |list_file|
|
12
|
+
|
11
13
|
output = runner(nil, '--files-from', list_file)
|
12
14
|
|
13
15
|
process_output(output)
|
16
|
+
|
14
17
|
end
|
18
|
+
|
15
19
|
end
|
16
20
|
|
17
|
-
def run_dir(dir, recursive = true,
|
21
|
+
def run_dir(dir, recursive = true, _options = {})
|
22
|
+
|
18
23
|
filelist = find_files(dir, recursive)
|
19
24
|
|
20
25
|
create_list_file(filelist) do |list_file|
|
26
|
+
|
21
27
|
output = runner(nil, '--files-from', list_file)
|
22
28
|
|
23
29
|
process_output(output)
|
30
|
+
|
24
31
|
end
|
32
|
+
|
25
33
|
end
|
26
34
|
|
27
|
-
def run(file,
|
35
|
+
def run(file, _options = {})
|
36
|
+
|
28
37
|
output = runner(file)
|
29
38
|
|
30
39
|
process_output(output)
|
40
|
+
|
31
41
|
end
|
32
42
|
|
33
43
|
protected
|
34
44
|
|
35
45
|
def runner(filename, *args)
|
46
|
+
|
36
47
|
# Create new argument list
|
37
48
|
opts = []
|
38
49
|
|
@@ -50,21 +61,23 @@ module Libis
|
|
50
61
|
# Run the UNIX file command and capture the results
|
51
62
|
timeout = Libis::Format::Config[:timeouts][:file_tool]
|
52
63
|
result = ::Libis::Tools::Command.run(
|
53
|
-
|
54
|
-
|
55
|
-
|
64
|
+
'file', *opts,
|
65
|
+
timeout: timeout,
|
66
|
+
kill_after: timeout * 2
|
56
67
|
)
|
57
68
|
|
58
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
59
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
69
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
70
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
60
71
|
|
61
72
|
# Parse output text into array and return result
|
62
73
|
result[:out].map do |line|
|
63
74
|
r = line.split(/:\s+/)
|
64
|
-
{
|
75
|
+
{filepath: r[0], mimetype: r[1], matchtype: 'magic', tool: :file}
|
65
76
|
end
|
66
77
|
end
|
78
|
+
|
67
79
|
end
|
80
|
+
|
68
81
|
end
|
69
82
|
end
|
70
83
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,46 +9,37 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class FopPdf
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:fop_jar])
|
16
|
+
def self.run(xml, target, options = [])
|
17
|
+
self.new.run xml, target, options
|
22
18
|
end
|
23
19
|
|
24
|
-
def
|
25
|
-
new.run xml, target
|
26
|
-
end
|
20
|
+
def run(xml, target, options = [])
|
27
21
|
|
28
|
-
def run(xml, target)
|
29
22
|
if OS.java?
|
30
23
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
31
24
|
end
|
32
25
|
|
33
26
|
timeout = Libis::Format::Config[:timeouts][:fop]
|
34
27
|
result = Libis::Tools::Command.run(
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
kill_after: timeout * 2
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
"-Dfop.home=#{File.dirname(Libis::Format::Config[:fop_jar])}",
|
30
|
+
'-jar', Libis::Format::Config[:fop_jar],
|
31
|
+
'-fo', xml,
|
32
|
+
'-pdf', target,
|
33
|
+
timeout: timeout,
|
34
|
+
kill_after: timeout * 2
|
43
35
|
)
|
44
36
|
|
45
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
37
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
38
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
47
39
|
|
48
|
-
{
|
49
|
-
command: result,
|
50
|
-
files: [target]
|
51
|
-
}
|
52
40
|
end
|
53
41
|
end
|
42
|
+
|
54
43
|
end
|
55
44
|
end
|
56
45
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'csv'
|
4
2
|
require 'tmpdir'
|
5
3
|
|
@@ -9,46 +7,47 @@ require 'libis/tools/logger'
|
|
9
7
|
require 'libis/tools/command'
|
10
8
|
|
11
9
|
require 'libis/format/config'
|
12
|
-
require 'libis/format/
|
10
|
+
require 'libis/format/library'
|
13
11
|
|
14
12
|
module Libis
|
15
13
|
module Format
|
16
14
|
module Tool
|
17
|
-
|
15
|
+
|
16
|
+
class IdentificationTool
|
18
17
|
include Singleton
|
19
18
|
include ::Libis::Tools::Logger
|
20
19
|
|
21
20
|
def self.bad_mimetype(mimetype)
|
22
|
-
instance.bad_mimetype(mimetype)
|
21
|
+
self.instance.bad_mimetype(mimetype)
|
23
22
|
end
|
24
23
|
|
25
|
-
def self.run(file, recursive = false,
|
24
|
+
def self.run(file, recursive = false, options = {})
|
25
|
+
options ||= {}
|
26
26
|
if file.is_a?(Array)
|
27
|
-
return run_list file,
|
28
|
-
elsif file.is_a?(String) && File.
|
27
|
+
return run_list file, options
|
28
|
+
elsif file.is_a?(String) && File.exists?(file) && File.readable?(file)
|
29
29
|
if File.directory?(file)
|
30
|
-
return run_dir(file, recursive,
|
30
|
+
return run_dir(file, recursive, options)
|
31
31
|
elsif File.file?(file)
|
32
|
-
return instance.run(file,
|
32
|
+
return self.instance.run(file, options)
|
33
33
|
end
|
34
34
|
end
|
35
|
-
|
36
35
|
raise ArgumentError,
|
37
36
|
'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
|
38
37
|
end
|
39
38
|
|
40
|
-
def self.run_dir(file, recursive = true,
|
41
|
-
instance.run_dir file, recursive,
|
39
|
+
def self.run_dir(file, recursive = true, options = {})
|
40
|
+
self.instance.run_dir file, recursive, options
|
42
41
|
end
|
43
42
|
|
44
|
-
def self.run_list(filelist,
|
45
|
-
instance.run_list filelist,
|
43
|
+
def self.run_list(filelist , options = {})
|
44
|
+
self.instance.run_list filelist, options
|
46
45
|
end
|
47
46
|
|
48
47
|
protected
|
49
48
|
|
50
49
|
def create_list_file(filelist)
|
51
|
-
list_file = Tempfile.new(%w
|
50
|
+
list_file = Tempfile.new(%w'file .list')
|
52
51
|
filelist.each do |fname|
|
53
52
|
list_file.write "#{fname}\n"
|
54
53
|
end
|
@@ -84,22 +83,23 @@ module Libis
|
|
84
83
|
# { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
|
85
84
|
#
|
86
85
|
def process_output(output)
|
87
|
-
output.
|
86
|
+
output.reduce({}) do |results, x|
|
88
87
|
filepath = File.absolute_path(x.delete(:filepath)).freeze
|
89
88
|
results[filepath] ||= []
|
90
89
|
results[filepath] << annotate(x)
|
90
|
+
results
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
94
|
# Enhance the output with mimetype and score
|
95
95
|
def annotate(result)
|
96
96
|
# Enhance result with mimetype if needed
|
97
|
-
bad_mimetypes.include?(result[:mimetype]) &&
|
98
|
-
!bad_puids.include?(result[:puid]) &&
|
97
|
+
if bad_mimetypes.include?(result[:mimetype]) && !bad_puids.include?(result[:puid])
|
99
98
|
result[:mimetype] = get_mimetype(result[:puid])
|
99
|
+
end
|
100
100
|
|
101
101
|
# Normalize the mimetype
|
102
|
-
Libis::Format::
|
102
|
+
Libis::Format::Library.normalize(result)
|
103
103
|
|
104
104
|
# Default score is 5
|
105
105
|
result[:score] = 5
|
@@ -108,54 +108,55 @@ module Libis
|
|
108
108
|
result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
|
109
109
|
|
110
110
|
# freeze all strings
|
111
|
-
result.each {
|
111
|
+
result.each {|_, v| v.freeze if v.is_a?(String)}
|
112
112
|
|
113
113
|
# Adapt score based on matchtype
|
114
114
|
result[:matchtype] = result[:matchtype].to_s.downcase
|
115
115
|
case result[:matchtype]
|
116
116
|
|
117
117
|
# Signature match increases score with 2
|
118
|
-
|
119
|
-
|
120
|
-
# typeinfo = ::Libis::Format::
|
118
|
+
when 'signature'
|
119
|
+
result[:score] += 2
|
120
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
121
121
|
# ext = File.extname(result[:filename])
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
123
123
|
|
124
124
|
# Container match increases score with 4
|
125
|
-
|
126
|
-
|
127
|
-
# typeinfo = ::Libis::Format::
|
125
|
+
when 'container'
|
126
|
+
result[:score] += 4
|
127
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
128
128
|
# ext = File.extname(result[:filename])
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
130
130
|
|
131
131
|
# Extension match is the weakest identification; score is lowered by 2 points
|
132
|
-
|
133
|
-
|
132
|
+
when 'extension'
|
133
|
+
result[:score] -= 2
|
134
134
|
|
135
135
|
# Magic code (file tool) is to be trusted even less
|
136
|
-
|
137
|
-
|
136
|
+
when 'magic'
|
137
|
+
result[:score] -= 3
|
138
138
|
|
139
|
+
# Or no change otherwise
|
140
|
+
else
|
141
|
+
# do nothing
|
139
142
|
end
|
140
143
|
|
141
144
|
# Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
|
142
145
|
# Office OpenXML, OpenDocument, jar, maff, svx)
|
143
|
-
|
146
|
+
if result[:mimetype] == 'application/zip'
|
147
|
+
result[:score] -= 2
|
148
|
+
end
|
144
149
|
|
145
150
|
# Return result enhanced with mimetype and score fields
|
146
151
|
result
|
147
152
|
end
|
148
153
|
|
149
154
|
def get_mimetype(puid)
|
150
|
-
::Libis::Format::
|
151
|
-
rescue StandardError
|
152
|
-
nil
|
155
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
|
153
156
|
end
|
154
157
|
|
155
158
|
def get_puid(mimetype)
|
156
|
-
::Libis::Format::
|
157
|
-
rescue StandardError
|
158
|
-
nil
|
159
|
+
::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
|
159
160
|
end
|
160
161
|
|
161
162
|
attr_accessor :bad_mimetypes, :bad_puids
|
@@ -169,6 +170,7 @@ module Libis
|
|
169
170
|
@bad_mimetypes << mimetype
|
170
171
|
end
|
171
172
|
end
|
173
|
+
|
172
174
|
end
|
173
175
|
end
|
174
176
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'fileutils'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,61 +9,51 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class OfficeToPdf
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.run(source, target, **options)
|
23
|
-
new.run source, target, **options
|
16
|
+
def self.run(source, target, options = {})
|
17
|
+
self.new.run source, target, options
|
24
18
|
end
|
25
19
|
|
26
|
-
def run(source, target,
|
20
|
+
def run(source, target, options = {})
|
27
21
|
workdir = '/...'
|
28
22
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
29
23
|
|
30
|
-
workdir = File.join(workdir, rand(
|
24
|
+
workdir = File.join(workdir, rand(1000000).to_s)
|
31
25
|
FileUtils.mkpath(workdir)
|
32
26
|
|
33
27
|
src_file = File.join(workdir, File.basename(source))
|
34
28
|
FileUtils.symlink source, src_file
|
35
29
|
|
36
|
-
tgt_file = File.join(workdir,
|
30
|
+
tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')
|
37
31
|
|
38
32
|
export_filter = options[:export_filter] || 'pdf'
|
39
33
|
|
40
34
|
timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
|
41
35
|
result = Libis::Tools::Command.run(
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
36
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
37
|
+
"-env:UserInstallation=file://#{workdir}",
|
38
|
+
'--convert-to', export_filter,
|
39
|
+
'--outdir', workdir, src_file,
|
40
|
+
timeout: timeout,
|
41
|
+
kill_after: timeout * 2
|
48
42
|
)
|
49
43
|
|
50
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
51
|
-
|
44
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
52
45
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
53
|
-
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
46
|
+
raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
54
47
|
|
55
48
|
FileUtils.copy tgt_file, target, preserve: true
|
56
49
|
|
57
|
-
{
|
58
|
-
command: result,
|
59
|
-
files: [target]
|
60
|
-
}
|
61
50
|
ensure
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
nil
|
66
|
-
end
|
51
|
+
FileUtils.rmtree workdir rescue nil
|
52
|
+
|
53
|
+
result[:out]
|
67
54
|
end
|
68
55
|
end
|
56
|
+
|
69
57
|
end
|
70
58
|
end
|
71
59
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
|
10
|
+
module Format
|
11
|
+
module Tool
|
12
|
+
|
13
|
+
class PdfCopy
|
14
|
+
include ::Libis::Tools::Logger
|
15
|
+
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
18
|
+
end
|
19
|
+
|
20
|
+
def run(source, target, options = [])
|
21
|
+
|
22
|
+
if OS.java?
|
23
|
+
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
24
|
+
end
|
25
|
+
|
26
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_copy]
|
27
|
+
result = Libis::Tools::Command.run(
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'CopyPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
36
|
+
)
|
37
|
+
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
|
+
|
41
|
+
result
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,44 +9,40 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfMerge
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.run(source, target, *options)
|
25
|
-
new.run source, target, options
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
26
18
|
end
|
27
19
|
|
28
|
-
def run(source, target,
|
20
|
+
def run(source, target, options = [])
|
29
21
|
source = [source] unless source.is_a?(Array)
|
30
22
|
|
31
23
|
if OS.java?
|
32
24
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
33
25
|
end
|
34
26
|
|
35
|
-
timeout = Libis::Format::Config[:timeouts][:
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
27
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_merge]
|
28
|
+
result = Libis::Tools::Command.run(
|
29
|
+
Libis::Format::Config[:java_cmd],
|
30
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
31
|
+
'MergePdf',
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
*source,
|
35
|
+
timeout: timeout,
|
36
|
+
kill_after: timeout * 2
|
37
|
+
)
|
38
|
+
|
39
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
40
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
48
41
|
|
49
42
|
result
|
50
43
|
end
|
51
44
|
end
|
45
|
+
|
52
46
|
end
|
53
47
|
end
|
54
48
|
end
|