libis-format 0.9.41 → 0.9.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +365 -0
- data/bin/droid +1 -1
- data/bin/fido +1 -1
- data/bin/pdf_copy +1 -1
- data/lib/libis/format/config.rb +1 -0
- data/lib/libis/format/converter/audio_converter.rb +1 -1
- data/lib/libis/format/converter/base.rb +2 -1
- data/lib/libis/format/converter/office_converter.rb +2 -2
- data/lib/libis/format/converter/pdf_converter.rb +6 -6
- data/lib/libis/format/converter/video_converter.rb +96 -2
- data/lib/libis/format/identifier.rb +12 -12
- data/lib/libis/format/tool/droid.rb +108 -0
- data/lib/libis/format/tool/extension_identification.rb +58 -0
- data/lib/libis/format/tool/ffmpeg.rb +43 -0
- data/lib/libis/format/tool/fido.rb +91 -0
- data/lib/libis/format/tool/file_tool.rb +78 -0
- data/lib/libis/format/tool/identification_tool.rb +175 -0
- data/lib/libis/format/tool/office_to_pdf.rb +54 -0
- data/lib/libis/format/tool/pdf_copy.rb +42 -0
- data/lib/libis/format/tool/pdf_merge.rb +43 -0
- data/lib/libis/format/tool/pdf_optimizer.rb +38 -0
- data/lib/libis/format/tool/pdf_split.rb +41 -0
- data/lib/libis/format/tool/pdf_to_pdfa.rb +78 -0
- data/lib/libis/format/tool/pdfa_validator.rb +63 -0
- data/lib/libis/format/tool.rb +23 -0
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +1 -15
- data/libis-format.gemspec +1 -2
- data/spec/converter_audio_spec.rb +66 -0
- data/spec/converter_image_spec.rb +166 -0
- data/spec/converter_office_spec.rb +84 -0
- data/spec/converter_pdf_spec.rb +30 -0
- data/spec/converter_repository_spec.rb +91 -0
- data/spec/converter_video_spec.rb +97 -0
- data/spec/data/video/copyright.png +0 -0
- data/spec/identifier_spec.rb +3 -15
- metadata +32 -33
- data/lib/libis/format/droid.rb +0 -106
- data/lib/libis/format/extension_identification.rb +0 -55
- data/lib/libis/format/ffmpeg.rb +0 -41
- data/lib/libis/format/fido.rb +0 -89
- data/lib/libis/format/file_tool.rb +0 -76
- data/lib/libis/format/identification_tool.rb +0 -174
- data/lib/libis/format/office_to_pdf.rb +0 -52
- data/lib/libis/format/pdf_copy.rb +0 -40
- data/lib/libis/format/pdf_merge.rb +0 -41
- data/lib/libis/format/pdf_optimizer.rb +0 -36
- data/lib/libis/format/pdf_split.rb +0 -39
- data/lib/libis/format/pdf_to_pdfa.rb +0 -74
- data/lib/libis/format/pdfa_validator.rb +0 -61
- data/spec/converter_spec.rb +0 -433
data/lib/libis/format/fido.rb
DELETED
@@ -1,89 +0,0 @@
|
|
1
|
-
require 'libis/tools/extend/string'
|
2
|
-
require 'libis/tools/command'
|
3
|
-
|
4
|
-
require 'csv'
|
5
|
-
require 'libis/format/config'
|
6
|
-
|
7
|
-
require_relative 'identification_tool'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
|
12
|
-
# Identification tool that wraps the external Fido command line program.
#
# The command output (CSV) is converted into result hashes and handed to
# +process_output+ of the parent class.
class Fido < Libis::Format::IdentificationTool

  # Register an additional Fido format definition file.
  # Nothing happens when the file is already registered.
  def self.add_formats(formats_file)
    registered = instance.formats
    registered << formats_file unless registered.include?(formats_file)
  end

  # Unregister a Fido format definition file.
  def self.del_formats(formats_file)
    instance.formats.delete(formats_file)
  end

  # List of extra format definition files passed to Fido.
  attr_reader :formats

  # Identify every file named in +filelist+ (the names are written to a list file first).
  def run_list(filelist)
    create_list_file(filelist) do |list_file|
      process_output(runner(nil, '-input', list_file.escape_for_string))
    end
  end

  # Identify the files in +dir+; sub-directories are included when +recursive+ is set.
  def run_dir(dir, recursive = true)
    extra = recursive ? ['-recurse'] : []
    process_output(runner(dir, *extra))
  end

  # Identify a single file.
  def run(file)
    process_output(runner(file))
  end

  protected

  def initialize
    super
    @formats = Libis::Format::Config[:fido_formats].dup
    # Mimetypes that are treated as weak results when reported by Fido
    bad_mimetype('application/vnd.oasis.opendocument.text')
    bad_mimetype('application/vnd.oasis.opendocument.spreadsheet')
  end

  attr_writer :formats

  # Run Fido and return an Array of result hashes, one per line with status 'OK'.
  #
  # @param filename [String, nil] file or directory to identify (optional)
  # @param args [Array<String>] extra command line arguments
  # @raise [RuntimeError] when the command wrote anything to its error output
  def runner(filename, *args)
    # Load custom format definitions if present
    args.push('-loadformats', "#{formats.join(',')}") unless formats.empty?

    # Workaround for Fido performance bug
    args.push('-bufsize', '1000')

    # Add filename to argument list (optional)
    args.push("#{filename.escape_for_string}") if filename

    # No header output
    args.push('-q')

    # Run command and capture results
    fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)

    # Any error output is considered fatal
    errors = fido[:err]
    raise RuntimeError, "Fido errors: #{errors.join("\n")}" unless errors.empty?

    # Parse output (CSV) text into hashes, keep the successful ones and drop the fields we do not need
    keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype]
    rows = CSV.parse(fido[:out].join("\n")).map { |row| Hash[keys.zip(row)] }
    rows.select { |row| row[:status] == 'OK' }.each do |row|
      [:time, :status, :filesize].each { |key| row.delete(key) }
      row[:source] = :fido
    end
  end

end
|
87
|
-
|
88
|
-
end
|
89
|
-
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
require_relative 'identification_tool'
|
2
|
-
|
3
|
-
module Libis
|
4
|
-
module Format
|
5
|
-
|
6
|
-
# Identification tool that wraps the UNIX +file+ command.
#
# Only the MIME type reported by the command is captured; every result gets
# matchtype 'magic' and source :file.
class FileTool < Libis::Format::IdentificationTool

  # A line of `file --mime-type` output that ends in a MIME type. The file path may itself contain ': ', so
  # the path is everything before the separator that directly precedes the trailing MIME type.
  MIME_LINE = %r{\A(.+?):\s+([^\s:]+/[^\s:]+)\s*\z}

  # Identify every file named in +filelist+ (the names are written to a list file first).
  def run_list(filelist)
    create_list_file(filelist) do |list_file|
      output = runner(nil, '--files-from', list_file)
      process_output(output)
    end
  end

  # Identify the files found in +dir+; sub-directories are searched when +recursive+ is set.
  def run_dir(dir, recursive = true)
    run_list(find_files(dir, recursive))
  end

  # Identify a single file.
  def run(file)
    output = runner(file)
    process_output(output)
  end

  protected

  # Run the +file+ command and return an Array of result hashes.
  #
  # @param filename [String, nil] file to identify (optional)
  # @param args [Array<String>] extra command line arguments
  # @raise [RuntimeError] when the command wrote anything to its error output
  def runner(filename, *args)
    # Fixed options:
    #   -L          : follow symlinks
    #   --mime-type : only print MIME type
    # followed by the passed arguments
    opts = ['-L', '--mime-type', *args]

    # Finally add the filename to process
    opts << filename.escape_for_string if filename

    # Run the UNIX file command and capture the results
    file_tool = ::Libis::Tools::Command.run('file', *opts)

    raise RuntimeError, "File command errors: #{file_tool[:err].join("\n")}" unless file_tool[:err].empty?

    # Parse output text into array and return result
    file_tool[:out].map { |line| parse_line(line) }
  end

  # Convert one output line ("<path>: <mimetype>") into a result hash.
  # Lines that do not end in a MIME type are split on the first ': ' separator.
  def parse_line(line)
    if (match = MIME_LINE.match(line))
      filepath, mimetype = match[1], match[2]
    else
      filepath, mimetype = line.split(/:\s+/)
    end
    {filepath: filepath, mimetype: mimetype, matchtype: 'magic', source: :file}
  end

end
|
74
|
-
|
75
|
-
end
|
76
|
-
end
|
@@ -1,174 +0,0 @@
|
|
1
|
-
require 'csv'
|
2
|
-
require 'tmpdir'
|
3
|
-
|
4
|
-
require 'singleton'
|
5
|
-
require 'libis/tools/extend/string'
|
6
|
-
require 'libis/tools/logger'
|
7
|
-
require 'libis/tools/command'
|
8
|
-
|
9
|
-
require 'libis/format/config'
|
10
|
-
require 'libis/format/type_database'
|
11
|
-
|
12
|
-
module Libis
|
13
|
-
module Format
|
14
|
-
|
15
|
-
# Base class for the format identification tools.
#
# Subclasses are singletons and are expected to implement +run+, +run_dir+ and +run_list+; the helpers in
# this class create list files, find files and turn raw tool output into scored results.
class IdentificationTool
  include Singleton
  include ::Libis::Tools::Logger

  # Mark +mimetype+ as a weak identification result for this tool.
  def self.bad_mimetype(mimetype)
    instance.bad_mimetype(mimetype)
  end

  # Identify a file, a directory or a list of files.
  #
  # @param file [String, Array<String>] path to an existing file or directory, or a list of paths
  # @param recursive [Boolean] descend into sub-directories when +file+ is a directory
  # @raise [ArgumentError] when +file+ is neither a list nor a readable file or directory
  def self.run(file, recursive = false)
    return run_list(file) if file.is_a?(Array)

    # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2
    if file.is_a?(String) && File.exist?(file) && File.readable?(file)
      return run_dir(file, recursive) if File.directory?(file)
      return instance.run(file) if File.file?(file)
    end

    raise ArgumentError,
          'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
  end

  def self.run_dir(file, recursive = true)
    instance.run_dir file, recursive
  end

  def self.run_list(filelist)
    instance.run_list filelist
  end

  # Add +mimetype+ to the list of mimetypes that give a weak score.
  #
  # Public because the class level +bad_mimetype+ calls it with an explicit receiver, which Ruby does not
  # allow for protected methods.
  def bad_mimetype(mimetype)
    @bad_mimetypes << mimetype
  end

  protected

  # Write the file names in +filelist+ to a temporary list file (one name per line), yield the path of that
  # file and remove it afterwards.
  #
  # @return the result of the block
  def create_list_file(filelist)
    # Dir::Tmpname.create picks a name in the system temp dir and retries when the exclusive create fails
    list_file = Dir::Tmpname.create(%w'file .list') do |path|
      File.open(path, File::WRONLY | File::CREAT | File::EXCL) do |f|
        filelist.each { |fname| f.write "#{fname}\n" }
      end
    end
    yield(list_file)
  ensure
    # Only clean up what was really created, so that an earlier error is not masked
    File.delete(list_file) if list_file && File.exist?(list_file)
  end

  # List the regular files in +dir+ with the +find+ command (symlinks are followed).
  #
  # @param recurse [Boolean] when false only the top level of +dir+ is listed
  # @return the standard output of the command
  def find_files(dir, recurse = true)
    args = ['-L', dir.escape_for_string]
    args << '-maxdepth' << '1' unless recurse
    args << '-type' << 'f' << '-print'
    output = ::Libis::Tools::Command.run('find', *args)
    warn "Find command errors: #{output[:err].join("\n")}" unless output[:err].empty?
    output[:out]
  end

  # Reformat output to make it easier to post-process and decide on the preferred format
  #
  # input format:
  # [
  #   { filepath: <filename>, mimetype: <mimetype>, matchtype: <matchtype>, ... }
  # ]
  #
  # output format:
  # { <filename> => [<result>, ...], ... }
  #
  # <result> is the enhanced Hash output of the identification tool:
  # { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
  #
  def process_output(output)
    output.each_with_object({}) do |x, results|
      filepath = x.delete(:filepath)
      results[filepath] ||= []
      results[filepath.freeze] << annotate(x)
    end
  end

  # Enhance the output with mimetype and score
  def annotate(result)
    # Enhance result with mimetype if needed
    if bad_mimetypes.include?(result[:mimetype]) && !bad_puids.include?(result[:puid])
      result[:mimetype] = get_mimetype(result[:puid])
    end

    # Normalize the mimetype
    Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)

    # Default score is 5
    result[:score] = 5

    # Weak detection score is 1
    result[:score] = 1 if bad_mimetypes.include? result[:mimetype]

    # freeze all strings
    result.each_value { |v| v.freeze if v.is_a?(String) }

    # Adapt score based on matchtype
    result[:matchtype] = result[:matchtype].to_s.downcase
    case result[:matchtype]
    when 'signature'
      # Signature match increases score with 2
      result[:score] += 2
    when 'container'
      # Container match increases score with 4
      result[:score] += 4
    when 'extension'
      # Extension match is the weakest identification; score is lowered by 2 points
      result[:score] -= 2
    when 'magic'
      # Magic code (file tool) is to be trusted even less
      result[:score] -= 3
    else
      # no change otherwise
    end

    # Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
    # Office OpenXML, OpenDocument, jar, maff, svx)
    result[:score] -= 2 if result[:mimetype] == 'application/zip'

    # Return result enhanced with mimetype and score fields
    result
  end

  # First mimetype registered for +puid+ in the type database; nil when the lookup fails.
  def get_mimetype(puid)
    ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
  end

  # First PUID registered for +mimetype+ in the type database; nil when the lookup fails.
  def get_puid(mimetype)
    ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first rescue nil
  end

  attr_accessor :bad_mimetypes, :bad_puids

  def initialize
    @bad_mimetypes = [nil, '', 'None', 'application/octet-stream']
    @bad_puids = [nil, 'fmt/unknown']
  end

end
|
172
|
-
|
173
|
-
end
|
174
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
|
3
|
-
require 'libis/tools/extend/string'
|
4
|
-
require 'libis/tools/logger'
|
5
|
-
require 'libis/tools/command'
|
6
|
-
|
7
|
-
require 'libis/format/config'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
|
12
|
-
# Converts an office document with the LibreOffice/OpenOffice +soffice+ program.
class OfficeToPdf
  include ::Libis::Tools::Logger

  def self.run(source, target, options = {})
    self.new.run source, target, options
  end

  # Convert +source+ and copy the result to +target+.
  #
  # The conversion runs in a private work directory that contains a symbolic link to the source file; the
  # work directory is removed afterwards, whether the conversion succeeded or not.
  #
  # @param source [String] path of the document to convert
  # @param target [String] path where the converted file is stored
  # @param options [Hash] :export_filter selects the soffice export filter (default 'pdf')
  # @return the standard output of the command, or false when the command exit status is not 0
  def run(source, target, options = {})
    # NOTE(review): '/...' looks like a redacted/preferred base directory - confirm; the system temp dir is
    # used when it does not exist
    workdir = '/...'
    workdir = Dir.tmpdir unless Dir.exist? workdir

    workdir = File.join(workdir, rand(1000000).to_s)
    FileUtils.mkpath(workdir)

    begin
      # The link target must be absolute: a relative target would be resolved against the work directory
      src_file = File.join(workdir, File.basename(source))
      FileUtils.symlink File.absolute_path(source), src_file

      tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')

      export_filter = options[:export_filter] || 'pdf'

      result = Libis::Tools::Command.run(
          Libis::Format::Config[:soffice_path], '--headless',
          '--convert-to', export_filter,
          '--outdir', workdir, src_file
      )

      unless result[:status] == 0
        warn "PdfConvert errors: #{(result[:err] + result[:out]).join("\n")}"
        return false
      end

      FileUtils.copy tgt_file, target, preserve: true

      result[:out]
    ensure
      # Also runs on the early return above and when an exception is raised
      FileUtils.rmtree workdir
    end
  end
end
|
50
|
-
|
51
|
-
end
|
52
|
-
end
|
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'os'
|
2
|
-
|
3
|
-
require 'libis/tools/extend/string'
|
4
|
-
require 'libis/tools/logger'
|
5
|
-
require 'libis/tools/command'
|
6
|
-
|
7
|
-
require 'libis/format/config'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
|
12
|
-
# Runs the CopyPdf entry point of the bundled PdfTool.jar.
class PdfCopy
  include ::Libis::Tools::Logger

  def self.run(source, target, options = [])
    new.run(source, target, options)
  end

  # Start a Java process that runs CopyPdf on +source+, writing to +target+.
  #
  # @param options [Array<String>] extra command line arguments, appended as-is
  # @return the result of Libis::Tools::Command.run
  def run(source, target, options = [])
    tools_path = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools'))
    pdf_tool = File.join(tools_path, 'PdfTool.jar')

    if OS.java?
      # TODO: import library and execute in current VM. For now do exactly as in MRI.
    end

    command_line = ['-cp', pdf_tool, 'CopyPdf', '--file_input', source, '--file_output', target]
    Libis::Tools::Command.run(Libis::Format::Config[:java_path], *command_line, *options)
  end
end
|
38
|
-
|
39
|
-
end
|
40
|
-
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
require 'os'
|
2
|
-
|
3
|
-
require 'libis/tools/extend/string'
|
4
|
-
require 'libis/tools/logger'
|
5
|
-
require 'libis/tools/command'
|
6
|
-
|
7
|
-
require 'libis/format/config'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
|
12
|
-
# Runs the MergePdf entry point of the bundled PdfTool.jar.
class PdfMerge
  include ::Libis::Tools::Logger

  def self.run(source, target, options = [])
    new.run(source, target, options)
  end

  # Start a Java process that runs MergePdf on the source file(s), writing to +target+.
  #
  # @param source [String, Array<String>] a single path or a list of paths
  # @param options [Array<String>] extra command line arguments, placed before the source paths
  # @return the result of Libis::Tools::Command.run
  def run(source, target, options = [])
    tools_path = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools'))
    pdf_tool = File.join(tools_path, 'PdfTool.jar')
    inputs = source.is_a?(Array) ? source : [source]

    if OS.java?
      # TODO: import library and execute in current VM. For now do exactly as in MRI.
    end

    command_line = ['-cp', pdf_tool, 'MergePdf', '--file_output', target]
    Libis::Tools::Command.run(Libis::Format::Config[:java_path], *command_line, *options, *inputs)
  end
end
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|
@@ -1,36 +0,0 @@
|
|
1
|
-
require 'os'
|
2
|
-
|
3
|
-
require 'libis/tools/extend/string'
|
4
|
-
require 'libis/tools/logger'
|
5
|
-
require 'libis/tools/command'
|
6
|
-
|
7
|
-
require 'libis/format/config'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
|
12
|
-
# Rewrites a PDF file with Ghostscript's pdfwrite device.
class PdfOptimizer
  include ::Libis::Tools::Logger

  def self.run(source, target, quality)
    self.new.run source, target, quality
  end

  # Run Ghostscript on +source+ and write the result to +target+.
  #
  # @param quality value for the Ghostscript -dPDFSETTINGS option (passed as "/<quality>")
  # @return the result of Libis::Tools::Command.run
  def run(source, target, quality)
    # Use the configured Ghostscript program, like PdfToPdfa does; plain 'gs' when nothing is configured
    ghostscript = Libis::Format::Config[:ghostscript_path] || 'gs'

    Libis::Tools::Command.run(
        ghostscript,
        '-sDEVICE=pdfwrite',
        '-dCompatibilityLevel=1.4',
        "-dPDFSETTINGS=/#{quality}",
        '-dNOPAUSE',
        '-dBATCH',
        "-sOutputFile=#{target}",
        "#{source}"
    )
  end
end
|
34
|
-
|
35
|
-
end
|
36
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'os'
|
2
|
-
|
3
|
-
require 'libis/tools/extend/string'
|
4
|
-
require 'libis/tools/logger'
|
5
|
-
require 'libis/tools/command'
|
6
|
-
|
7
|
-
require 'libis/format/config'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
|
12
|
-
# Runs the SplitPdf entry point of the bundled PdfTool.jar.
class PdfSplit
  include ::Libis::Tools::Logger

  def self.run(source, target, options = [])
    new.run(source, target, options)
  end

  # Start a Java process that runs SplitPdf on +source+, writing to +target+.
  #
  # @param options [Array<String>] extra command line arguments, appended as-is
  # @return the result of Libis::Tools::Command.run
  def run(source, target, options = [])
    tools_path = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools'))
    pdf_tool = File.join(tools_path, 'PdfTool.jar')

    if OS.java?
      # TODO: import library and execute in current VM. For now do exactly as in MRI.
    end

    command_line = ['-cp', pdf_tool, 'SplitPdf', '--file_input', source, '--file_output', target]
    Libis::Tools::Command.run(Libis::Format::Config[:java_path], *command_line, *options)
  end
end
|
37
|
-
|
38
|
-
end
|
39
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
require 'tempfile'
|
2
|
-
require 'csv'
|
3
|
-
require 'fileutils'
|
4
|
-
|
5
|
-
require 'libis/tools/extend/string'
|
6
|
-
require 'libis/tools/logger'
|
7
|
-
require 'libis/tools/command'
|
8
|
-
|
9
|
-
require 'libis/format'
|
10
|
-
|
11
|
-
module Libis
|
12
|
-
module Format
|
13
|
-
|
14
|
-
# Converts a PDF file to PDF/A with Ghostscript and validates the result with PdfaValidator.
class PdfToPdfa
  include ::Libis::Tools::Logger

  def self.run(source, target = nil, options = {})
    self.new.run source, target, options
  end

  # Convert +source+ to PDF/A.
  #
  # @param source [String] path of the PDF file to convert
  # @param target [String, nil] path of the converted file; a name in the system temp dir is generated when nil
  # @param options [Hash, nil] :colorspace selects the ICC profile ('cmyk', anything else gives RGB)
  # @return the result of Libis::Tools::Command.run; when validation of the generated file fails, :status is
  #   set to -999 and a message is appended to :err
  def run(source, target = nil, options = nil)
    # The default for options is nil, so make sure the lookup below does not fail
    options ||= {}

    target ||= File.join(Dir.tmpdir, "#{File.basename(source, '.*')}#{random_hex}.pdf")

    icc_info = icc_options(options[:colorspace])

    icc_file = File.join(Dir.tmpdir, "#{icc_info[:icc_name]}#{random_hex}.icc")
    def_filename = File.join(Dir.tmpdir, "PDFA_def_#{random_hex}.ps")

    begin
      FileUtils.cp(File.join(Libis::Format::DATA_DIR, "#{icc_info[:icc_name]}.icc"), icc_file)

      # Private copy of the PDF/A definition file with the ICC placeholders filled in
      File.open(def_filename, 'w') do |f|
        f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
            gsub('[** Fill in ICC profile location **]', icc_file).
            gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
      end

      result = Libis::Tools::Command.run(
          Libis::Format::Config[:ghostscript_path],
          '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
          '-sColorConversionStrategy=/UseDeviceIndependentColor',
          "-sProcessColorModel=#{icc_info[:device]}",
          '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
          "-sOutputICCProfile=#{icc_file}",
          '-o', File.absolute_path(target),
          def_filename,
          source
      )
    ensure
      # Remove the temporary files, also when one of the steps above raised
      FileUtils.rm [icc_file, def_filename], force: true
    end

    unless PdfaValidator.run(target)
      result[:status] = -999
      result[:err] << 'Failed to validate generated PDF/A file.'
    end

    result
  end


  private

  # Random hexadecimal string (24 characters) used to make temporary file names unique.
  def random_hex
    Random.new.bytes(12).unpack('H*').first
  end

  # ICC profile file name, ICC reference name and Ghostscript color model for +colorspace+.
  def icc_options(colorspace)
    case colorspace.to_s.downcase
      when 'cmyk'
        {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
      else
        {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
    end
  end

end
|
72
|
-
|
73
|
-
end
|
74
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
|
3
|
-
require 'libis/tools/extend/string'
|
4
|
-
require 'libis/tools/logger'
|
5
|
-
require 'libis/tools/command'
|
6
|
-
|
7
|
-
require 'libis/format/config'
|
8
|
-
|
9
|
-
module Libis
|
10
|
-
module Format
|
11
|
-
|
12
|
-
# Validates a PDF file against the PDF/A-1B constraints.
#
# The program configured as :pdfa_path is used when present; otherwise the bundled PDFBox preflight jar is
# run with Java.
class PdfaValidator
  include ::Libis::Tools::Logger

  def self.run(source)
    self.new.run source
  end

  # @param source [String] path of the PDF file to validate
  # @return [Boolean] true when the file passed validation; false (after logging a warning) otherwise
  def run(source)

    src_file = File.absolute_path(source)

    if (pdfa = Libis::Format::Config[:pdfa_path])
      # Keep it clean: tool generates fontconfig/ cache dir in current working dir
      previous_wd = Dir.getwd
      begin
        Dir.chdir(Dir.tmpdir)

        result = Libis::Tools::Command.run(
            pdfa,
            '--noxml',
            '--level', 'B',
            '--verb', '0',
            src_file
        )
      ensure
        # Always go back, also when the command raised
        Dir.chdir(previous_wd)
      end

      passed = result[:out].any? { |line| line =~ /^VLD-\[PASS\]/ }
    else
      jar = File.join(Libis::Format::ROOT_DIR, 'tools', 'pdfbox', 'preflight-app-1.8.10.jar')
      result = Libis::Tools::Command.run(
          Libis::Format::Config[:java_path],
          '-jar', jar,
          src_file
      )
      passed = result[:status] == 0
    end

    return true if passed

    warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
         result[:out].join("\n")
    false
  end
end
|
59
|
-
|
60
|
-
end
|
61
|
-
end
|