libis-format 1.3.7.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -62
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -248
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +73 -109
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -36
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# coding: utf-8
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
  module Format

    # Singleton facade over the configured format library implementation.
    #
    # Every lookup is delegated to an implementation object that must respond to
    # +query(key, value)+ and return a collection whose elements respond to
    # +to_hash+. Each public instance method is also exposed as a class method
    # that simply forwards to the singleton instance.
    class Library
      include Singleton

      # Singular field names that are answered with the first entry of the
      # corresponding list-valued field of a format info hash.
      FIRST_ENTRY_FIELDS = { mimetype: :mimetypes, puid: :puids, extension: :extensions }.freeze
      private_constant :FIRST_ENTRY_FIELDS

      class << self
        # Replace the implementation used by the singleton instance.
        def implementation=(impl)
          instance.implementation = impl
        end

        # @see Library#get_info
        def get_info(format)
          instance.get_info(format)
        end

        # @see Library#get_info_by
        def get_info_by(key, value)
          instance.get_info_by(key, value)
        end

        # @see Library#get_infos_by
        def get_infos_by(key, value)
          instance.get_infos_by(key, value)
        end

        # @see Library#get_field
        def get_field(format, field)
          instance.get_field(format, field)
        end

        # @see Library#get_field_by
        def get_field_by(key, value, field)
          instance.get_field_by(key, value, field)
        end

        # @see Library#get_fields_by
        def get_fields_by(key, value, field)
          instance.get_fields_by(key, value, field)
        end

        # @see Library#known?
        def known?(key, value)
          instance.known?(key, value)
        end

        # @see Library#enrich
        def enrich(info, map_keys = {})
          instance.enrich(info, map_keys)
        end

        # @see Library#normalize
        def normalize(info, map_keys = {})
          instance.normalize(info, map_keys)
        end
      end

      # Replace the implementation that answers all queries.
      #
      # @param impl [#query] object answering +query(key, value)+
      def implementation=(impl)
        @implementation = impl
      end

      # Value of +field+ for the format called +format+.
      #
      # @return [Object, nil] nil when the format is unknown
      def get_field(format, field)
        get_field_by(:name, format, field)
      end

      # Value of +field+ for the first format matching +key+ == +value+.
      #
      # The singular fields +:mimetype+, +:puid+ and +:extension+ return the
      # first entry of the matching list field; any other field is read as-is.
      #
      # @return [Object, nil] nil when nothing matches or the field is absent
      def get_field_by(key, value, field)
        info = get_info_by(key, value)
        return nil unless info

        list_field = FIRST_ENTRY_FIELDS[field]
        list_field ? info[list_field]&.first : info[field]
      end

      # Values of +field+ for every format matching +key+ == +value+, nils removed.
      #
      # Unlike #get_field_by, +field+ is read verbatim here (no singular to
      # list-field translation).
      #
      # @return [Array]
      def get_fields_by(key, value, field)
        # get_infos_by always returns an Array, so no safe navigation is needed
        # (and `x&.map { }.compact` would still fail on nil: `&.` does not
        # short-circuit the rest of the chain in Ruby).
        get_infos_by(key, value).map { |info| info[field] }.compact
      end

      # Info hash of the format called +format+.
      #
      # @return [Hash, nil]
      def get_info(format)
        get_info_by(:name, format)
      end

      # Info hash of the first format matching +key+ == +value+.
      #
      # @return [Hash, nil]
      def get_info_by(key, value)
        get_infos_by(key, value).first
      end

      # Info hashes of all formats matching +key+ == +value+.
      #
      # @return [Array<Hash>] empty when the implementation finds nothing
      def get_infos_by(key, value)
        # Treat a nil answer from the implementation as "no match".
        (@implementation.query(key, value) || []).map(&:to_hash)
      end

      # Whether at least one format matches +key+ == +value+.
      def known?(key, value)
        !get_info_by(key, value).nil?
      end

      # Normalize +info+ and then copy library data into it.
      #
      # +map_keys+ maps library field names to the keys used in +info+. Only the
      # fields listed in +map_keys+, plus +:name+, are copied, and only when the
      # library has a value for them. +info+ is modified in place.
      #
      # @param info [Hash] format information to complete
      # @param map_keys [Hash] library field name => key used in +info+
      # @return [Hash] the completed info ({} when +info+ is not a Hash)
      def enrich(info, map_keys = {})
        info = normalize(info, map_keys)
        mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)

        format = info[mapper[:name]]
        return info if format.nil?

        # A name the library does not know cannot contribute anything.
        lib_info = get_info(format)
        return info unless lib_info

        mapper.keys.each do |key|
          list_field = FIRST_ENTRY_FIELDS[key]
          value = list_field ? lib_info[list_field]&.first : lib_info[key]
          info[mapper[key]] = value if value
        end
        info
      end

      # Derive name from the available info
      #
      # Looks the format name up by puid, then by mimetype, unless a name is
      # already present. When a name is available, the mimetype and category
      # entries are overwritten with the library's values (nil when the library
      # has none). +info+ is modified in place.
      #
      # @param info [Hash] format information to normalize
      # @param map_keys [Hash] library field name => key used in +info+
      # @return [Hash] the normalized info ({} when +info+ is not a Hash)
      def normalize(info, map_keys = {})
        return {} unless info.is_a? Hash

        mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
        # fill format from looking up by puid
        unless (puid = info[mapper[:puid]]).blank?
          info[mapper[:name]] ||= get_field_by(:puid, puid, :name)
        end
        # fill format from looking up by mimetype
        unless (mime = info[mapper[:mimetype]]).blank?
          info[mapper[:name]] ||= get_field_by(:mimetype, mime, :name)
        end
        # finally complete the information from looking up by format name
        unless (format = info[mapper[:name]]).nil?
          info[mapper[:mimetype]] = get_field(format, :mimetype)
          info[mapper[:category]] = get_field(format, :category)
        end
        info
      end

      private

      # Build the default implementation from the configuration.
      #
      # NOTE(review): the configured string is evaluated as Ruby code, so the
      # :format_library_implementation setting must never come from untrusted
      # input. It is kept as eval because the setting may be an expression
      # rather than a bare constant name.
      def initialize
        @implementation = eval(Libis::Format::Config[:format_library_implementation])
      end

    end

  end
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'singleton'
|
4
2
|
|
5
3
|
require 'tempfile'
|
@@ -7,10 +5,10 @@ require 'csv'
|
|
7
5
|
|
8
6
|
require 'libis/format/config'
|
9
7
|
|
10
|
-
unless CSV::HeaderConverters.
|
11
|
-
CSV::HeaderConverters[:droid_headers] = lambda {
|
12
|
-
h.encode(ConverterEncoding).downcase.strip
|
13
|
-
|
8
|
+
unless CSV::HeaderConverters.has_key?(:droid_headers)
|
9
|
+
CSV::HeaderConverters[:droid_headers] = lambda {|h|
|
10
|
+
h.encode(ConverterEncoding).downcase.strip.
|
11
|
+
gsub(/\W+/, "").to_sym
|
14
12
|
}
|
15
13
|
end
|
16
14
|
|
@@ -19,7 +17,9 @@ require_relative 'identification_tool'
|
|
19
17
|
module Libis
|
20
18
|
module Format
|
21
19
|
module Tool
|
20
|
+
|
22
21
|
class Droid < Libis::Format::Tool::IdentificationTool
|
22
|
+
|
23
23
|
def run_list(filelist, _options = {})
|
24
24
|
runner(filelist)
|
25
25
|
end
|
@@ -43,13 +43,12 @@ module Libis
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def parse_report(report)
|
46
|
-
keys =
|
47
|
-
|
48
|
-
|
49
|
-
]
|
46
|
+
keys = [
|
47
|
+
:id, :parent_id, :uri, :filepath, :filename, :matchtype, :status, :filesize, :type, :extension,
|
48
|
+
:mod_time, :ext_mismatch, :hash, :format_count, :puid, :mimetype, :format_name, :format_version]
|
50
49
|
result = CSV.parse(File.readlines(report).join)
|
51
|
-
|
52
|
-
|
50
|
+
.map {|a| Hash[keys.zip(a)]}
|
51
|
+
.select {|a| a[:type] == 'File'}
|
53
52
|
# File.delete report
|
54
53
|
result.each do |r|
|
55
54
|
r.delete(:id)
|
@@ -71,39 +70,39 @@ module Libis
|
|
71
70
|
|
72
71
|
def create_report(profile, report)
|
73
72
|
args = [
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
'-e', report,
|
74
|
+
'-p', profile,
|
75
|
+
'-q'
|
77
76
|
]
|
78
77
|
timeout = Libis::Format::Config[:timeouts][:droid]
|
79
78
|
result = Libis::Tools::Command.run(
|
80
|
-
|
81
|
-
|
82
|
-
|
79
|
+
Libis::Format::Config[:droid_cmd], *args,
|
80
|
+
timeout: timeout,
|
81
|
+
kill_after: timeout * 2
|
83
82
|
)
|
84
|
-
result[:err].select! {
|
85
|
-
raise "#{self.class} report took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
86
|
-
raise "#{self.class} report errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
83
|
+
result[:err].select! {|x| x =~ /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} ERROR /}
|
84
|
+
raise RuntimeError, "#{self.class} report took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
85
|
+
raise RuntimeError, "#{self.class} report errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
87
86
|
|
88
87
|
File.delete profile
|
89
88
|
end
|
90
89
|
|
91
90
|
def create_profile(file_or_list, profile, recursive = false)
|
92
91
|
args = []
|
93
|
-
files = file_or_list.is_a?(Array) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
|
94
|
-
files.each {
|
92
|
+
files = (file_or_list.is_a?(Array)) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
|
93
|
+
files.each {|file| args << '-a' << file}
|
95
94
|
args << '-q'
|
96
95
|
args << '-p' << profile
|
97
96
|
args << '-R' if recursive
|
98
97
|
timeout = Libis::Format::Config[:timeouts][:droid]
|
99
98
|
result = Libis::Tools::Command.run(
|
100
|
-
|
101
|
-
|
102
|
-
|
99
|
+
Libis::Format::Config[:droid_cmd], *args,
|
100
|
+
timeout: timeout,
|
101
|
+
kill_after: timeout * 2
|
103
102
|
)
|
104
|
-
result[:err].select! {
|
105
|
-
raise "#{self.class} profile took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
106
|
-
raise "#{self.class} profile errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
103
|
+
result[:err].select! {|x| x =~ /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} ERROR /}
|
104
|
+
raise RuntimeError, "#{self.class} profile took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
105
|
+
raise RuntimeError, "#{self.class} profile errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
107
106
|
end
|
108
107
|
|
109
108
|
def profile_file_name
|
@@ -113,7 +112,9 @@ module Libis
|
|
113
112
|
def result_file_name
|
114
113
|
Tools::TempFile.name('droid', '.csv')
|
115
114
|
end
|
115
|
+
|
116
116
|
end
|
117
|
+
|
117
118
|
end
|
118
119
|
end
|
119
120
|
end
|
@@ -1,56 +1,58 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'identification_tool'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
class ExtensionIdentification < Libis::Format::Tool::IdentificationTool
|
9
|
-
|
8
|
+
|
9
|
+
def run_list(filelist, _options = {})
|
10
|
+
|
10
11
|
output = runner(nil, filelist)
|
11
12
|
|
12
13
|
process_output(output)
|
14
|
+
|
13
15
|
end
|
14
16
|
|
15
|
-
def run_dir(dir, recursive = true,
|
17
|
+
def run_dir(dir, recursive = true, _options = {})
|
18
|
+
|
16
19
|
filelist = find_files(dir, recursive)
|
17
20
|
|
18
21
|
output = runner(nil, filelist)
|
19
22
|
|
20
23
|
process_output(output)
|
24
|
+
|
21
25
|
end
|
22
26
|
|
23
|
-
def run(file,
|
27
|
+
def run(file, _options)
|
28
|
+
|
24
29
|
output = runner(file)
|
25
30
|
|
26
31
|
process_output(output)
|
32
|
+
|
27
33
|
end
|
28
34
|
|
29
35
|
protected
|
30
36
|
|
31
37
|
def runner(*args)
|
38
|
+
|
32
39
|
args.map do |file|
|
33
|
-
info = ::Libis::Format::
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
puid: begin
|
44
|
-
info[:PUID].first
|
45
|
-
rescue StandardError
|
46
|
-
nil
|
47
|
-
end,
|
48
|
-
matchtype: 'extension',
|
49
|
-
tool: :type_database
|
50
|
-
}
|
40
|
+
info = ::Libis::Format::Library.get_info_by(:extension, File.extname(file))
|
41
|
+
if info
|
42
|
+
{
|
43
|
+
filepath: file,
|
44
|
+
mimetype: (info[:mimetypes].first rescue nil),
|
45
|
+
puid: (info[:puids].first rescue nil),
|
46
|
+
matchtype: 'extension',
|
47
|
+
tool: :format_library
|
48
|
+
}
|
49
|
+
end
|
51
50
|
end.cleanup
|
51
|
+
|
52
52
|
end
|
53
|
+
|
53
54
|
end
|
55
|
+
|
54
56
|
end
|
55
57
|
end
|
56
|
-
end
|
58
|
+
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/tools/extend/string'
|
4
2
|
require 'libis/tools/extend/empty'
|
5
3
|
require 'libis/tools/command'
|
@@ -10,17 +8,13 @@ require 'libis/format/config'
|
|
10
8
|
module Libis
|
11
9
|
module Format
|
12
10
|
module Tool
|
11
|
+
|
13
12
|
class FFMpeg
|
14
13
|
include Singleton
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ffmpeg_cmd], '-h')
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, options = {})
|
23
|
-
instance.run source, target, options
|
17
|
+
self.instance.run source, target, options
|
24
18
|
end
|
25
19
|
|
26
20
|
def run(source, target, options = {})
|
@@ -34,22 +28,21 @@ module Libis
|
|
34
28
|
|
35
29
|
timeout = Libis::Format::Config[:timeouts][:ffmpeg]
|
36
30
|
result = Libis::Tools::Command.run(
|
37
|
-
|
38
|
-
|
39
|
-
|
31
|
+
Libis::Format::Config[:ffmpeg_cmd], *opts,
|
32
|
+
timeout: timeout,
|
33
|
+
kill_after: timeout * 2
|
40
34
|
)
|
41
35
|
|
42
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
43
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
36
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
37
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
44
38
|
|
45
39
|
warn "FFMpeg warnings: #{(result[:err] + result[:out]).join("\n")}" unless result[:err].empty?
|
46
40
|
|
47
|
-
|
48
|
-
command: result,
|
49
|
-
files: [target]
|
50
|
-
}
|
41
|
+
result[:out]
|
51
42
|
end
|
43
|
+
|
52
44
|
end
|
45
|
+
|
53
46
|
end
|
54
47
|
end
|
55
48
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/tools/extend/string'
|
4
2
|
require 'libis/tools/command'
|
5
3
|
|
@@ -11,33 +9,36 @@ require_relative 'identification_tool'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class Fido < Libis::Format::Tool::IdentificationTool
|
14
|
+
|
15
15
|
def self.add_formats(formats_file)
|
16
|
-
instance.formats << formats_file unless instance.formats.include?(formats_file)
|
16
|
+
self.instance.formats << formats_file unless self.instance.formats.include?(formats_file)
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.del_formats(formats_file)
|
20
|
-
instance.formats.delete(formats_file)
|
20
|
+
self.instance.formats.delete(formats_file)
|
21
21
|
end
|
22
22
|
|
23
23
|
attr_reader :formats
|
24
24
|
|
25
|
-
def run_list(filelist,
|
25
|
+
def run_list(filelist, options = {})
|
26
26
|
create_list_file(filelist) do |list_file|
|
27
|
-
output = runner(nil, '-input', list_file.escape_for_string,
|
27
|
+
output = runner(nil, '-input', list_file.escape_for_string, options)
|
28
28
|
process_output(output)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
-
def run_dir(dir, recursive = true,
|
32
|
+
def run_dir(dir, recursive = true, options = {})
|
33
33
|
args = []
|
34
34
|
args << '-recurse' if recursive
|
35
|
-
|
35
|
+
args << options
|
36
|
+
output = runner(dir, *args)
|
36
37
|
process_output(output)
|
37
38
|
end
|
38
39
|
|
39
|
-
def run(file,
|
40
|
-
output = runner(file,
|
40
|
+
def run(file, options = {})
|
41
|
+
output = runner(file, options)
|
41
42
|
process_output(output)
|
42
43
|
end
|
43
44
|
|
@@ -52,9 +53,11 @@ module Libis
|
|
52
53
|
|
53
54
|
attr_writer :formats
|
54
55
|
|
55
|
-
def runner(filename, *args
|
56
|
-
|
57
|
-
|
56
|
+
def runner(filename, *args)
|
57
|
+
options = {}
|
58
|
+
options = args.pop if args.last.is_a?(Hash)
|
59
|
+
# Load custom format definitions if present
|
60
|
+
args << '-loadformats' << "#{formats.join(',')}" unless formats.empty?
|
58
61
|
|
59
62
|
# Workaround for Fido performance bug
|
60
63
|
args << '-bufsize' << (options[:bufsize] || 1000).to_s
|
@@ -65,7 +68,7 @@ module Libis
|
|
65
68
|
args << '-nocontainer' if options[:nocontainer]
|
66
69
|
|
67
70
|
# Add filename to argument list (optional)
|
68
|
-
args << filename.escape_for_string
|
71
|
+
args << "#{filename.escape_for_string}" if filename
|
69
72
|
|
70
73
|
# No header output
|
71
74
|
args << '-q'
|
@@ -73,20 +76,20 @@ module Libis
|
|
73
76
|
# Run command and capture results
|
74
77
|
timeout = Libis::Format::Config[:timeouts][:fido]
|
75
78
|
result = ::Libis::Tools::Command.run(
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
+
Libis::Format::Config[:fido_cmd], *args,
|
80
|
+
timeout: timeout,
|
81
|
+
kill_after: timeout * 2
|
79
82
|
)
|
80
83
|
|
81
84
|
# Log warning if needed
|
82
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
83
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
85
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
86
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
84
87
|
|
85
88
|
# Parse output (CSV) text into array and return result
|
86
|
-
keys =
|
89
|
+
keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype]
|
87
90
|
data = CSV.parse(result[:out].join("\n"))
|
88
|
-
|
89
|
-
|
91
|
+
.map {|a| Hash[keys.zip(a)]}
|
92
|
+
.select {|a| a[:status] == 'OK'}
|
90
93
|
data.each do |r|
|
91
94
|
r.delete(:time)
|
92
95
|
r.delete(:status)
|
@@ -94,7 +97,9 @@ module Libis
|
|
94
97
|
r[:tool] = :fido
|
95
98
|
end
|
96
99
|
end
|
100
|
+
|
97
101
|
end
|
102
|
+
|
98
103
|
end
|
99
104
|
end
|
100
105
|
end
|
@@ -1,38 +1,49 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'identification_tool'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
class FileTool < Libis::Format::Tool::IdentificationTool
|
9
|
-
|
8
|
+
|
9
|
+
def run_list(filelist, _options = {})
|
10
|
+
|
10
11
|
create_list_file(filelist) do |list_file|
|
12
|
+
|
11
13
|
output = runner(nil, '--files-from', list_file)
|
12
14
|
|
13
15
|
process_output(output)
|
16
|
+
|
14
17
|
end
|
18
|
+
|
15
19
|
end
|
16
20
|
|
17
|
-
def run_dir(dir, recursive = true,
|
21
|
+
def run_dir(dir, recursive = true, _options = {})
|
22
|
+
|
18
23
|
filelist = find_files(dir, recursive)
|
19
24
|
|
20
25
|
create_list_file(filelist) do |list_file|
|
26
|
+
|
21
27
|
output = runner(nil, '--files-from', list_file)
|
22
28
|
|
23
29
|
process_output(output)
|
30
|
+
|
24
31
|
end
|
32
|
+
|
25
33
|
end
|
26
34
|
|
27
|
-
def run(file,
|
35
|
+
def run(file, _options = {})
|
36
|
+
|
28
37
|
output = runner(file)
|
29
38
|
|
30
39
|
process_output(output)
|
40
|
+
|
31
41
|
end
|
32
42
|
|
33
43
|
protected
|
34
44
|
|
35
45
|
def runner(filename, *args)
|
46
|
+
|
36
47
|
# Create new argument list
|
37
48
|
opts = []
|
38
49
|
|
@@ -50,21 +61,23 @@ module Libis
|
|
50
61
|
# Run the UNIX file command and capture the results
|
51
62
|
timeout = Libis::Format::Config[:timeouts][:file_tool]
|
52
63
|
result = ::Libis::Tools::Command.run(
|
53
|
-
|
54
|
-
|
55
|
-
|
64
|
+
'file', *opts,
|
65
|
+
timeout: timeout,
|
66
|
+
kill_after: timeout * 2
|
56
67
|
)
|
57
68
|
|
58
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
59
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
69
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
70
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
60
71
|
|
61
72
|
# Parse output text into array and return result
|
62
73
|
result[:out].map do |line|
|
63
74
|
r = line.split(/:\s+/)
|
64
|
-
{
|
75
|
+
{filepath: r[0], mimetype: r[1], matchtype: 'magic', tool: :file}
|
65
76
|
end
|
66
77
|
end
|
78
|
+
|
67
79
|
end
|
80
|
+
|
68
81
|
end
|
69
82
|
end
|
70
83
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,46 +9,37 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class FopPdf
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:fop_jar])
|
16
|
+
def self.run(xml, target, options = [])
|
17
|
+
self.new.run xml, target, options
|
22
18
|
end
|
23
19
|
|
24
|
-
def
|
25
|
-
new.run xml, target
|
26
|
-
end
|
20
|
+
def run(xml, target, options = [])
|
27
21
|
|
28
|
-
def run(xml, target)
|
29
22
|
if OS.java?
|
30
23
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
31
24
|
end
|
32
25
|
|
33
26
|
timeout = Libis::Format::Config[:timeouts][:fop]
|
34
27
|
result = Libis::Tools::Command.run(
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
kill_after: timeout * 2
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
"-Dfop.home=#{File.dirname(Libis::Format::Config[:fop_jar])}",
|
30
|
+
'-jar', Libis::Format::Config[:fop_jar],
|
31
|
+
'-fo', xml,
|
32
|
+
'-pdf', target,
|
33
|
+
timeout: timeout,
|
34
|
+
kill_after: timeout * 2
|
43
35
|
)
|
44
36
|
|
45
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
37
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
38
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
47
39
|
|
48
|
-
{
|
49
|
-
command: result,
|
50
|
-
files: [target]
|
51
|
-
}
|
52
40
|
end
|
53
41
|
end
|
42
|
+
|
54
43
|
end
|
55
44
|
end
|
56
45
|
end
|