libis-format 1.3.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +40 -153
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +83 -125
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'singleton'
|
4
4
|
require 'pathname'
|
@@ -7,8 +7,9 @@ require 'libis-tools'
|
|
7
7
|
require 'libis/tools/extend/hash'
|
8
8
|
require 'libis/tools/extend/string'
|
9
9
|
require 'libis/tools/extend/empty'
|
10
|
+
require 'nori/core_ext/object'
|
10
11
|
|
11
|
-
require 'libis/format/
|
12
|
+
require 'libis/format/library'
|
12
13
|
|
13
14
|
require_relative 'config'
|
14
15
|
require_relative 'tool/fido'
|
@@ -18,10 +19,13 @@ require_relative 'tool/extension_identification'
|
|
18
19
|
|
19
20
|
module Libis
|
20
21
|
module Format
|
22
|
+
|
21
23
|
class Identifier
|
22
24
|
include ::Libis::Tools::Logger
|
23
25
|
include Singleton
|
24
26
|
|
27
|
+
public
|
28
|
+
|
25
29
|
def self.add_xml_validation(mimetype, xsd_file)
|
26
30
|
instance.xml_validations[mimetype] = xsd_file
|
27
31
|
end
|
@@ -37,28 +41,29 @@ module Libis
|
|
37
41
|
attr_reader :xml_validations
|
38
42
|
|
39
43
|
def get(file, options = {})
|
40
|
-
|
41
|
-
options[:
|
42
|
-
options[:
|
44
|
+
|
45
|
+
options[:droid] = true unless options.keys.include?(:droid) or (options[:tool] and options[:tool] != :droid)
|
46
|
+
options[:fido] = true unless options.keys.include?(:fido) or (options[:tool] and options[:tool] != :fido)
|
47
|
+
options[:file] = true unless options.keys.include?(:file) or (options[:tool] and options[:tool] != :file)
|
43
48
|
options[:xml_validation] = true if options[:xml_validation].nil?
|
44
49
|
|
45
|
-
result = {
|
50
|
+
result = {messages: [], output: {}, formats: {}}
|
46
51
|
|
47
52
|
begin
|
48
53
|
get_droid_identification(file, result, options) if options[:droid]
|
49
|
-
rescue
|
54
|
+
rescue => e
|
50
55
|
log_msg(result, :error, "Error running Droid: #{e.message} @ #{e.backtrace.first}")
|
51
56
|
end
|
52
57
|
|
53
58
|
begin
|
54
59
|
get_fido_identification(file, result, options) if options[:fido]
|
55
|
-
rescue
|
60
|
+
rescue => e
|
56
61
|
log_msg(result, :error, "Error running Fido: #{e.message} @ #{e.backtrace.first}")
|
57
62
|
end
|
58
63
|
|
59
64
|
begin
|
60
65
|
get_file_identification(file, result, options) if options[:file]
|
61
|
-
rescue
|
66
|
+
rescue => e
|
62
67
|
log_msg(result, :error, "Error running File: #{e.message} @ #{e.backtrace.first}")
|
63
68
|
end
|
64
69
|
|
@@ -69,13 +74,14 @@ module Libis
|
|
69
74
|
# Libis::Tools::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
|
70
75
|
begin
|
71
76
|
validate_against_xml_schema(result, options[:base_dir]) if options[:xml_validation]
|
72
|
-
rescue
|
77
|
+
rescue => e
|
73
78
|
log_msg(result, :error, "Error validating XML files: #{e.message} @ #{e.backtrace.first}")
|
74
79
|
end
|
75
80
|
|
76
81
|
process_results(result, !options[:keep_output])
|
77
82
|
|
78
83
|
result
|
84
|
+
|
79
85
|
end
|
80
86
|
|
81
87
|
protected
|
@@ -91,7 +97,7 @@ module Libis
|
|
91
97
|
end
|
92
98
|
|
93
99
|
def get_fido_identification(file, result, options)
|
94
|
-
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive],
|
100
|
+
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive], options[:fido_options])
|
95
101
|
process_tool_output(output, result, options[:base_dir])
|
96
102
|
output
|
97
103
|
end
|
@@ -117,21 +123,18 @@ module Libis
|
|
117
123
|
end
|
118
124
|
|
119
125
|
def xml_validate(file, file_result, result, base_dir)
|
120
|
-
return unless file_result[:mimetype] =~
|
121
|
-
|
126
|
+
return unless file_result[:mimetype] =~ /^(text|application)\/xml$/
|
122
127
|
filepath = base_dir ? File.join(base_dir, file) : file
|
123
128
|
doc = ::Libis::Tools::XmlDocument.open filepath
|
124
129
|
xml_validations.each do |mime, xsd_file|
|
125
130
|
next unless xsd_file
|
126
|
-
|
127
131
|
begin
|
128
132
|
if doc.validates_against?(xsd_file)
|
129
133
|
log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
|
130
|
-
info = {
|
131
|
-
|
132
|
-
file_result.merge! Libis::Format::TypeDatabase.enrich(info, PUID: :puid, MIME: :mimetype, NAME: :format_name)
|
134
|
+
info = {mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation', format_version: '', }
|
135
|
+
file_result.merge! Libis::Format::Library.enrich(info, puid: :puid, mimetype: :mimetype, name: :format_name)
|
133
136
|
end
|
134
|
-
rescue
|
137
|
+
rescue => e
|
135
138
|
# Do nothing - probably Nokogiri crashed during validation. Could have many causes
|
136
139
|
# (remote schema (firewall, network, link rot, ...), schema syntax error, corrupt XML,...)
|
137
140
|
# so we log and continue.
|
@@ -139,7 +142,7 @@ module Libis
|
|
139
142
|
"Error during XML validation of file #{file} against #{File.basename(xsd_file)}: #{e.message}")
|
140
143
|
end
|
141
144
|
end
|
142
|
-
rescue
|
145
|
+
rescue => e
|
143
146
|
# Not much we can do. Probably Nokogiri crashed opening the XML file. What caused this?
|
144
147
|
# (XML not parsable, false XML identification, ???)
|
145
148
|
# so we log and continue.
|
@@ -147,30 +150,30 @@ module Libis
|
|
147
150
|
end
|
148
151
|
|
149
152
|
def process_results(result, delete_output = true)
|
150
|
-
result[:output].
|
153
|
+
result[:output].keys.each do |file|
|
151
154
|
output = result[:output][file]
|
152
155
|
file_result = result[:formats][file] = {}
|
153
156
|
if output.empty?
|
154
157
|
log_msg(result, :warn, "Could not identify format of '#{file}'.")
|
155
|
-
file_result
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
158
|
+
file_result = {
|
159
|
+
mimetype: 'application/octet-stream',
|
160
|
+
puid: 'fmt/unknown',
|
161
|
+
score: 0,
|
162
|
+
tool: nil
|
163
|
+
}
|
161
164
|
else
|
162
|
-
format_matches = output.group_by {
|
165
|
+
format_matches = output.group_by {|x| [x[:mimetype], x[:puid]]}
|
163
166
|
format_matches.each do |match, group|
|
164
|
-
format_matches[match] = group.group_by {
|
167
|
+
format_matches[match] = group.group_by {|x| x[:score]}.sort.reverse.to_h
|
165
168
|
end
|
166
169
|
case format_matches.count
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
170
|
+
when 0
|
171
|
+
# No, this really cannot happen. If there are no hits, there will be at least a format [nil,nil]
|
172
|
+
when 1
|
173
|
+
# only one match, that's easy. The first of the highest score will be used
|
174
|
+
file_result.merge!(get_best_result(output))
|
175
|
+
else
|
176
|
+
process_multiple_formats(file_result, format_matches, output)
|
174
177
|
end
|
175
178
|
end
|
176
179
|
end
|
@@ -181,13 +184,12 @@ module Libis
|
|
181
184
|
# multiple matches. Let's select the highest score matches
|
182
185
|
file_result.merge!(get_best_result(output))
|
183
186
|
file_result[:alternatives] = []
|
184
|
-
format_matches.
|
187
|
+
format_matches.keys.each do |mime, puid|
|
185
188
|
next if file_result[:mimetype] == mime && puid.nil?
|
186
|
-
|
187
|
-
selection = output.select { |x| x[:mimetype] == mime && x[:puid] == puid }
|
189
|
+
selection = output.select {|x| x[:mimetype] == mime && x[:puid] == puid}
|
188
190
|
file_result[:alternatives] << get_best_result(selection)
|
189
191
|
end
|
190
|
-
file_result[:alternatives] = file_result[:alternatives].sort_by {
|
192
|
+
file_result[:alternatives] = file_result[:alternatives].sort_by {|x| x[:score]}.reverse
|
191
193
|
file_result.delete(:alternatives) if file_result[:alternatives].size <= 1
|
192
194
|
end
|
193
195
|
|
@@ -206,15 +208,15 @@ module Libis
|
|
206
208
|
end
|
207
209
|
|
208
210
|
def get_mimetype(puid)
|
209
|
-
::Libis::Format::
|
210
|
-
rescue StandardError
|
211
|
-
nil
|
211
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetypes)
|
212
212
|
end
|
213
213
|
|
214
214
|
def get_best_result(results)
|
215
|
-
score = results.map {
|
216
|
-
results.select {
|
215
|
+
score = results.map {|x| x[:score]}.max
|
216
|
+
results.select {|x| x[:score] == score}.reduce(:apply_defaults)
|
217
217
|
end
|
218
|
+
|
218
219
|
end
|
220
|
+
|
219
221
|
end
|
220
222
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Libis
  module Format
    # Plain value object describing a single file format.
    #
    # The list-valued attributes (+puids+, +mimetypes+, +extensions+) are always
    # stored as private copies in Array form, so later changes to the objects the
    # caller passed in do not leak into this instance, and +nil+ or a single
    # scalar is accepted in place of an array.
    class Info
      attr_reader :name, :category, :description, :puids, :mimetypes, :extensions

      # @param name [Object] format name (stored as given)
      # @param category [Object] format category (stored as given)
      # @param description [#to_s] free-text description; nil becomes ''
      # @param puids [Array, Object, nil] identifiers; coerced with Kernel#Array
      # @param mimetypes [Array, Object, nil] MIME types; coerced with Kernel#Array
      # @param extensions [Array, Object, nil] file extensions; coerced with Kernel#Array
      def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
        @name = name
        @category = category
        # Copy on the way in: #to_hash already copies on the way out, and the two
        # ends should agree, otherwise the caller can still alter our state.
        @description = description.to_s.dup
        @puids = Array(puids).dup
        @mimetypes = Array(mimetypes).dup
        @extensions = Array(extensions).dup
      end

      # @return [Hash] the attributes keyed by symbol; the description and the
      #   three lists are shallow copies, so changing them does not affect this
      #   instance
      def to_hash
        {
          name: name,
          description: description.dup,
          category: category,
          puids: puids.dup,
          mimetypes: mimetypes.dup,
          extensions: extensions.dup
        }
      end

      # Explicit-conversion counterpart of #to_hash.
      alias to_h to_hash
    end
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# froze_string_litteral: true
|
2
|
+
# coding: utf-8
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
  module Format
    # Process-wide facade for format lookups.
    #
    # Every query is forwarded to a pluggable backend (the "implementation"),
    # which must respond to +query(key, value)+ and return a collection of
    # objects that respond to +to_hash+. The lookups below read the
    # +:mimetypes+, +:puids+ and +:extensions+ lists from those hashes, plus
    # whatever other field the caller asks for.
    #
    # All public instance methods are also available on the class itself; the
    # class-level versions simply forward to the singleton instance.
    class Library
      include Singleton

      # Singular field names and the list-valued info key each one is taken from.
      LIST_FIELDS = { mimetype: :mimetypes, puid: :puids, extension: :extensions }.freeze
      private_constant :LIST_FIELDS

      class << self
        def implementation=(impl)
          instance.implementation = impl
        end

        def get_info(format)
          instance.get_info(format)
        end

        def get_info_by(key, value)
          instance.get_info_by(key, value)
        end

        def get_infos_by(key, value)
          instance.get_infos_by(key, value)
        end

        def get_field(format, field)
          instance.get_field(format, field)
        end

        def get_field_by(key, value, field)
          instance.get_field_by(key, value, field)
        end

        def get_fields_by(key, value, field)
          instance.get_fields_by(key, value, field)
        end

        def known?(key, value)
          instance.known?(key, value)
        end

        def enrich(info, map_keys = {})
          instance.enrich(info, map_keys)
        end

        def normalize(info, map_keys = {})
          instance.normalize(info, map_keys)
        end
      end

      # Replaces the backend that answers +query(key, value)+.
      attr_writer :implementation

      # @return [Object, nil] +field+ of the format called +format+, or nil when
      #   the format is unknown
      def get_field(format, field)
        get_field_by(:name, format, field)
      end

      # Looks up the first format matching +key+ => +value+ and returns one field.
      # For +:mimetype+, +:puid+ and +:extension+ the first entry of the
      # corresponding list is returned.
      #
      # @return [Object, nil] nil when nothing matches
      def get_field_by(key, value, field)
        info = get_info_by(key, value)
        info && field_value(info, field)
      end

      # @return [Array] +field+ of every matching format, without nil entries;
      #   empty when nothing matches
      def get_fields_by(key, value, field)
        # No safe navigation here: `x&.map { }.compact` would still call
        # #compact on nil, and #get_infos_by always returns an Array.
        get_infos_by(key, value).map { |info| info[field] }.compact
      end

      # @return [Hash, nil] info hash of the format called +format+
      def get_info(format)
        get_info_by(:name, format)
      end

      # @return [Hash, nil] info hash of the first format matching +key+ => +value+
      def get_info_by(key, value)
        get_infos_by(key, value).first
      end

      # @return [Array<Hash>] info hashes of all matching formats; an empty
      #   array when the backend returns nil
      def get_infos_by(key, value)
        matches = @implementation.query(key, value)
        matches ? matches.map(&:to_hash) : []
      end

      # @return [Boolean] whether at least one format matches +key+ => +value+
      def known?(key, value)
        !get_info_by(key, value).nil?
      end

      # Normalizes +info+ (see #normalize) and then copies library data into it.
      #
      # +map_keys+ maps library field names to the keys used in +info+; fields
      # not mentioned map to themselves. Only the fields listed in +map_keys+,
      # plus +:name+, are copied, and only when the library has a value for them.
      # +info+ is modified in place.
      #
      # @return [Hash] the enriched +info+, or {} when +info+ is not a Hash
      def enrich(info, map_keys = {})
        info = normalize(info, map_keys)
        mapper = key_mapper(map_keys)
        format = info[mapper[:name]]
        return info if format.nil?

        # A name supplied by the caller may not exist in the library; in that
        # case there is nothing to copy and +info+ is returned as normalized.
        lib_info = get_info(format)
        return info unless lib_info

        mapper.keys.each do |key|
          value = field_value(lib_info, key)
          info[mapper[key]] = value if value
        end
        info
      end

      # Derives the format name from the available info.
      #
      # The name is looked up by puid first, then by mimetype, and is only set
      # when +info+ does not have one yet. When a name is present afterwards,
      # the mimetype and category entries are overwritten with the library's
      # values for that name (nil when the library has none). +info+ is
      # modified in place.
      #
      # @return [Hash] +info+, or {} when +info+ is not a Hash
      def normalize(info, map_keys = {})
        return {} unless info.is_a? Hash

        mapper = key_mapper(map_keys)
        name_key = mapper[:name]
        %i[puid mimetype].each do |lookup|
          value = info[mapper[lookup]]
          next if value.blank?

          info[name_key] ||= get_field_by(lookup, value, :name)
        end

        format = info[name_key]
        unless format.nil?
          info[mapper[:mimetype]] = get_field(format, :mimetype)
          info[mapper[:category]] = get_field(format, :category)
        end
        info
      end

      private

      def initialize
        # NOTE(review): the configured value is executed as Ruby code. That is
        # only acceptable while the configuration comes from a trusted source;
        # consider resolving a class name with Object.const_get instead.
        @implementation = eval(Libis::Format::Config[:format_library_implementation])
      end

      # Builds a key translation table: keys listed in +map_keys+ translate to
      # their mapped value, every other key translates to itself (and is
      # remembered in the table once it has been asked for).
      def key_mapper(map_keys)
        Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
      end

      # Reads +field+ from an info hash, taking the first list entry for the
      # singular names in LIST_FIELDS.
      def field_value(info, field)
        list_key = LIST_FIELDS[field]
        list_key ? info[list_key]&.first : info[field]
      end
    end
  end
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'singleton'
|
4
2
|
|
5
3
|
require 'tempfile'
|
@@ -7,10 +5,10 @@ require 'csv'
|
|
7
5
|
|
8
6
|
require 'libis/format/config'
|
9
7
|
|
10
|
-
unless CSV::HeaderConverters.
|
11
|
-
CSV::HeaderConverters[:droid_headers] = lambda {
|
12
|
-
h.encode(ConverterEncoding).downcase.strip
|
13
|
-
|
8
|
+
unless CSV::HeaderConverters.has_key?(:droid_headers)
|
9
|
+
CSV::HeaderConverters[:droid_headers] = lambda {|h|
|
10
|
+
h.encode(ConverterEncoding).downcase.strip.
|
11
|
+
gsub(/\W+/, "").to_sym
|
14
12
|
}
|
15
13
|
end
|
16
14
|
|
@@ -19,7 +17,9 @@ require_relative 'identification_tool'
|
|
19
17
|
module Libis
|
20
18
|
module Format
|
21
19
|
module Tool
|
20
|
+
|
22
21
|
class Droid < Libis::Format::Tool::IdentificationTool
|
22
|
+
|
23
23
|
def run_list(filelist, _options = {})
|
24
24
|
runner(filelist)
|
25
25
|
end
|
@@ -43,13 +43,12 @@ module Libis
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def parse_report(report)
|
46
|
-
keys =
|
47
|
-
|
48
|
-
|
49
|
-
]
|
46
|
+
keys = [
|
47
|
+
:id, :parent_id, :uri, :filepath, :filename, :matchtype, :status, :filesize, :type, :extension,
|
48
|
+
:mod_time, :ext_mismatch, :hash, :format_count, :puid, :mimetype, :format_name, :format_version]
|
50
49
|
result = CSV.parse(File.readlines(report).join)
|
51
|
-
|
52
|
-
|
50
|
+
.map {|a| Hash[keys.zip(a)]}
|
51
|
+
.select {|a| a[:type] == 'File'}
|
53
52
|
# File.delete report
|
54
53
|
result.each do |r|
|
55
54
|
r.delete(:id)
|
@@ -71,39 +70,39 @@ module Libis
|
|
71
70
|
|
72
71
|
def create_report(profile, report)
|
73
72
|
args = [
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
'-e', report,
|
74
|
+
'-p', profile,
|
75
|
+
'-q'
|
77
76
|
]
|
78
77
|
timeout = Libis::Format::Config[:timeouts][:droid]
|
79
78
|
result = Libis::Tools::Command.run(
|
80
|
-
|
81
|
-
|
82
|
-
|
79
|
+
Libis::Format::Config[:droid_cmd], *args,
|
80
|
+
timeout: timeout,
|
81
|
+
kill_after: timeout * 2
|
83
82
|
)
|
84
|
-
result[:err].select! {
|
85
|
-
raise "#{self.class} report took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
86
|
-
raise "#{self.class} report errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
83
|
+
result[:err].select! {|x| x =~ /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} ERROR /}
|
84
|
+
raise RuntimeError, "#{self.class} report took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
85
|
+
raise RuntimeError, "#{self.class} report errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
87
86
|
|
88
87
|
File.delete profile
|
89
88
|
end
|
90
89
|
|
91
90
|
def create_profile(file_or_list, profile, recursive = false)
|
92
91
|
args = []
|
93
|
-
files = file_or_list.is_a?(Array) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
|
94
|
-
files.each {
|
92
|
+
files = (file_or_list.is_a?(Array)) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
|
93
|
+
files.each {|file| args << '-a' << file}
|
95
94
|
args << '-q'
|
96
95
|
args << '-p' << profile
|
97
96
|
args << '-R' if recursive
|
98
97
|
timeout = Libis::Format::Config[:timeouts][:droid]
|
99
98
|
result = Libis::Tools::Command.run(
|
100
|
-
|
101
|
-
|
102
|
-
|
99
|
+
Libis::Format::Config[:droid_cmd], *args,
|
100
|
+
timeout: timeout,
|
101
|
+
kill_after: timeout * 2
|
103
102
|
)
|
104
|
-
result[:err].select! {
|
105
|
-
raise "#{self.class} profile took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
106
|
-
raise "#{self.class} profile errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
103
|
+
result[:err].select! {|x| x =~ /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} ERROR /}
|
104
|
+
raise RuntimeError, "#{self.class} profile took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
105
|
+
raise RuntimeError, "#{self.class} profile errors: #{result[:err].join("\n")}" unless result[:err].empty?
|
107
106
|
end
|
108
107
|
|
109
108
|
def profile_file_name
|
@@ -113,7 +112,9 @@ module Libis
|
|
113
112
|
def result_file_name
|
114
113
|
Tools::TempFile.name('droid', '.csv')
|
115
114
|
end
|
115
|
+
|
116
116
|
end
|
117
|
+
|
117
118
|
end
|
118
119
|
end
|
119
120
|
end
|
@@ -1,56 +1,58 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'identification_tool'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
class ExtensionIdentification < Libis::Format::Tool::IdentificationTool
|
9
|
-
|
8
|
+
|
9
|
+
def run_list(filelist, _options = {})
|
10
|
+
|
10
11
|
output = runner(nil, filelist)
|
11
12
|
|
12
13
|
process_output(output)
|
14
|
+
|
13
15
|
end
|
14
16
|
|
15
|
-
def run_dir(dir, recursive = true,
|
17
|
+
def run_dir(dir, recursive = true, _options = {})
|
18
|
+
|
16
19
|
filelist = find_files(dir, recursive)
|
17
20
|
|
18
21
|
output = runner(nil, filelist)
|
19
22
|
|
20
23
|
process_output(output)
|
24
|
+
|
21
25
|
end
|
22
26
|
|
23
|
-
def run(file,
|
27
|
+
def run(file, _options)
|
28
|
+
|
24
29
|
output = runner(file)
|
25
30
|
|
26
31
|
process_output(output)
|
32
|
+
|
27
33
|
end
|
28
34
|
|
29
35
|
protected
|
30
36
|
|
31
37
|
def runner(*args)
|
38
|
+
|
32
39
|
args.map do |file|
|
33
|
-
info = ::Libis::Format::
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
puid: begin
|
44
|
-
info[:PUID].first
|
45
|
-
rescue StandardError
|
46
|
-
nil
|
47
|
-
end,
|
48
|
-
matchtype: 'extension',
|
49
|
-
tool: :type_database
|
50
|
-
}
|
40
|
+
info = ::Libis::Format::Library.get_info_by(:extension, File.extname(file))
|
41
|
+
if info
|
42
|
+
{
|
43
|
+
filepath: file,
|
44
|
+
mimetype: (info[:mimetypes].first rescue nil),
|
45
|
+
puid: (info[:puids].first rescue nil),
|
46
|
+
matchtype: 'extension',
|
47
|
+
tool: :format_library
|
48
|
+
}
|
49
|
+
end
|
51
50
|
end.cleanup
|
51
|
+
|
52
52
|
end
|
53
|
+
|
53
54
|
end
|
55
|
+
|
54
56
|
end
|
55
57
|
end
|
56
|
-
end
|
58
|
+
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/tools/extend/string'
|
4
2
|
require 'libis/tools/extend/empty'
|
5
3
|
require 'libis/tools/command'
|
@@ -10,17 +8,13 @@ require 'libis/format/config'
|
|
10
8
|
module Libis
|
11
9
|
module Format
|
12
10
|
module Tool
|
11
|
+
|
13
12
|
class FFMpeg
|
14
13
|
include Singleton
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ffmpeg_cmd], '-h')
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, options = {})
|
23
|
-
instance.run source, target, options
|
17
|
+
self.instance.run source, target, options
|
24
18
|
end
|
25
19
|
|
26
20
|
def run(source, target, options = {})
|
@@ -34,22 +28,21 @@ module Libis
|
|
34
28
|
|
35
29
|
timeout = Libis::Format::Config[:timeouts][:ffmpeg]
|
36
30
|
result = Libis::Tools::Command.run(
|
37
|
-
|
38
|
-
|
39
|
-
|
31
|
+
Libis::Format::Config[:ffmpeg_cmd], *opts,
|
32
|
+
timeout: timeout,
|
33
|
+
kill_after: timeout * 2
|
40
34
|
)
|
41
35
|
|
42
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
43
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
36
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
37
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
44
38
|
|
45
39
|
warn "FFMpeg warnings: #{(result[:err] + result[:out]).join("\n")}" unless result[:err].empty?
|
46
40
|
|
47
|
-
|
48
|
-
command: result,
|
49
|
-
files: [target]
|
50
|
-
}
|
41
|
+
result[:out]
|
51
42
|
end
|
43
|
+
|
52
44
|
end
|
45
|
+
|
53
46
|
end
|
54
47
|
end
|
55
48
|
end
|