libis-format 1.3.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -250
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,26 +1,29 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'base'
|
4
|
-
require 'libis/format/tool/
|
2
|
+
require 'libis/format/tool/ffmpeg'
|
5
3
|
|
6
4
|
require 'fileutils'
|
7
5
|
|
8
6
|
module Libis
|
9
7
|
module Format
|
10
8
|
module Converter
|
9
|
+
|
11
10
|
class VideoConverter < Libis::Format::Converter::Base
|
11
|
+
|
12
12
|
def self.input_types
|
13
|
-
|
13
|
+
[:WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.output_types(format = nil)
|
17
|
-
return [] unless input_types.include?(format)
|
17
|
+
return [] unless input_types.include?(format) if format
|
18
|
+
[:GIF, :WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
|
19
|
+
end
|
18
20
|
|
19
|
-
|
21
|
+
def initialize
|
22
|
+
super
|
20
23
|
end
|
21
24
|
|
22
|
-
def quiet(
|
23
|
-
@flags[:quiet] = !!
|
25
|
+
def quiet(v)
|
26
|
+
@flags[:quiet] = !!v
|
24
27
|
end
|
25
28
|
|
26
29
|
def format(format)
|
@@ -123,10 +126,10 @@ module Libis
|
|
123
126
|
|
124
127
|
# @param [Boolean] value If set to true automatically selects optimal format for web viewing. Default: false
#   A truthy value sets the video codec option to 'h264' and the audio codec option to 'aac';
#   a falsy value leaves the options untouched.
def web_stream(value)
  return unless value

  @options[:video_codec] = 'h264'
  # Codec name is 'aac' (same spelling the FLV handling in this converter uses), not 'acc'.
  @options[:audio_codec] = 'aac'
end
|
131
134
|
|
132
135
|
# @param [String] name name of a preset. See FFMpeg documentation for more info
|
@@ -167,7 +170,7 @@ module Libis
|
|
167
170
|
|
168
171
|
elsif File.directory?(source)
|
169
172
|
|
170
|
-
sources = Dir[File.join(source, '**', '*')].reject {
|
173
|
+
sources = Dir[File.join(source, '**', '*')].reject {|p| File.directory? p}
|
171
174
|
assemble_and_convert(sources, target)
|
172
175
|
|
173
176
|
else
|
@@ -176,17 +179,15 @@ module Libis
|
|
176
179
|
|
177
180
|
end
|
178
181
|
|
179
|
-
|
180
|
-
|
181
|
-
converter: self.class.name
|
182
|
-
}
|
182
|
+
target
|
183
|
+
|
183
184
|
end
|
184
185
|
|
185
186
|
def assemble_and_convert(sources, target)
|
186
|
-
Tempfile.create(%w
|
187
|
-
sources.each {
|
187
|
+
Tempfile.create(%w(list .txt)) do |f|
|
188
|
+
sources.each {|src| f.puts src}
|
188
189
|
opts[:global] ||= []
|
189
|
-
opts[:global] += %w
|
190
|
+
opts[:global] += %w(-f concat)
|
190
191
|
f.close
|
191
192
|
target = convert_file(f.to_path, target)
|
192
193
|
end
|
@@ -197,15 +198,15 @@ module Libis
|
|
197
198
|
|
198
199
|
def convert_file(source, target)
|
199
200
|
# FLV special: only supports aac and speex audio codecs
|
200
|
-
format = (@options[:format] || File.extname(target)[1
|
201
|
-
@options[:audio_codec] ||= 'aac' if %w
|
201
|
+
format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
|
202
|
+
@options[:audio_codec] ||= 'aac' if %w'flv'.include?(format)
|
202
203
|
|
203
204
|
# SWF special: only supports mp3 audio codec
|
204
|
-
format = (@options[:format] || File.extname(target)[1
|
205
|
-
@options[:audio_codec] ||= 'mp3' if %w
|
205
|
+
format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
|
206
|
+
@options[:audio_codec] ||= 'mp3' if %w'swf'.include?(format)
|
206
207
|
|
207
208
|
# Set up FFMpeg command line parameters
|
208
|
-
opts = {
|
209
|
+
opts = {global: [], input: [], filter: [], output: []}
|
209
210
|
opts[:global] << '-hide_banner'
|
210
211
|
opts[:global] << '-loglevel' << (@options[:quiet] ? 'fatal' : 'warning')
|
211
212
|
|
@@ -213,21 +214,29 @@ module Libis
|
|
213
214
|
@options[:watermark_opacity] ||= 0.5
|
214
215
|
if @options[:watermark_image]
|
215
216
|
opts[:filter] << '-i' << @options[:watermark_image] << '-filter_complex'
|
216
|
-
opts[:filter] <<
|
217
|
-
|
217
|
+
opts[:filter] << "[1:v]format=argb,colorchannelmixer=aa=%f[wm];[0:v][wm]overlay=%s" %
|
218
|
+
[@options[:watermark_opacity], watermark_position_text]
|
218
219
|
elsif @options[:watermark_text]
|
219
220
|
@options[:watermark_text_size] ||= 10
|
220
221
|
@options[:watermark_text_color] ||= 'white'
|
221
222
|
@options[:watermark_text_shadow_color] ||= 'black'
|
222
223
|
@options[:watermark_text_shadow_offset] ||= 1
|
223
|
-
filter_text =
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
224
|
+
filter_text = "drawtext=text='%s':%s:fontfile=%s:fontsize=%d:fontcolor=%s@%f" %
|
225
|
+
[
|
226
|
+
@options[:watermark_text],
|
227
|
+
watermark_position_text(true),
|
228
|
+
Config[:watermark_font],
|
229
|
+
@options[:watermark_text_size],
|
230
|
+
@options[:watermark_text_color],
|
231
|
+
@options[:watermark_opacity]
|
232
|
+
]
|
233
|
+
filter_text += ':shadowcolor=%s@%f:shadowx=%d:shadowy=%d' %
|
234
|
+
[
|
235
|
+
@options[:watermark_text_shadow_color],
|
236
|
+
@options[:watermark_opacity],
|
237
|
+
@options[:watermark_text_shadow_offset],
|
238
|
+
@options[:watermark_text_shadow_offset]
|
239
|
+
] if @options[:watermark_text_shadow_offset] > 0
|
231
240
|
opts[:filter] << '-vf' << filter_text
|
232
241
|
end
|
233
242
|
opts[:output] << '-ac' << @options[:audio_channels] if @options[:audio_channels]
|
@@ -239,7 +248,7 @@ module Libis
|
|
239
248
|
opts[:output] << '-map_metadata:g' << '0:g' # Copy global metadata
|
240
249
|
opts[:output] << '-map_metadata:s:a' << '0:s:a' # Copy audio metadata
|
241
250
|
opts[:output] << '-map_metadata:s:v' << '0:s:v' # Copy video metadata
|
242
|
-
opts[:input] << '-accurate_seek' << (@options[:start].to_i
|
251
|
+
opts[:input] << '-accurate_seek' << (@options[:start].to_i < 0 ? '-sseof' : '-ss') << @options[:start] if @options[:start]
|
243
252
|
opts[:input] << '-t' << @options[:duration] if @options[:duration]
|
244
253
|
opts[:output] << '-qscale' << @options[:video_quality] if @options[:video_quality]
|
245
254
|
opts[:output] << '-q:a' << @options[:audio_quality] if @options[:audio_quality]
|
@@ -265,19 +274,21 @@ module Libis
|
|
265
274
|
w = for_text ? 'tw' : 'w'
|
266
275
|
h = for_text ? 'th' : 'h'
|
267
276
|
case @options[:watermark_position]
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
277
|
+
when 'bottom_left'
|
278
|
+
"x=#{margin}:y=H-#{h}-#{margin}"
|
279
|
+
when 'top_left'
|
280
|
+
"x=#{margin}:y=#{margin}"
|
281
|
+
when 'bottom_right'
|
282
|
+
"x=W-#{w}-#{margin}:y=H-#{h}-#{margin}"
|
283
|
+
when 'top_right'
|
284
|
+
"x=W-#{w}-#{margin}:y=#{margin}"
|
285
|
+
else
|
286
|
+
"x=#{margin}:y=H-#{h}-#{margin}"
|
278
287
|
end
|
279
288
|
end
|
289
|
+
|
280
290
|
end
|
291
|
+
|
281
292
|
end
|
282
293
|
end
|
283
|
-
end
|
294
|
+
end
|
@@ -1,19 +1,18 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'base'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Converter
|
6
|
+
|
8
7
|
class XsltConverter < Libis::Format::Converter::Base
|
8
|
+
|
9
9
|
def self.input_types
|
10
10
|
[:XML]
|
11
11
|
end
|
12
12
|
|
13
13
|
def self.output_types(format = nil)
|
14
|
-
return []
|
15
|
-
|
16
|
-
%i[XML HTML TXT]
|
14
|
+
return [] unless input_types.include?(format) if format
|
15
|
+
[:XML, :HTML, :TXT]
|
17
16
|
end
|
18
17
|
|
19
18
|
def xsl_file(file_path)
|
@@ -35,12 +34,12 @@ module Libis
|
|
35
34
|
|
36
35
|
FileUtils.mkpath(File.dirname(target))
|
37
36
|
|
38
|
-
if RUBY_PLATFORM ==
|
37
|
+
if RUBY_PLATFORM == "java"
|
39
38
|
require 'saxon-xslt'
|
40
39
|
xsl = Saxon.XSLT(File.open(@options[:xsl_file]))
|
41
40
|
xml = Saxon.XML(File.open(source))
|
42
41
|
result = xsl.transform(xml)
|
43
|
-
File.
|
42
|
+
File.open(target, 'w') {|f| f.write(result.to_s)}
|
44
43
|
else
|
45
44
|
require 'nokogiri'
|
46
45
|
|
@@ -87,17 +86,16 @@ module Libis
|
|
87
86
|
error "Error transforming '#{source}' with '#{file}': #{e.message} @ #{e.backtrace[0]}"
|
88
87
|
return nil
|
89
88
|
ensure
|
90
|
-
fp.close unless fp.nil?
|
89
|
+
fp.close unless fp.nil? or fp.closed?
|
91
90
|
end
|
92
91
|
|
93
|
-
|
94
|
-
command: { status: 0 },
|
95
|
-
files: [target]
|
96
|
-
}
|
97
|
-
|
92
|
+
target
|
98
93
|
end
|
94
|
+
|
99
95
|
end
|
96
|
+
|
100
97
|
end
|
98
|
+
|
101
99
|
end
|
102
100
|
end
|
103
101
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'singleton'
|
4
4
|
require 'pathname'
|
@@ -7,8 +7,9 @@ require 'libis-tools'
|
|
7
7
|
require 'libis/tools/extend/hash'
|
8
8
|
require 'libis/tools/extend/string'
|
9
9
|
require 'libis/tools/extend/empty'
|
10
|
+
require 'nori/core_ext/object'
|
10
11
|
|
11
|
-
require 'libis/format/
|
12
|
+
require 'libis/format/library'
|
12
13
|
|
13
14
|
require_relative 'config'
|
14
15
|
require_relative 'tool/fido'
|
@@ -18,10 +19,13 @@ require_relative 'tool/extension_identification'
|
|
18
19
|
|
19
20
|
module Libis
|
20
21
|
module Format
|
22
|
+
|
21
23
|
class Identifier
|
22
24
|
include ::Libis::Tools::Logger
|
23
25
|
include Singleton
|
24
26
|
|
27
|
+
public
|
28
|
+
|
25
29
|
def self.add_xml_validation(mimetype, xsd_file)
|
26
30
|
instance.xml_validations[mimetype] = xsd_file
|
27
31
|
end
|
@@ -37,28 +41,29 @@ module Libis
|
|
37
41
|
attr_reader :xml_validations
|
38
42
|
|
39
43
|
def get(file, options = {})
|
40
|
-
|
41
|
-
options[:
|
42
|
-
options[:
|
44
|
+
|
45
|
+
options[:droid] = true unless options.keys.include?(:droid) or (options[:tool] and options[:tool] != :droid)
|
46
|
+
options[:fido] = true unless options.keys.include?(:fido) or (options[:tool] and options[:tool] != :fido)
|
47
|
+
options[:file] = true unless options.keys.include?(:file) or (options[:tool] and options[:tool] != :file)
|
43
48
|
options[:xml_validation] = true if options[:xml_validation].nil?
|
44
49
|
|
45
|
-
result = {
|
50
|
+
result = {messages: [], output: {}, formats: {}}
|
46
51
|
|
47
52
|
begin
|
48
53
|
get_droid_identification(file, result, options) if options[:droid]
|
49
|
-
rescue
|
54
|
+
rescue => e
|
50
55
|
log_msg(result, :error, "Error running Droid: #{e.message} @ #{e.backtrace.first}")
|
51
56
|
end
|
52
57
|
|
53
58
|
begin
|
54
59
|
get_fido_identification(file, result, options) if options[:fido]
|
55
|
-
rescue
|
60
|
+
rescue => e
|
56
61
|
log_msg(result, :error, "Error running Fido: #{e.message} @ #{e.backtrace.first}")
|
57
62
|
end
|
58
63
|
|
59
64
|
begin
|
60
65
|
get_file_identification(file, result, options) if options[:file]
|
61
|
-
rescue
|
66
|
+
rescue => e
|
62
67
|
log_msg(result, :error, "Error running File: #{e.message} @ #{e.backtrace.first}")
|
63
68
|
end
|
64
69
|
|
@@ -69,13 +74,14 @@ module Libis
|
|
69
74
|
# Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
|
70
75
|
begin
|
71
76
|
validate_against_xml_schema(result, options[:base_dir]) if options[:xml_validation]
|
72
|
-
rescue
|
77
|
+
rescue => e
|
73
78
|
log_msg(result, :error, "Error validating XML files: #{e.message} @ #{e.backtrace.first}")
|
74
79
|
end
|
75
80
|
|
76
81
|
process_results(result, !options[:keep_output])
|
77
82
|
|
78
83
|
result
|
84
|
+
|
79
85
|
end
|
80
86
|
|
81
87
|
protected
|
@@ -91,7 +97,7 @@ module Libis
|
|
91
97
|
end
|
92
98
|
|
93
99
|
def get_fido_identification(file, result, options)
|
94
|
-
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive],
|
100
|
+
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive], options[:fido_options])
|
95
101
|
process_tool_output(output, result, options[:base_dir])
|
96
102
|
output
|
97
103
|
end
|
@@ -117,21 +123,18 @@ module Libis
|
|
117
123
|
end
|
118
124
|
|
119
125
|
def xml_validate(file, file_result, result, base_dir)
|
120
|
-
return unless file_result[:mimetype] =~
|
121
|
-
|
126
|
+
return unless file_result[:mimetype] =~ /^(text|application)\/xml$/
|
122
127
|
filepath = base_dir ? File.join(base_dir, file) : file
|
123
128
|
doc = ::Libis::Tools::XmlDocument.open filepath
|
124
129
|
xml_validations.each do |mime, xsd_file|
|
125
130
|
next unless xsd_file
|
126
|
-
|
127
131
|
begin
|
128
132
|
if doc.validates_against?(xsd_file)
|
129
133
|
log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
|
130
|
-
info = {
|
131
|
-
|
132
|
-
file_result.merge! Libis::Format::TypeDatabase.enrich(info, PUID: :puid, MIME: :mimetype, NAME: :format_name)
|
134
|
+
info = {mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation', format_version: '', }
|
135
|
+
file_result.merge! Libis::Format::Library.enrich(info, puid: :puid, mimetype: :mimetype, name: :format_name)
|
133
136
|
end
|
134
|
-
rescue
|
137
|
+
rescue => e
|
135
138
|
# Do nothing - probably Nokogiri crashed during validation. Could have many causes
|
136
139
|
# (remote schema (firewall, network, link rot, ...), schema syntax error, corrupt XML,...)
|
137
140
|
# so we log and continue.
|
@@ -139,7 +142,7 @@ module Libis
|
|
139
142
|
"Error during XML validation of file #{file} against #{File.basename(xsd_file)}: #{e.message}")
|
140
143
|
end
|
141
144
|
end
|
142
|
-
rescue
|
145
|
+
rescue => e
|
143
146
|
# Not much we can do. Probably Nokogiri crashed opening the XML file. What caused this?
|
144
147
|
# (XML not parsable, false XML identification, ???)
|
145
148
|
# so we log and continue.
|
@@ -147,30 +150,30 @@ module Libis
|
|
147
150
|
end
|
148
151
|
|
149
152
|
def process_results(result, delete_output = true)
|
150
|
-
result[:output].
|
153
|
+
result[:output].keys.each do |file|
|
151
154
|
output = result[:output][file]
|
152
155
|
file_result = result[:formats][file] = {}
|
153
156
|
if output.empty?
|
154
157
|
log_msg(result, :warn, "Could not identify format of '#{file}'.")
|
155
|
-
file_result
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
158
|
+
file_result = {
|
159
|
+
mimetype: 'application/octet-stream',
|
160
|
+
puid: 'fmt/unknown',
|
161
|
+
score: 0,
|
162
|
+
tool: nil
|
163
|
+
}
|
161
164
|
else
|
162
|
-
format_matches = output.group_by {
|
165
|
+
format_matches = output.group_by {|x| [x[:mimetype], x[:puid]]}
|
163
166
|
format_matches.each do |match, group|
|
164
|
-
format_matches[match] = group.group_by {
|
167
|
+
format_matches[match] = group.group_by {|x| x[:score]}.sort.reverse.to_h
|
165
168
|
end
|
166
169
|
case format_matches.count
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
170
|
+
when 0
|
171
|
+
# No this really cannot happen. If there are no hits, there will be at least a format [nil,nil]
|
172
|
+
when 1
|
173
|
+
# only one match, that's easy. The first of the highest score will be used
|
174
|
+
file_result.merge!(get_best_result(output))
|
175
|
+
else
|
176
|
+
process_multiple_formats(file_result, format_matches, output)
|
174
177
|
end
|
175
178
|
end
|
176
179
|
end
|
@@ -181,13 +184,12 @@ module Libis
|
|
181
184
|
# multiple matches. Let's select the highest score matches
|
182
185
|
file_result.merge!(get_best_result(output))
|
183
186
|
file_result[:alternatives] = []
|
184
|
-
format_matches.
|
187
|
+
format_matches.keys.each do |mime, puid|
|
185
188
|
next if file_result[:mimetype] == mime && puid.nil?
|
186
|
-
|
187
|
-
selection = output.select { |x| x[:mimetype] == mime && x[:puid] == puid }
|
189
|
+
selection = output.select {|x| x[:mimetype] == mime && x[:puid] == puid}
|
188
190
|
file_result[:alternatives] << get_best_result(selection)
|
189
191
|
end
|
190
|
-
file_result[:alternatives] = file_result[:alternatives].sort_by {
|
192
|
+
file_result[:alternatives] = file_result[:alternatives].sort_by {|x| x[:score]}.reverse
|
191
193
|
file_result.delete(:alternatives) if file_result[:alternatives].size <= 1
|
192
194
|
end
|
193
195
|
|
@@ -206,15 +208,15 @@ module Libis
|
|
206
208
|
end
|
207
209
|
|
208
210
|
def get_mimetype(puid)
|
209
|
-
::Libis::Format::
|
210
|
-
rescue StandardError
|
211
|
-
nil
|
211
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetypes)
|
212
212
|
end
|
213
213
|
|
214
214
|
def get_best_result(results)
|
215
|
-
score = results.map {
|
216
|
-
results.select {
|
215
|
+
score = results.map {|x| x[:score]}.max
|
216
|
+
results.select {|x| x[:score] == score}.reduce(:apply_defaults)
|
217
217
|
end
|
218
|
+
|
218
219
|
end
|
220
|
+
|
219
221
|
end
|
220
222
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Libis
  module Format
    # Plain value holder describing a single format entry: its name, category,
    # description and the lists of PUIDs, MIME types and file extensions
    # associated with it.
    class Info
      attr_reader :name, :category, :description, :puids, :mimetypes, :extensions

      # @param name the format name (required)
      # @param category the format category (required)
      # @param description free-text description; defaults to an empty string
      # @param puids list of PUIDs; defaults to an empty list
      # @param mimetypes list of MIME types; defaults to an empty list
      # @param extensions list of file extensions; defaults to an empty list
      def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
        @name = name
        @category = category
        @description = description
        @puids = puids
        @mimetypes = mimetypes
        @extensions = extensions
      end

      # Converts the entry into a Hash keyed by attribute name.
      #
      # The description and the three lists are duplicated (shallow copies), so
      # adding to or removing from the returned collections does not alter this
      # object.
      #
      # @return [Hash] with keys :name, :description, :category, :puids, :mimetypes, :extensions
      def to_hash
        {
          name: name,
          description: description.dup,
          category: category,
          puids: puids.dup,
          mimetypes: mimetypes.dup,
          extensions: extensions.dup
        }
      end
    end
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# coding: utf-8
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
  module Format

    # Singleton front-end for format look-ups.
    #
    # All queries are forwarded to a pluggable implementation object, which must
    # respond to +query(key, value)+. The elements it returns are converted with
    # +to_hash+; the code below reads the keys :name, :category, :puids,
    # :mimetypes and :extensions from those hashes.
    #
    # Every public instance method is also available as a class method that
    # delegates to the singleton instance.
    class Library
      include Singleton

      class << self
        def implementation=(impl)
          instance.implementation = impl
        end

        def get_info(format)
          instance.get_info(format)
        end

        def get_info_by(key, value)
          instance.get_info_by(key, value)
        end

        def get_infos_by(key, value)
          instance.get_infos_by(key, value)
        end

        def get_field(format, field)
          instance.get_field(format, field)
        end

        def get_field_by(key, value, field)
          instance.get_field_by(key, value, field)
        end

        def get_fields_by(key, value, field)
          instance.get_fields_by(key, value, field)
        end

        def known?(key, value)
          instance.known?(key, value)
        end

        def enrich(info, map_keys = {})
          instance.enrich(info, map_keys)
        end

        def normalize(info, map_keys = {})
          instance.normalize(info, map_keys)
        end
      end

      # Replaces the object that answers +query(key, value)+.
      attr_writer :implementation

      # @return the value of +field+ for the format with the given name, or nil when unknown
      def get_field(format, field)
        get_field_by(:name, format, field)
      end

      # Looks up the first entry matching +key+/+value+ and returns one field of it.
      # The singular fields :mimetype, :puid and :extension return the first element
      # of the corresponding list; any other field is read from the entry as-is.
      #
      # @return the field value, or nil when no entry matches
      def get_field_by(key, value, field)
        info = get_info_by(key, value)
        return nil unless info

        field_value(info, field)
      end

      # @return [Array] the non-nil values of +field+ over all entries matching +key+/+value+
      def get_fields_by(key, value, field)
        # get_infos_by always yields an Array, so no safe navigation is needed here
        # (the former `&.map { }.compact` chain would have failed on nil anyway).
        get_infos_by(key, value).map { |info| info[field] }.compact
      end

      # @return [Hash, nil] the entry for the format with the given name
      def get_info(format)
        get_info_by(:name, format)
      end

      # @return [Hash, nil] the first entry matching +key+/+value+
      def get_info_by(key, value)
        get_infos_by(key, value).first
      end

      # @return [Array<Hash>] all entries matching +key+/+value+; empty when the
      #   implementation reports nothing (including a nil result)
      def get_infos_by(key, value)
        result = @implementation.query(key, value) || []
        result.map(&:to_hash)
      end

      # @return [Boolean] true when at least one entry matches +key+/+value+
      def known?(key, value)
        !get_info_by(key, value).nil?
      end

      # Normalizes +info+ and then overwrites the mapped fields with the values
      # held by the library entry for the resolved format name.
      #
      # Only the keys present in +map_keys+ (plus :name) are considered. When no
      # format name could be resolved, or the name is unknown to the library, the
      # normalized +info+ is returned without further changes.
      #
      # @param [Hash] info format information; modified in place
      # @param [Hash] map_keys library field name => key used in +info+
      # @return [Hash] the enriched info
      def enrich(info, map_keys = {})
        info = normalize(info, map_keys)
        mapper = key_mapper(map_keys)
        # Reading mapper[:name] also registers :name in the mapper, so it is part
        # of the keys iterated below.
        format = info[mapper[:name]]
        return info if format.nil?

        lib_info = get_info(format)
        # Unknown format name: there is nothing to enrich with.
        return info unless lib_info

        mapper.keys.each do |key|
          value = field_value(lib_info, key)
          info[mapper[key]] = value if value
        end
        info
      end

      # Derive name from the available info
      #
      # The name is looked up by PUID first and by MIME type second, and is only
      # filled in when not already present. When a name is available, the
      # MIME type and category fields are set from the library entry of that name.
      #
      # @param [Hash] info format information; modified in place
      # @param [Hash] map_keys library field name => key used in +info+
      # @return [Hash] the normalized info, or an empty Hash when +info+ is not a Hash
      def normalize(info, map_keys = {})
        return {} unless info.is_a? Hash

        mapper = key_mapper(map_keys)
        name_key = mapper[:name]

        # fill format from looking up by puid
        puid = info[mapper[:puid]]
        info[name_key] ||= get_field_by(:puid, puid, :name) unless puid.blank?

        # fill format from looking up by mimetype
        mime = info[mapper[:mimetype]]
        info[name_key] ||= get_field_by(:mimetype, mime, :name) unless mime.blank?

        # finally complete the information from looking up by format name
        format = info[name_key]
        unless format.nil?
          info[mapper[:mimetype]] = get_field(format, :mimetype)
          info[mapper[:category]] = get_field(format, :category)
        end
        info
      end

      private

      def initialize
        # NOTE(review): eval executes whatever string the configuration holds;
        # this setting must never be populated from untrusted input.
        @implementation = eval(Libis::Format::Config[:format_library_implementation])
      end

      # Builds the field-name mapper: explicitly mapped keys come from +map_keys+,
      # any other key maps to itself and is remembered on first access.
      def key_mapper(map_keys)
        Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
      end

      # Reads +field+ from an entry hash; the singular fields :mimetype, :puid and
      # :extension resolve to the first element of the matching list.
      def field_value(info, field)
        case field
        when :mimetype
          info[:mimetypes]&.first
        when :puid
          info[:puids]&.first
        when :extension
          info[:extensions]&.first
        else
          info[field]
        end
      end

    end

  end
end
|