libis-format 1.3.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -250
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,26 +1,29 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'base'
|
4
|
-
require 'libis/format/tool/
|
2
|
+
require 'libis/format/tool/ffmpeg'
|
5
3
|
|
6
4
|
require 'fileutils'
|
7
5
|
|
8
6
|
module Libis
|
9
7
|
module Format
|
10
8
|
module Converter
|
9
|
+
|
11
10
|
class VideoConverter < Libis::Format::Converter::Base
|
11
|
+
|
12
12
|
def self.input_types
|
13
|
-
|
13
|
+
[:WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.output_types(format = nil)
|
17
|
-
return [] unless input_types.include?(format)
|
17
|
+
return [] unless input_types.include?(format) if format
|
18
|
+
[:GIF, :WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
|
19
|
+
end
|
18
20
|
|
19
|
-
|
21
|
+
def initialize
|
22
|
+
super
|
20
23
|
end
|
21
24
|
|
22
|
-
def quiet(
|
23
|
-
@flags[:quiet] = !!
|
25
|
+
def quiet(v)
|
26
|
+
@flags[:quiet] = !!v
|
24
27
|
end
|
25
28
|
|
26
29
|
def format(format)
|
@@ -123,10 +126,10 @@ module Libis
|
|
123
126
|
|
124
127
|
# @param [Boolean] value If set to true automatically selects optimal format for web viewing. Default: false
|
125
128
|
def web_stream(value)
  # Nothing to configure unless web streaming was requested.
  return unless value

  @options[:video_codec] = 'h264'
  # FFmpeg's AAC encoder is named 'aac' (same value used for FLV targets in
  # convert_file); the previous value 'acc' is not a known codec name.
  @options[:audio_codec] = 'aac'
end
|
131
134
|
|
132
135
|
# @param [String] name name of a preset. See FFMpeg documentation for more info
|
@@ -167,7 +170,7 @@ module Libis
|
|
167
170
|
|
168
171
|
elsif File.directory?(source)
|
169
172
|
|
170
|
-
sources = Dir[File.join(source, '**', '*')].reject {
|
173
|
+
sources = Dir[File.join(source, '**', '*')].reject {|p| File.directory? p}
|
171
174
|
assemble_and_convert(sources, target)
|
172
175
|
|
173
176
|
else
|
@@ -176,17 +179,15 @@ module Libis
|
|
176
179
|
|
177
180
|
end
|
178
181
|
|
179
|
-
|
180
|
-
|
181
|
-
converter: self.class.name
|
182
|
-
}
|
182
|
+
target
|
183
|
+
|
183
184
|
end
|
184
185
|
|
185
186
|
def assemble_and_convert(sources, target)
|
186
|
-
Tempfile.create(%w
|
187
|
-
sources.each {
|
187
|
+
Tempfile.create(%w(list .txt)) do |f|
|
188
|
+
sources.each {|src| f.puts src}
|
188
189
|
opts[:global] ||= []
|
189
|
-
opts[:global] += %w
|
190
|
+
opts[:global] += %w(-f concat)
|
190
191
|
f.close
|
191
192
|
target = convert_file(f.to_path, target)
|
192
193
|
end
|
@@ -197,15 +198,15 @@ module Libis
|
|
197
198
|
|
198
199
|
def convert_file(source, target)
|
199
200
|
# FLV special: only supports aac and speex audio codecs
|
200
|
-
format = (@options[:format] || File.extname(target)[1
|
201
|
-
@options[:audio_codec] ||= 'aac' if %w
|
201
|
+
format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
|
202
|
+
@options[:audio_codec] ||= 'aac' if %w'flv'.include?(format)
|
202
203
|
|
203
204
|
# SWF special: only supports mp3 audio codec
|
204
|
-
format = (@options[:format] || File.extname(target)[1
|
205
|
-
@options[:audio_codec] ||= 'mp3' if %w
|
205
|
+
format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
|
206
|
+
@options[:audio_codec] ||= 'mp3' if %w'swf'.include?(format)
|
206
207
|
|
207
208
|
# Set up FFMpeg command line parameters
|
208
|
-
opts = {
|
209
|
+
opts = {global: [], input: [], filter: [], output: []}
|
209
210
|
opts[:global] << '-hide_banner'
|
210
211
|
opts[:global] << '-loglevel' << (@options[:quiet] ? 'fatal' : 'warning')
|
211
212
|
|
@@ -213,21 +214,29 @@ module Libis
|
|
213
214
|
@options[:watermark_opacity] ||= 0.5
|
214
215
|
if @options[:watermark_image]
|
215
216
|
opts[:filter] << '-i' << @options[:watermark_image] << '-filter_complex'
|
216
|
-
opts[:filter] <<
|
217
|
-
|
217
|
+
opts[:filter] << "[1:v]format=argb,colorchannelmixer=aa=%f[wm];[0:v][wm]overlay=%s" %
|
218
|
+
[@options[:watermark_opacity], watermark_position_text]
|
218
219
|
elsif @options[:watermark_text]
|
219
220
|
@options[:watermark_text_size] ||= 10
|
220
221
|
@options[:watermark_text_color] ||= 'white'
|
221
222
|
@options[:watermark_text_shadow_color] ||= 'black'
|
222
223
|
@options[:watermark_text_shadow_offset] ||= 1
|
223
|
-
filter_text =
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
224
|
+
filter_text = "drawtext=text='%s':%s:fontfile=%s:fontsize=%d:fontcolor=%s@%f" %
|
225
|
+
[
|
226
|
+
@options[:watermark_text],
|
227
|
+
watermark_position_text(true),
|
228
|
+
Config[:watermark_font],
|
229
|
+
@options[:watermark_text_size],
|
230
|
+
@options[:watermark_text_color],
|
231
|
+
@options[:watermark_opacity]
|
232
|
+
]
|
233
|
+
filter_text += ':shadowcolor=%s@%f:shadowx=%d:shadowy=%d' %
|
234
|
+
[
|
235
|
+
@options[:watermark_text_shadow_color],
|
236
|
+
@options[:watermark_opacity],
|
237
|
+
@options[:watermark_text_shadow_offset],
|
238
|
+
@options[:watermark_text_shadow_offset]
|
239
|
+
] if @options[:watermark_text_shadow_offset] > 0
|
231
240
|
opts[:filter] << '-vf' << filter_text
|
232
241
|
end
|
233
242
|
opts[:output] << '-ac' << @options[:audio_channels] if @options[:audio_channels]
|
@@ -239,7 +248,7 @@ module Libis
|
|
239
248
|
opts[:output] << '-map_metadata:g' << '0:g' # Copy global metadata
|
240
249
|
opts[:output] << '-map_metadata:s:a' << '0:s:a' # Copy audio metadata
|
241
250
|
opts[:output] << '-map_metadata:s:v' << '0:s:v' # Copy video metadata
|
242
|
-
opts[:input] << '-accurate_seek' << (@options[:start].to_i
|
251
|
+
opts[:input] << '-accurate_seek' << (@options[:start].to_i < 0 ? '-sseof' : '-ss') << @options[:start] if @options[:start]
|
243
252
|
opts[:input] << '-t' << @options[:duration] if @options[:duration]
|
244
253
|
opts[:output] << '-qscale' << @options[:video_quality] if @options[:video_quality]
|
245
254
|
opts[:output] << '-q:a' << @options[:audio_quality] if @options[:audio_quality]
|
@@ -265,19 +274,21 @@ module Libis
|
|
265
274
|
w = for_text ? 'tw' : 'w'
|
266
275
|
h = for_text ? 'th' : 'h'
|
267
276
|
case @options[:watermark_position]
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
277
|
+
when 'bottom_left'
|
278
|
+
"x=#{margin}:y=H-#{h}-#{margin}"
|
279
|
+
when 'top_left'
|
280
|
+
"x=#{margin}:y=#{margin}"
|
281
|
+
when 'bottom_right'
|
282
|
+
"x=W-#{w}-#{margin}:y=H-#{h}-#{margin}"
|
283
|
+
when 'top_right'
|
284
|
+
"x=W-#{w}-#{margin}:y=#{margin}"
|
285
|
+
else
|
286
|
+
"x=#{margin}:y=H-#{h}-#{margin}"
|
278
287
|
end
|
279
288
|
end
|
289
|
+
|
280
290
|
end
|
291
|
+
|
281
292
|
end
|
282
293
|
end
|
283
|
-
end
|
294
|
+
end
|
@@ -1,19 +1,18 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'base'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Converter
|
6
|
+
|
8
7
|
class XsltConverter < Libis::Format::Converter::Base
|
8
|
+
|
9
9
|
def self.input_types
|
10
10
|
[:XML]
|
11
11
|
end
|
12
12
|
|
13
13
|
def self.output_types(format = nil)
|
14
|
-
return []
|
15
|
-
|
16
|
-
%i[XML HTML TXT]
|
14
|
+
return [] unless input_types.include?(format) if format
|
15
|
+
[:XML, :HTML, :TXT]
|
17
16
|
end
|
18
17
|
|
19
18
|
def xsl_file(file_path)
|
@@ -35,12 +34,12 @@ module Libis
|
|
35
34
|
|
36
35
|
FileUtils.mkpath(File.dirname(target))
|
37
36
|
|
38
|
-
if RUBY_PLATFORM ==
|
37
|
+
if RUBY_PLATFORM == "java"
|
39
38
|
require 'saxon-xslt'
|
40
39
|
xsl = Saxon.XSLT(File.open(@options[:xsl_file]))
|
41
40
|
xml = Saxon.XML(File.open(source))
|
42
41
|
result = xsl.transform(xml)
|
43
|
-
File.
|
42
|
+
File.open(target, 'w') {|f| f.write(result.to_s)}
|
44
43
|
else
|
45
44
|
require 'nokogiri'
|
46
45
|
|
@@ -87,17 +86,16 @@ module Libis
|
|
87
86
|
error "Error transforming '#{source}' with '#{file}': #{e.message} @ #{e.backtrace[0]}"
|
88
87
|
return nil
|
89
88
|
ensure
|
90
|
-
fp.close unless fp.nil?
|
89
|
+
fp.close unless fp.nil? or fp.closed?
|
91
90
|
end
|
92
91
|
|
93
|
-
|
94
|
-
command: { status: 0 },
|
95
|
-
files: [target]
|
96
|
-
}
|
97
|
-
|
92
|
+
target
|
98
93
|
end
|
94
|
+
|
99
95
|
end
|
96
|
+
|
100
97
|
end
|
98
|
+
|
101
99
|
end
|
102
100
|
end
|
103
101
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'singleton'
|
4
4
|
require 'pathname'
|
@@ -7,8 +7,9 @@ require 'libis-tools'
|
|
7
7
|
require 'libis/tools/extend/hash'
|
8
8
|
require 'libis/tools/extend/string'
|
9
9
|
require 'libis/tools/extend/empty'
|
10
|
+
require 'nori/core_ext/object'
|
10
11
|
|
11
|
-
require 'libis/format/
|
12
|
+
require 'libis/format/library'
|
12
13
|
|
13
14
|
require_relative 'config'
|
14
15
|
require_relative 'tool/fido'
|
@@ -18,10 +19,13 @@ require_relative 'tool/extension_identification'
|
|
18
19
|
|
19
20
|
module Libis
|
20
21
|
module Format
|
22
|
+
|
21
23
|
class Identifier
|
22
24
|
include ::Libis::Tools::Logger
|
23
25
|
include Singleton
|
24
26
|
|
27
|
+
public
|
28
|
+
|
25
29
|
def self.add_xml_validation(mimetype, xsd_file)
|
26
30
|
instance.xml_validations[mimetype] = xsd_file
|
27
31
|
end
|
@@ -37,28 +41,29 @@ module Libis
|
|
37
41
|
attr_reader :xml_validations
|
38
42
|
|
39
43
|
def get(file, options = {})
|
40
|
-
|
41
|
-
options[:
|
42
|
-
options[:
|
44
|
+
|
45
|
+
options[:droid] = true unless options.keys.include?(:droid) or (options[:tool] and options[:tool] != :droid)
|
46
|
+
options[:fido] = true unless options.keys.include?(:fido) or (options[:tool] and options[:tool] != :fido)
|
47
|
+
options[:file] = true unless options.keys.include?(:file) or (options[:tool] and options[:tool] != :file)
|
43
48
|
options[:xml_validation] = true if options[:xml_validation].nil?
|
44
49
|
|
45
|
-
result = {
|
50
|
+
result = {messages: [], output: {}, formats: {}}
|
46
51
|
|
47
52
|
begin
|
48
53
|
get_droid_identification(file, result, options) if options[:droid]
|
49
|
-
rescue
|
54
|
+
rescue => e
|
50
55
|
log_msg(result, :error, "Error running Droid: #{e.message} @ #{e.backtrace.first}")
|
51
56
|
end
|
52
57
|
|
53
58
|
begin
|
54
59
|
get_fido_identification(file, result, options) if options[:fido]
|
55
|
-
rescue
|
60
|
+
rescue => e
|
56
61
|
log_msg(result, :error, "Error running Fido: #{e.message} @ #{e.backtrace.first}")
|
57
62
|
end
|
58
63
|
|
59
64
|
begin
|
60
65
|
get_file_identification(file, result, options) if options[:file]
|
61
|
-
rescue
|
66
|
+
rescue => e
|
62
67
|
log_msg(result, :error, "Error running File: #{e.message} @ #{e.backtrace.first}")
|
63
68
|
end
|
64
69
|
|
@@ -69,13 +74,14 @@ module Libis
|
|
69
74
|
# Libis::Tools::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
|
70
75
|
begin
|
71
76
|
validate_against_xml_schema(result, options[:base_dir]) if options[:xml_validation]
|
72
|
-
rescue
|
77
|
+
rescue => e
|
73
78
|
log_msg(result, :error, "Error validating XML files: #{e.message} @ #{e.backtrace.first}")
|
74
79
|
end
|
75
80
|
|
76
81
|
process_results(result, !options[:keep_output])
|
77
82
|
|
78
83
|
result
|
84
|
+
|
79
85
|
end
|
80
86
|
|
81
87
|
protected
|
@@ -91,7 +97,7 @@ module Libis
|
|
91
97
|
end
|
92
98
|
|
93
99
|
def get_fido_identification(file, result, options)
|
94
|
-
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive],
|
100
|
+
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive], options[:fido_options])
|
95
101
|
process_tool_output(output, result, options[:base_dir])
|
96
102
|
output
|
97
103
|
end
|
@@ -117,21 +123,18 @@ module Libis
|
|
117
123
|
end
|
118
124
|
|
119
125
|
def xml_validate(file, file_result, result, base_dir)
|
120
|
-
return unless file_result[:mimetype] =~
|
121
|
-
|
126
|
+
return unless file_result[:mimetype] =~ /^(text|application)\/xml$/
|
122
127
|
filepath = base_dir ? File.join(base_dir, file) : file
|
123
128
|
doc = ::Libis::Tools::XmlDocument.open filepath
|
124
129
|
xml_validations.each do |mime, xsd_file|
|
125
130
|
next unless xsd_file
|
126
|
-
|
127
131
|
begin
|
128
132
|
if doc.validates_against?(xsd_file)
|
129
133
|
log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
|
130
|
-
info = {
|
131
|
-
|
132
|
-
file_result.merge! Libis::Format::TypeDatabase.enrich(info, PUID: :puid, MIME: :mimetype, NAME: :format_name)
|
134
|
+
info = {mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation', format_version: '', }
|
135
|
+
file_result.merge! Libis::Format::Library.enrich(info, puid: :puid, mimetype: :mimetype, name: :format_name)
|
133
136
|
end
|
134
|
-
rescue
|
137
|
+
rescue => e
|
135
138
|
# Do nothing - probably Nokogiri crashed during validation. Could have many causes
|
136
139
|
# (remote schema (firewall, network, link rot, ...), schema syntax error, corrupt XML,...)
|
137
140
|
# so we log and continue.
|
@@ -139,7 +142,7 @@ module Libis
|
|
139
142
|
"Error during XML validation of file #{file} against #{File.basename(xsd_file)}: #{e.message}")
|
140
143
|
end
|
141
144
|
end
|
142
|
-
rescue
|
145
|
+
rescue => e
|
143
146
|
# Not much we can do. Probably Nokogiri crashed opening the XML file. What caused this?
|
144
147
|
# (XML not parsable, false XML identification, ???)
|
145
148
|
# so we log and continue.
|
@@ -147,30 +150,30 @@ module Libis
|
|
147
150
|
end
|
148
151
|
|
149
152
|
def process_results(result, delete_output = true)
|
150
|
-
result[:output].
|
153
|
+
result[:output].keys.each do |file|
|
151
154
|
output = result[:output][file]
|
152
155
|
file_result = result[:formats][file] = {}
|
153
156
|
if output.empty?
|
154
157
|
log_msg(result, :warn, "Could not identify format of '#{file}'.")
|
155
|
-
file_result
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
158
|
+
file_result = {
|
159
|
+
mimetype: 'application/octet-stream',
|
160
|
+
puid: 'fmt/unknown',
|
161
|
+
score: 0,
|
162
|
+
tool: nil
|
163
|
+
}
|
161
164
|
else
|
162
|
-
format_matches = output.group_by {
|
165
|
+
format_matches = output.group_by {|x| [x[:mimetype], x[:puid]]}
|
163
166
|
format_matches.each do |match, group|
|
164
|
-
format_matches[match] = group.group_by {
|
167
|
+
format_matches[match] = group.group_by {|x| x[:score]}.sort.reverse.to_h
|
165
168
|
end
|
166
169
|
case format_matches.count
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
170
|
+
when 0
|
171
|
+
# No, this really cannot happen. If there are no hits, there will be at least a format [nil,nil]
|
172
|
+
when 1
|
173
|
+
# only one match, that's easy. The first of the highest score will be used
|
174
|
+
file_result.merge!(get_best_result(output))
|
175
|
+
else
|
176
|
+
process_multiple_formats(file_result, format_matches, output)
|
174
177
|
end
|
175
178
|
end
|
176
179
|
end
|
@@ -181,13 +184,12 @@ module Libis
|
|
181
184
|
# multiple matches. Let's select the highest score matches
|
182
185
|
file_result.merge!(get_best_result(output))
|
183
186
|
file_result[:alternatives] = []
|
184
|
-
format_matches.
|
187
|
+
format_matches.keys.each do |mime, puid|
|
185
188
|
next if file_result[:mimetype] == mime && puid.nil?
|
186
|
-
|
187
|
-
selection = output.select { |x| x[:mimetype] == mime && x[:puid] == puid }
|
189
|
+
selection = output.select {|x| x[:mimetype] == mime && x[:puid] == puid}
|
188
190
|
file_result[:alternatives] << get_best_result(selection)
|
189
191
|
end
|
190
|
-
file_result[:alternatives] = file_result[:alternatives].sort_by {
|
192
|
+
file_result[:alternatives] = file_result[:alternatives].sort_by {|x| x[:score]}.reverse
|
191
193
|
file_result.delete(:alternatives) if file_result[:alternatives].size <= 1
|
192
194
|
end
|
193
195
|
|
@@ -206,15 +208,15 @@ module Libis
|
|
206
208
|
end
|
207
209
|
|
208
210
|
# Looks up the MIME type that the format library associates with a PUID.
#
# @param [String] puid format identifier to look up
# @return [String, nil] the first MIME type registered for the matching
#   format, or nil when the library has no entry for the PUID
def get_mimetype(puid)
  # Ask for the singular :mimetype field: the library then returns the first
  # entry of the format's MIME type list. Requesting :mimetypes would return
  # the whole list, while the rest of this class treats :mimetype as a single
  # string (regex match, equality comparison).
  ::Libis::Format::Library.get_field_by(:puid, puid, :mimetype)
end
|
213
213
|
|
214
214
|
def get_best_result(results)
|
215
|
-
score = results.map {
|
216
|
-
results.select {
|
215
|
+
score = results.map {|x| x[:score]}.max
|
216
|
+
results.select {|x| x[:score] == score}.reduce(:apply_defaults)
|
217
217
|
end
|
218
|
+
|
218
219
|
end
|
220
|
+
|
219
221
|
end
|
220
222
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Libis
  module Format
    # Read-only container for the attributes of a single format entry:
    # its name and category, a free-text description, and the lists of
    # PUIDs, MIME types and file extensions attached to it.
    class Info
      attr_reader :name, :category, :description, :puids, :mimetypes, :extensions

      # @param name format name (required)
      # @param category format category (required)
      # @param description free-text description; defaults to an empty string
      # @param puids list of PUIDs; defaults to an empty list
      # @param mimetypes list of MIME types; defaults to an empty list
      # @param extensions list of file extensions; defaults to an empty list
      def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
        @name, @category, @description = name, category, description
        @puids, @mimetypes, @extensions = puids, mimetypes, extensions
      end

      # Builds a hash representation of this entry.
      #
      # The description and the three lists are shallow-copied, so changes to
      # the returned hash do not write through to this object; name and
      # category are passed along as they are.
      #
      # @return [Hash] keys :name, :description, :category, :puids, :mimetypes, :extensions
      def to_hash
        hash = { name: name }
        hash[:description] = description.dup
        hash[:category] = category
        %i[puids mimetypes extensions].each do |list_name|
          hash[list_name] = public_send(list_name).dup
        end
        hash
      end
    end
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
# coding: utf-8
require 'singleton'

module Libis
  module Format

    # Singleton front-end for looking up format information.
    #
    # Every query is forwarded to a pluggable implementation object, which must
    # respond to +query(key, value)+ and return a collection of objects that
    # respond to +to_hash+. The lookups below read the keys :name, :category,
    # :mimetypes, :puids and :extensions from those hashes.
    #
    # Each class-level method simply delegates to the singleton instance.
    class Library
      include Singleton

      class << self
        def implementation=(impl)
          instance.implementation = impl
        end

        def get_info(format)
          instance.get_info(format)
        end

        def get_info_by(key, value)
          instance.get_info_by(key, value)
        end

        def get_infos_by(key, value)
          instance.get_infos_by(key, value)
        end

        def get_field(format, field)
          instance.get_field(format, field)
        end

        def get_field_by(key, value, field)
          instance.get_field_by(key, value, field)
        end

        def get_fields_by(key, value, field)
          instance.get_fields_by(key, value, field)
        end

        def known?(key, value)
          instance.known?(key, value)
        end

        def enrich(info, map_keys = {})
          instance.enrich(info, map_keys)
        end

        def normalize(info, map_keys = {})
          instance.normalize(info, map_keys)
        end
      end

      # Replaces the object that answers the queries.
      #
      # @param impl object responding to +query(key, value)+
      def implementation=(impl)
        @implementation = impl
      end

      # Returns one field of the format with the given name.
      # See #get_field_by for the handling of +field+.
      def get_field(format, field)
        get_field_by(:name, format, field)
      end

      # Returns one field of the first format matching +key+ == +value+.
      #
      # The singular fields :mimetype, :puid and :extension return the first
      # entry of the corresponding list; any other field is read as is.
      #
      # @return the field value, or nil when no format matches
      def get_field_by(key, value, field)
        info = get_info_by(key, value)
        return nil unless info
        case field
        when :mimetype
          info[:mimetypes]&.first
        when :puid
          info[:puids]&.first
        when :extension
          info[:extensions]&.first
        else
          info[field]
        end
      end

      # Collects +field+ from every format matching +key+ == +value+,
      # dropping nil entries. Unlike #get_field_by, +field+ is read as is
      # (no singular-to-list mapping).
      #
      # @return [Array] possibly empty
      def get_fields_by(key, value, field)
        get_infos_by(key, value).map { |info| info[field] }.compact
      end

      # Returns the info hash of the format with the given name, or nil.
      def get_info(format)
        get_info_by(:name, format)
      end

      # Returns the info hash of the first format matching +key+ == +value+, or nil.
      def get_info_by(key, value)
        get_infos_by(key, value).first
      end

      # Returns the info hashes of all formats matching +key+ == +value+.
      #
      # @return [Array<Hash>] empty when the implementation finds nothing
      def get_infos_by(key, value)
        # Treat a nil answer from the implementation as "no matches" instead of
        # failing with NoMethodError.
        result = @implementation.query(key, value) || []
        result.map(&:to_hash)
      end

      # Tells whether at least one format matches +key+ == +value+.
      def known?(key, value)
        !get_info_by(key, value).nil?
      end

      # Normalizes +info+ (see #normalize) and then fills it with the data the
      # library holds for the resulting format name.
      #
      # Only :name and the keys listed in +map_keys+ are filled. +map_keys+
      # maps a library key to the key used in +info+, e.g.
      # <tt>name: :format_name</tt>. Values missing from the library never
      # overwrite what +info+ already contains.
      #
      # @param [Hash] info hash to complete; modified in place
      # @param [Hash] map_keys library key => key used in +info+
      # @return [Hash] the completed hash ({} when +info+ is not a Hash)
      def enrich(info, map_keys = {})
        info = normalize(info, map_keys)
        # Unlisted keys map onto themselves.
        mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
        format = info[mapper[:name]]
        return info if format.nil?

        lib_info = get_info(format)
        # A format name unknown to the library leaves nothing to copy.
        return info unless lib_info

        mapper.keys.each do |key|
          value = case key
                  when :mimetype
                    lib_info[:mimetypes]&.first
                  when :puid
                    lib_info[:puids]&.first
                  when :extension
                    lib_info[:extensions]&.first
                  else
                    lib_info[key]
                  end
          info[mapper[key]] = value if value
        end
        info
      end

      # Derive name from the available info
      #
      # The name is looked up by PUID first and by MIME type second, unless
      # +info+ already has one. When a name is available, the :mimetype and
      # :category entries are then overwritten with the library's values for
      # that name.
      # NOTE(review): for a name the library does not know, both entries are
      # therefore set to nil - confirm that callers expect this.
      #
      # @param [Hash] info hash to normalize; modified in place
      # @param [Hash] map_keys library key => key used in +info+
      # @return [Hash] the normalized hash ({} when +info+ is not a Hash)
      def normalize(info, map_keys = {})
        return {} unless info.is_a? Hash
        mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
        # fill format from looking up by puid
        unless (puid = info[mapper[:puid]]).blank?
          info[mapper[:name]] ||= get_field_by(:puid, puid, :name)
        end
        # fill format from looking up by mimetype
        unless (mime = info[mapper[:mimetype]]).blank?
          info[mapper[:name]] ||= get_field_by(:mimetype, mime, :name)
        end
        # finally complete the information from looking up by format name
        unless (format = info[mapper[:name]]).nil?
          info[mapper[:mimetype]] = get_field(format, :mimetype)
          info[mapper[:category]] = get_field(format, :category)
        end
        info
      end

      private

      def initialize
        # NOTE(review): the configured value is evaluated as Ruby code, so it
        # must come from trusted configuration only. Kept as is because the
        # setting may be an arbitrary expression rather than a constant name.
        @implementation = eval(Libis::Format::Config[:format_library_implementation])
      end

    end

  end
end
|