libis-format 1.3.7.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -62
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -248
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +73 -109
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -36
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,26 +1,29 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'base'
|
4
|
-
require 'libis/format/tool/
|
2
|
+
require 'libis/format/tool/ffmpeg'
|
5
3
|
|
6
4
|
require 'fileutils'
|
7
5
|
|
8
6
|
module Libis
|
9
7
|
module Format
|
10
8
|
module Converter
|
9
|
+
|
11
10
|
class VideoConverter < Libis::Format::Converter::Base
|
11
|
+
|
12
12
|
def self.input_types
|
13
|
-
|
13
|
+
[:WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
|
14
14
|
end
|
15
15
|
|
16
16
|
def self.output_types(format = nil)
|
17
|
-
return [] unless input_types.include?(format)
|
17
|
+
return [] unless input_types.include?(format) if format
|
18
|
+
[:GIF, :WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
|
19
|
+
end
|
18
20
|
|
19
|
-
|
21
|
+
def initialize
|
22
|
+
super
|
20
23
|
end
|
21
24
|
|
22
|
-
def quiet(
|
23
|
-
@flags[:quiet] = !!
|
25
|
+
def quiet(v)
|
26
|
+
@flags[:quiet] = !!v
|
24
27
|
end
|
25
28
|
|
26
29
|
def format(format)
|
@@ -64,7 +67,7 @@ module Libis
|
|
64
67
|
end
|
65
68
|
|
66
69
|
def constant_rate_factor(value)
|
67
|
-
@options[:
|
70
|
+
@options[:crf] = value.to_s
|
68
71
|
end
|
69
72
|
|
70
73
|
def frame_rate(value)
|
@@ -84,66 +87,36 @@ module Libis
|
|
84
87
|
@options[:watermark_image] = file
|
85
88
|
end
|
86
89
|
|
87
|
-
# @param [String] value
|
90
|
+
# @param [String] value text for watermark. No watermark if nil (default)
|
88
91
|
def watermark_text(value)
|
89
|
-
@options[:watermark_text] = value
|
90
|
-
end
|
91
|
-
|
92
|
-
# @param [Boolean] value Should the filename be appended to the watermark text; use any value to enable
|
93
|
-
def watermark_text_add_filename(value)
|
94
|
-
@options[:watermark_text_add_filename] = !!value
|
92
|
+
@options[:watermark_text] = value
|
95
93
|
end
|
96
94
|
|
97
|
-
# @param [
|
95
|
+
# @param [Integer] value Font size for watermark text. Default: 10
|
98
96
|
# Note that the font is selected by the Config[:watermark_font] setting
|
99
97
|
def watermark_text_size(value)
|
100
|
-
@options[:watermark_text_size] = value.
|
98
|
+
@options[:watermark_text_size] = value.to_i
|
101
99
|
end
|
102
100
|
|
103
101
|
# @param [String] value Text color for the watermark text. Default: white
|
104
102
|
def watermark_text_color(value)
|
105
|
-
@options[:watermark_text_color] = value
|
106
|
-
end
|
107
|
-
|
108
|
-
# @param [Integer] value Offset of the watermark text shadow. Used for both x and y offset; default: 1
|
109
|
-
# If the offset is set to 0, no shadow will be printed
|
110
|
-
def watermark_text_shadow_offset(value)
|
111
|
-
@options[:watermark_text_shadow_offset] = value.to_i
|
103
|
+
@options[:watermark_text_color] = value
|
112
104
|
end
|
113
105
|
|
114
106
|
# @param [String] value Text color for the watermark text shadow. Default: black
|
115
107
|
def watermark_text_shadow_color(value)
|
116
|
-
@options[:watermark_text_shadow_color] = value
|
117
|
-
end
|
118
|
-
|
119
|
-
# @param [Integer] value Enable/disable watermark text box. 1 to enanble, 0 to disable; default: 0
|
120
|
-
def watermark_text_box(value)
|
121
|
-
@options[:watermark_text_box] = value.to_i
|
122
|
-
end
|
123
|
-
|
124
|
-
# @param [String] value Color of the watermark text box; default: white
|
125
|
-
def watermark_text_box_color(value)
|
126
|
-
@options[:watermark_text_box_color] = value.to_s
|
108
|
+
@options[:watermark_text_shadow_color] = value
|
127
109
|
end
|
128
110
|
|
129
|
-
# @param [
|
130
|
-
|
131
|
-
|
111
|
+
# @param [Integer] value Offset of the watermark text shadow. Used for both x and y offset; default: 1
|
112
|
+
# If the offset is set to 0, no shadow will be printed
|
113
|
+
def watermark_text_shadow_offset(value)
|
114
|
+
@options[:watermark_text_offset] = value.to_i
|
132
115
|
end
|
133
116
|
|
134
117
|
# @param [String] value one of 'bottom_left' (default), 'top_left', 'bottom_right', 'top_right', 'center'
|
135
118
|
def watermark_position(value)
|
136
|
-
@options[:watermark_position] = value
|
137
|
-
end
|
138
|
-
|
139
|
-
# @param [String] value offset x value for the text box. Default: 10
|
140
|
-
def watermark_offset_x(value)
|
141
|
-
@options[:watermark_offset_x] = value.to_s
|
142
|
-
end
|
143
|
-
|
144
|
-
# @param [String] value offset y value for the text box. Default: 10
|
145
|
-
def watermark_offset_y(value)
|
146
|
-
@options[:watermark_offset_y] = value.to_s
|
119
|
+
@options[:watermark_position] = value
|
147
120
|
end
|
148
121
|
|
149
122
|
# @param [Number] value watermark opacity (0-1) with 0 = invisible and 1 = 100% opaque. Default: 0.5
|
@@ -151,17 +124,12 @@ module Libis
|
|
151
124
|
@options[:watermark_opacity] = value.to_f
|
152
125
|
end
|
153
126
|
|
154
|
-
# @param [Number] value watermark blending (0-1) with 0 = invisible and 1 = 100% opaque. Default: 0.5
|
155
|
-
def watermark_blending(value)
|
156
|
-
@options[:watermark_blending] = value.to_f
|
157
|
-
end
|
158
|
-
|
159
127
|
# @param [Boolean] value If set to true automatically selects optimal format for web viewing. Default: false
|
160
128
|
def web_stream(value)
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
129
|
+
if value
|
130
|
+
@options[:video_codec] = 'h264'
|
131
|
+
@options[:audio_codec] = 'acc'
|
132
|
+
end
|
165
133
|
end
|
166
134
|
|
167
135
|
# @param [String] name name of a preset. See FFMpeg documentation for more info
|
@@ -202,7 +170,7 @@ module Libis
|
|
202
170
|
|
203
171
|
elsif File.directory?(source)
|
204
172
|
|
205
|
-
sources = Dir[File.join(source, '**', '*')].reject {
|
173
|
+
sources = Dir[File.join(source, '**', '*')].reject {|p| File.directory? p}
|
206
174
|
assemble_and_convert(sources, target)
|
207
175
|
|
208
176
|
else
|
@@ -211,17 +179,15 @@ module Libis
|
|
211
179
|
|
212
180
|
end
|
213
181
|
|
214
|
-
|
215
|
-
|
216
|
-
converter: self.class.name
|
217
|
-
}
|
182
|
+
target
|
183
|
+
|
218
184
|
end
|
219
185
|
|
220
186
|
def assemble_and_convert(sources, target)
|
221
|
-
Tempfile.create(%w
|
222
|
-
sources.each {
|
187
|
+
Tempfile.create(%w(list .txt)) do |f|
|
188
|
+
sources.each {|src| f.puts src}
|
223
189
|
opts[:global] ||= []
|
224
|
-
opts[:global] += %w
|
190
|
+
opts[:global] += %w(-f concat)
|
225
191
|
f.close
|
226
192
|
target = convert_file(f.to_path, target)
|
227
193
|
end
|
@@ -232,15 +198,15 @@ module Libis
|
|
232
198
|
|
233
199
|
def convert_file(source, target)
|
234
200
|
# FLV special: only supports aac and speex audio codecs
|
235
|
-
format = (@options[:format] || File.extname(target)[1
|
236
|
-
@options[:audio_codec] ||= 'aac' if %w
|
201
|
+
format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
|
202
|
+
@options[:audio_codec] ||= 'aac' if %w'flv'.include?(format)
|
237
203
|
|
238
204
|
# SWF special: only supports mp3 audio codec
|
239
|
-
format = (@options[:format] || File.extname(target)[1
|
240
|
-
@options[:audio_codec] ||= 'mp3' if %w
|
205
|
+
format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
|
206
|
+
@options[:audio_codec] ||= 'mp3' if %w'swf'.include?(format)
|
241
207
|
|
242
208
|
# Set up FFMpeg command line parameters
|
243
|
-
opts = {
|
209
|
+
opts = {global: [], input: [], filter: [], output: []}
|
244
210
|
opts[:global] << '-hide_banner'
|
245
211
|
opts[:global] << '-loglevel' << (@options[:quiet] ? 'fatal' : 'warning')
|
246
212
|
|
@@ -248,31 +214,29 @@ module Libis
|
|
248
214
|
@options[:watermark_opacity] ||= 0.5
|
249
215
|
if @options[:watermark_image]
|
250
216
|
opts[:filter] << '-i' << @options[:watermark_image] << '-filter_complex'
|
251
|
-
opts[:filter] <<
|
252
|
-
|
217
|
+
opts[:filter] << "[1:v]format=argb,colorchannelmixer=aa=%f[wm];[0:v][wm]overlay=%s" %
|
218
|
+
[@options[:watermark_opacity], watermark_position_text]
|
253
219
|
elsif @options[:watermark_text]
|
254
|
-
|
255
|
-
wm_text += File.basename(source, '.*') if @options[:watermark_text_add_filename]
|
256
|
-
@options[:watermark_text_size] ||= '10'
|
220
|
+
@options[:watermark_text_size] ||= 10
|
257
221
|
@options[:watermark_text_color] ||= 'white'
|
258
222
|
@options[:watermark_text_shadow_color] ||= 'black'
|
259
223
|
@options[:watermark_text_shadow_offset] ||= 1
|
260
|
-
filter_text =
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
224
|
+
filter_text = "drawtext=text='%s':%s:fontfile=%s:fontsize=%d:fontcolor=%s@%f" %
|
225
|
+
[
|
226
|
+
@options[:watermark_text],
|
227
|
+
watermark_position_text(true),
|
228
|
+
Config[:watermark_font],
|
229
|
+
@options[:watermark_text_size],
|
230
|
+
@options[:watermark_text_color],
|
231
|
+
@options[:watermark_opacity]
|
232
|
+
]
|
233
|
+
filter_text += ':shadowcolor=%s@%f:shadowx=%d:shadowy=%d' %
|
234
|
+
[
|
235
|
+
@options[:watermark_text_shadow_color],
|
236
|
+
@options[:watermark_opacity],
|
237
|
+
@options[:watermark_text_shadow_offset],
|
238
|
+
@options[:watermark_text_shadow_offset]
|
239
|
+
] if @options[:watermark_text_shadow_offset] > 0
|
276
240
|
opts[:filter] << '-vf' << filter_text
|
277
241
|
end
|
278
242
|
opts[:output] << '-ac' << @options[:audio_channels] if @options[:audio_channels]
|
@@ -280,11 +244,11 @@ module Libis
|
|
280
244
|
opts[:output] << '-c:v' << @options[:video_codec] if @options[:video_codec]
|
281
245
|
opts[:output] << '-b:a' << @options[:audio_bitrate] if @options[:audio_bitrate]
|
282
246
|
opts[:output] << '-b:v' << @options[:video_bitrate] if @options[:video_bitrate]
|
283
|
-
opts[:output] << '-crf' << @options[:
|
247
|
+
opts[:output] << '-crf' << @options[:crf] if @options[:crf]
|
284
248
|
opts[:output] << '-map_metadata:g' << '0:g' # Copy global metadata
|
285
249
|
opts[:output] << '-map_metadata:s:a' << '0:s:a' # Copy audio metadata
|
286
250
|
opts[:output] << '-map_metadata:s:v' << '0:s:v' # Copy video metadata
|
287
|
-
opts[:input] << '-accurate_seek' << (@options[:start].to_i
|
251
|
+
opts[:input] << '-accurate_seek' << (@options[:start].to_i < 0 ? '-sseof' : '-ss') << @options[:start] if @options[:start]
|
288
252
|
opts[:input] << '-t' << @options[:duration] if @options[:duration]
|
289
253
|
opts[:output] << '-qscale' << @options[:video_quality] if @options[:video_quality]
|
290
254
|
opts[:output] << '-q:a' << @options[:audio_quality] if @options[:audio_quality]
|
@@ -306,25 +270,25 @@ module Libis
|
|
306
270
|
target
|
307
271
|
end
|
308
272
|
|
309
|
-
def
|
310
|
-
margin_x = @options[:watermark_offset_x] || 10
|
311
|
-
margin_y = @options[:watermark_offset_y] || 10
|
273
|
+
def watermark_position_text(for_text = false, margin = 10)
|
312
274
|
w = for_text ? 'tw' : 'w'
|
313
275
|
h = for_text ? 'th' : 'h'
|
314
276
|
case @options[:watermark_position]
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
277
|
+
when 'bottom_left'
|
278
|
+
"x=#{margin}:y=H-#{h}-#{margin}"
|
279
|
+
when 'top_left'
|
280
|
+
"x=#{margin}:y=#{margin}"
|
281
|
+
when 'bottom_right'
|
282
|
+
"x=W-#{w}-#{margin}:y=H-#{h}-#{margin}"
|
283
|
+
when 'top_right'
|
284
|
+
"x=W-#{w}-#{margin}:y=#{margin}"
|
285
|
+
else
|
286
|
+
"x=#{margin}:y=H-#{h}-#{margin}"
|
325
287
|
end
|
326
288
|
end
|
289
|
+
|
327
290
|
end
|
291
|
+
|
328
292
|
end
|
329
293
|
end
|
330
|
-
end
|
294
|
+
end
|
@@ -1,19 +1,18 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require_relative 'base'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Converter
|
6
|
+
|
8
7
|
class XsltConverter < Libis::Format::Converter::Base
|
8
|
+
|
9
9
|
def self.input_types
|
10
10
|
[:XML]
|
11
11
|
end
|
12
12
|
|
13
13
|
def self.output_types(format = nil)
|
14
|
-
return []
|
15
|
-
|
16
|
-
%i[XML HTML TXT]
|
14
|
+
return [] unless input_types.include?(format) if format
|
15
|
+
[:XML, :HTML, :TXT]
|
17
16
|
end
|
18
17
|
|
19
18
|
def xsl_file(file_path)
|
@@ -35,12 +34,12 @@ module Libis
|
|
35
34
|
|
36
35
|
FileUtils.mkpath(File.dirname(target))
|
37
36
|
|
38
|
-
if RUBY_PLATFORM ==
|
37
|
+
if RUBY_PLATFORM == "java"
|
39
38
|
require 'saxon-xslt'
|
40
39
|
xsl = Saxon.XSLT(File.open(@options[:xsl_file]))
|
41
40
|
xml = Saxon.XML(File.open(source))
|
42
41
|
result = xsl.transform(xml)
|
43
|
-
File.
|
42
|
+
File.open(target, 'w') {|f| f.write(result.to_s)}
|
44
43
|
else
|
45
44
|
require 'nokogiri'
|
46
45
|
|
@@ -87,17 +86,16 @@ module Libis
|
|
87
86
|
error "Error transforming '#{source}' with '#{file}': #{e.message} @ #{e.backtrace[0]}"
|
88
87
|
return nil
|
89
88
|
ensure
|
90
|
-
fp.close unless fp.nil?
|
89
|
+
fp.close unless fp.nil? or fp.closed?
|
91
90
|
end
|
92
91
|
|
93
|
-
|
94
|
-
command: { status: 0 },
|
95
|
-
files: [target]
|
96
|
-
}
|
97
|
-
|
92
|
+
target
|
98
93
|
end
|
94
|
+
|
99
95
|
end
|
96
|
+
|
100
97
|
end
|
98
|
+
|
101
99
|
end
|
102
100
|
end
|
103
101
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'singleton'
|
4
4
|
require 'pathname'
|
@@ -7,8 +7,9 @@ require 'libis-tools'
|
|
7
7
|
require 'libis/tools/extend/hash'
|
8
8
|
require 'libis/tools/extend/string'
|
9
9
|
require 'libis/tools/extend/empty'
|
10
|
+
require 'nori/core_ext/object'
|
10
11
|
|
11
|
-
require 'libis/format/
|
12
|
+
require 'libis/format/library'
|
12
13
|
|
13
14
|
require_relative 'config'
|
14
15
|
require_relative 'tool/fido'
|
@@ -18,10 +19,13 @@ require_relative 'tool/extension_identification'
|
|
18
19
|
|
19
20
|
module Libis
|
20
21
|
module Format
|
22
|
+
|
21
23
|
class Identifier
|
22
24
|
include ::Libis::Tools::Logger
|
23
25
|
include Singleton
|
24
26
|
|
27
|
+
public
|
28
|
+
|
25
29
|
def self.add_xml_validation(mimetype, xsd_file)
|
26
30
|
instance.xml_validations[mimetype] = xsd_file
|
27
31
|
end
|
@@ -37,28 +41,29 @@ module Libis
|
|
37
41
|
attr_reader :xml_validations
|
38
42
|
|
39
43
|
def get(file, options = {})
|
40
|
-
|
41
|
-
options[:
|
42
|
-
options[:
|
44
|
+
|
45
|
+
options[:droid] = true unless options.keys.include?(:droid) or (options[:tool] and options[:tool] != :droid)
|
46
|
+
options[:fido] = true unless options.keys.include?(:fido) or (options[:tool] and options[:tool] != :fido)
|
47
|
+
options[:file] = true unless options.keys.include?(:file) or (options[:tool] and options[:tool] != :file)
|
43
48
|
options[:xml_validation] = true if options[:xml_validation].nil?
|
44
49
|
|
45
|
-
result = {
|
50
|
+
result = {messages: [], output: {}, formats: {}}
|
46
51
|
|
47
52
|
begin
|
48
53
|
get_droid_identification(file, result, options) if options[:droid]
|
49
|
-
rescue
|
54
|
+
rescue => e
|
50
55
|
log_msg(result, :error, "Error running Droid: #{e.message} @ #{e.backtrace.first}")
|
51
56
|
end
|
52
57
|
|
53
58
|
begin
|
54
59
|
get_fido_identification(file, result, options) if options[:fido]
|
55
|
-
rescue
|
60
|
+
rescue => e
|
56
61
|
log_msg(result, :error, "Error running Fido: #{e.message} @ #{e.backtrace.first}")
|
57
62
|
end
|
58
63
|
|
59
64
|
begin
|
60
65
|
get_file_identification(file, result, options) if options[:file]
|
61
|
-
rescue
|
66
|
+
rescue => e
|
62
67
|
log_msg(result, :error, "Error running File: #{e.message} @ #{e.backtrace.first}")
|
63
68
|
end
|
64
69
|
|
@@ -69,13 +74,14 @@ module Libis
|
|
69
74
|
# Libis::Tools::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
|
70
75
|
begin
|
71
76
|
validate_against_xml_schema(result, options[:base_dir]) if options[:xml_validation]
|
72
|
-
rescue
|
77
|
+
rescue => e
|
73
78
|
log_msg(result, :error, "Error validating XML files: #{e.message} @ #{e.backtrace.first}")
|
74
79
|
end
|
75
80
|
|
76
81
|
process_results(result, !options[:keep_output])
|
77
82
|
|
78
83
|
result
|
84
|
+
|
79
85
|
end
|
80
86
|
|
81
87
|
protected
|
@@ -91,7 +97,7 @@ module Libis
|
|
91
97
|
end
|
92
98
|
|
93
99
|
def get_fido_identification(file, result, options)
|
94
|
-
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive],
|
100
|
+
output = ::Libis::Format::Tool::Fido.run(file, options[:recursive], options[:fido_options])
|
95
101
|
process_tool_output(output, result, options[:base_dir])
|
96
102
|
output
|
97
103
|
end
|
@@ -117,21 +123,18 @@ module Libis
|
|
117
123
|
end
|
118
124
|
|
119
125
|
def xml_validate(file, file_result, result, base_dir)
|
120
|
-
return unless file_result[:mimetype] =~
|
121
|
-
|
126
|
+
return unless file_result[:mimetype] =~ /^(text|application)\/xml$/
|
122
127
|
filepath = base_dir ? File.join(base_dir, file) : file
|
123
128
|
doc = ::Libis::Tools::XmlDocument.open filepath
|
124
129
|
xml_validations.each do |mime, xsd_file|
|
125
130
|
next unless xsd_file
|
126
|
-
|
127
131
|
begin
|
128
132
|
if doc.validates_against?(xsd_file)
|
129
133
|
log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
|
130
|
-
info = {
|
131
|
-
|
132
|
-
file_result.merge! Libis::Format::TypeDatabase.enrich(info, PUID: :puid, MIME: :mimetype, NAME: :format_name)
|
134
|
+
info = {mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation', format_version: '', }
|
135
|
+
file_result.merge! Libis::Format::Library.enrich(info, puid: :puid, mimetype: :mimetype, name: :format_name)
|
133
136
|
end
|
134
|
-
rescue
|
137
|
+
rescue => e
|
135
138
|
# Do nothing - probably Nokogiri chrashed during validation. Could have many causes
|
136
139
|
# (remote schema (firewall, network, link rot, ...), schema syntax error, corrupt XML,...)
|
137
140
|
# so we log and continue.
|
@@ -139,7 +142,7 @@ module Libis
|
|
139
142
|
"Error during XML validation of file #{file} against #{File.basename(xsd_file)}: #{e.message}")
|
140
143
|
end
|
141
144
|
end
|
142
|
-
rescue
|
145
|
+
rescue => e
|
143
146
|
# Not much we can do. probably Nokogiri chrashed opening the XML file. What caused this?
|
144
147
|
# (XML not parsable, false XML identification, ???)
|
145
148
|
# so we log and continue.
|
@@ -147,30 +150,30 @@ module Libis
|
|
147
150
|
end
|
148
151
|
|
149
152
|
def process_results(result, delete_output = true)
|
150
|
-
result[:output].
|
153
|
+
result[:output].keys.each do |file|
|
151
154
|
output = result[:output][file]
|
152
155
|
file_result = result[:formats][file] = {}
|
153
156
|
if output.empty?
|
154
157
|
log_msg(result, :warn, "Could not identify format of '#{file}'.")
|
155
|
-
file_result
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
158
|
+
file_result = {
|
159
|
+
mimetype: 'application/octet-stream',
|
160
|
+
puid: 'fmt/unknown',
|
161
|
+
score: 0,
|
162
|
+
tool: nil
|
163
|
+
}
|
161
164
|
else
|
162
|
-
format_matches = output.group_by {
|
165
|
+
format_matches = output.group_by {|x| [x[:mimetype], x[:puid]]}
|
163
166
|
format_matches.each do |match, group|
|
164
|
-
format_matches[match] = group.group_by {
|
167
|
+
format_matches[match] = group.group_by {|x| x[:score]}.sort.reverse.to_h
|
165
168
|
end
|
166
169
|
case format_matches.count
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
170
|
+
when 0
|
171
|
+
# No this really cannot happen. If there are no hits, there will be at least a format [nil,nil]
|
172
|
+
when 1
|
173
|
+
# only one match, that's easy. The first of the highest score will be used
|
174
|
+
file_result.merge!(get_best_result(output))
|
175
|
+
else
|
176
|
+
process_multiple_formats(file_result, format_matches, output)
|
174
177
|
end
|
175
178
|
end
|
176
179
|
end
|
@@ -181,13 +184,12 @@ module Libis
|
|
181
184
|
# multiple matches. Let's select the highest score matches
|
182
185
|
file_result.merge!(get_best_result(output))
|
183
186
|
file_result[:alternatives] = []
|
184
|
-
format_matches.
|
187
|
+
format_matches.keys.each do |mime, puid|
|
185
188
|
next if file_result[:mimetype] == mime && puid.nil?
|
186
|
-
|
187
|
-
selection = output.select { |x| x[:mimetype] == mime && x[:puid] == puid }
|
189
|
+
selection = output.select {|x| x[:mimetype] == mime && x[:puid] == puid}
|
188
190
|
file_result[:alternatives] << get_best_result(selection)
|
189
191
|
end
|
190
|
-
file_result[:alternatives] = file_result[:alternatives].sort_by {
|
192
|
+
file_result[:alternatives] = file_result[:alternatives].sort_by {|x| x[:score]}.reverse
|
191
193
|
file_result.delete(:alternatives) if file_result[:alternatives].size <= 1
|
192
194
|
end
|
193
195
|
|
@@ -206,15 +208,15 @@ module Libis
|
|
206
208
|
end
|
207
209
|
|
208
210
|
def get_mimetype(puid)
|
209
|
-
::Libis::Format::
|
210
|
-
rescue StandardError
|
211
|
-
nil
|
211
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetypes)
|
212
212
|
end
|
213
213
|
|
214
214
|
def get_best_result(results)
|
215
|
-
score = results.map {
|
216
|
-
results.select {
|
215
|
+
score = results.map {|x| x[:score]}.max
|
216
|
+
results.select {|x| x[:score] == score}.reduce(:apply_defaults)
|
217
217
|
end
|
218
|
+
|
218
219
|
end
|
220
|
+
|
219
221
|
end
|
220
222
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Libis
|
2
|
+
module Format
|
3
|
+
class Info
|
4
|
+
attr_reader :name, :category, :description, :puids, :mimetypes, :extensions
|
5
|
+
|
6
|
+
def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
|
7
|
+
@name = name
|
8
|
+
@category = category
|
9
|
+
@description = description
|
10
|
+
@puids = puids
|
11
|
+
@mimetypes = mimetypes
|
12
|
+
@extensions = extensions
|
13
|
+
end
|
14
|
+
|
15
|
+
def to_hash
|
16
|
+
{
|
17
|
+
name: name,
|
18
|
+
description: description.dup,
|
19
|
+
category: category,
|
20
|
+
puids: puids.dup,
|
21
|
+
mimetypes: mimetypes.dup,
|
22
|
+
extensions: extensions.dup
|
23
|
+
}
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|