libis-format 1.3.7.1 → 2.0.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -62
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +45 -248
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +73 -109
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +19 -25
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +33 -6
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +86 -128
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -36
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
@@ -1,26 +1,29 @@
1
- # frozen_string_literal: true
2
-
3
1
  require_relative 'base'
4
- require 'libis/format/tool/ff_mpeg'
2
+ require 'libis/format/tool/ffmpeg'
5
3
 
6
4
  require 'fileutils'
7
5
 
8
6
  module Libis
9
7
  module Format
10
8
  module Converter
9
+
11
10
  class VideoConverter < Libis::Format::Converter::Base
11
+
12
12
  def self.input_types
13
- %i[WEBM MP4 MPG MKV MJP2 QTFF AVI OGGV WMV DV FLV SWF]
13
+ [:WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
14
14
  end
15
15
 
16
16
  def self.output_types(format = nil)
17
- return [] unless input_types.include?(format)
17
+ return [] unless input_types.include?(format) if format
18
+ [:GIF, :WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
19
+ end
18
20
 
19
- %i[GIF WEBM MP4 MPG MKV MJP2 QTFF AVI OGGV WMV DV FLV SWF]
21
+ def initialize
22
+ super
20
23
  end
21
24
 
22
- def quiet(value)
23
- @flags[:quiet] = !!value
25
+ def quiet(v)
26
+ @flags[:quiet] = !!v
24
27
  end
25
28
 
26
29
  def format(format)
@@ -64,7 +67,7 @@ module Libis
64
67
  end
65
68
 
66
69
  def constant_rate_factor(value)
67
- @options[:constant_rate_factor] = value.to_s
70
+ @options[:crf] = value.to_s
68
71
  end
69
72
 
70
73
  def frame_rate(value)
@@ -84,66 +87,36 @@ module Libis
84
87
  @options[:watermark_image] = file
85
88
  end
86
89
 
87
- # @param [String] value Text for watermark. No watermark if nil (default)
90
+ # @param [String] value text for watermark. No watermark if nil (default)
88
91
  def watermark_text(value)
89
- @options[:watermark_text] = value.to_s
90
- end
91
-
92
- # @param [Boolean] value Should the filename be appended to the watermark text; use any value to enable
93
- def watermark_text_add_filename(value)
94
- @options[:watermark_text_add_filename] = !!value
92
+ @options[:watermark_text] = value
95
93
  end
96
94
 
97
- # @param [String] value Font size for watermark text. Default: 10
95
+ # @param [Integer] value Font size for watermark text. Default: 10
98
96
  # Note that the font is selected by the Config[:watermark_font] setting
99
97
  def watermark_text_size(value)
100
- @options[:watermark_text_size] = value.to_s
98
+ @options[:watermark_text_size] = value.to_i
101
99
  end
102
100
 
103
101
  # @param [String] value Text color for the watermark text. Default: white
104
102
  def watermark_text_color(value)
105
- @options[:watermark_text_color] = value.to_s
106
- end
107
-
108
- # @param [Integer] value Offset of the watermark text shadow. Used for both x and y offset; default: 1
109
- # If the offset is set to 0, no shadow will be printed
110
- def watermark_text_shadow_offset(value)
111
- @options[:watermark_text_shadow_offset] = value.to_i
103
+ @options[:watermark_text_color] = value
112
104
  end
113
105
 
114
106
  # @param [String] value Text color for the watermark text shadow. Default: black
115
107
  def watermark_text_shadow_color(value)
116
- @options[:watermark_text_shadow_color] = value.to_s
117
- end
118
-
119
- # @param [Integer] value Enable/disable watermark text box. 1 to enanble, 0 to disable; default: 0
120
- def watermark_text_box(value)
121
- @options[:watermark_text_box] = value.to_i
122
- end
123
-
124
- # @param [String] value Color of the watermark text box; default: white
125
- def watermark_text_box_color(value)
126
- @options[:watermark_text_box_color] = value.to_s
108
+ @options[:watermark_text_shadow_color] = value
127
109
  end
128
110
 
129
- # @param [String] value Border width of the watermark text box
130
- def watermark_text_box_width(value)
131
- @options[:watermark_text_box_width] = value.to_s
111
+ # @param [Integer] value Offset of the watermark text shadow. Used for both x and y offset; default: 1
112
+ # If the offset is set to 0, no shadow will be printed
113
+ def watermark_text_shadow_offset(value)
114
+ @options[:watermark_text_offset] = value.to_i
132
115
  end
133
116
 
134
117
  # @param [String] value one of 'bottom_left' (default), 'top_left', 'bottom_right', 'top_right', 'center'
135
118
  def watermark_position(value)
136
- @options[:watermark_position] = value.to_s
137
- end
138
-
139
- # @param [String] value offset x value for the text box. Default: 10
140
- def watermark_offset_x(value)
141
- @options[:watermark_offset_x] = value.to_s
142
- end
143
-
144
- # @param [String] value offset y value for the text box. Default: 10
145
- def watermark_offset_y(value)
146
- @options[:watermark_offset_y] = value.to_s
119
+ @options[:watermark_position] = value
147
120
  end
148
121
 
149
122
  # @param [Number] value watermark opacity (0-1) with 0 = invisible and 1 = 100% opaque. Default: 0.5
@@ -151,17 +124,12 @@ module Libis
151
124
  @options[:watermark_opacity] = value.to_f
152
125
  end
153
126
 
154
- # @param [Number] value watermark blending (0-1) with 0 = invisible and 1 = 100% opaque. Default: 0.5
155
- def watermark_blending(value)
156
- @options[:watermark_blending] = value.to_f
157
- end
158
-
159
127
  # @param [Boolean] value If set to true automatically selects optimal format for web viewing. Default: false
160
128
  def web_stream(value)
161
- return unless value
162
-
163
- @options[:video_codec] = 'h264'
164
- @options[:audio_codec] = 'acc'
129
+ if value
130
+ @options[:video_codec] = 'h264'
131
+ @options[:audio_codec] = 'acc'
132
+ end
165
133
  end
166
134
 
167
135
  # @param [String] name name of a preset. See FFMpeg documentation for more info
@@ -202,7 +170,7 @@ module Libis
202
170
 
203
171
  elsif File.directory?(source)
204
172
 
205
- sources = Dir[File.join(source, '**', '*')].reject { |p| File.directory? p }
173
+ sources = Dir[File.join(source, '**', '*')].reject {|p| File.directory? p}
206
174
  assemble_and_convert(sources, target)
207
175
 
208
176
  else
@@ -211,17 +179,15 @@ module Libis
211
179
 
212
180
  end
213
181
 
214
- {
215
- files: [target],
216
- converter: self.class.name
217
- }
182
+ target
183
+
218
184
  end
219
185
 
220
186
  def assemble_and_convert(sources, target)
221
- Tempfile.create(%w[list .txt]) do |f|
222
- sources.each { |src| f.puts src }
187
+ Tempfile.create(%w(list .txt)) do |f|
188
+ sources.each {|src| f.puts src}
223
189
  opts[:global] ||= []
224
- opts[:global] += %w[-f concat]
190
+ opts[:global] += %w(-f concat)
225
191
  f.close
226
192
  target = convert_file(f.to_path, target)
227
193
  end
@@ -232,15 +198,15 @@ module Libis
232
198
 
233
199
  def convert_file(source, target)
234
200
  # FLV special: only supports aac and speex audio codecs
235
- format = (@options[:format] || File.extname(target)[1..]).to_s.downcase
236
- @options[:audio_codec] ||= 'aac' if %w[flv].include?(format)
201
+ format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
202
+ @options[:audio_codec] ||= 'aac' if %w'flv'.include?(format)
237
203
 
238
204
  # SWF special: only supports mp3 audio codec
239
- format = (@options[:format] || File.extname(target)[1..]).to_s.downcase
240
- @options[:audio_codec] ||= 'mp3' if %w[swf].include?(format)
205
+ format = (@options[:format] || File.extname(target)[1..-1]).to_s.downcase
206
+ @options[:audio_codec] ||= 'mp3' if %w'swf'.include?(format)
241
207
 
242
208
  # Set up FFMpeg command line parameters
243
- opts = { global: [], input: [], filter: [], output: [] }
209
+ opts = {global: [], input: [], filter: [], output: []}
244
210
  opts[:global] << '-hide_banner'
245
211
  opts[:global] << '-loglevel' << (@options[:quiet] ? 'fatal' : 'warning')
246
212
 
@@ -248,31 +214,29 @@ module Libis
248
214
  @options[:watermark_opacity] ||= 0.5
249
215
  if @options[:watermark_image]
250
216
  opts[:filter] << '-i' << @options[:watermark_image] << '-filter_complex'
251
- opts[:filter] << Kernel.format('[1:v]format=argb,colorchannelmixer=aa=%f[wm];[0:v][wm]overlay=%s',
252
- @options[:watermark_opacity], watermark_position_filter)
217
+ opts[:filter] << "[1:v]format=argb,colorchannelmixer=aa=%f[wm];[0:v][wm]overlay=%s" %
218
+ [@options[:watermark_opacity], watermark_position_text]
253
219
  elsif @options[:watermark_text]
254
- wm_text = @options[:watermark_text]
255
- wm_text += File.basename(source, '.*') if @options[:watermark_text_add_filename]
256
- @options[:watermark_text_size] ||= '10'
220
+ @options[:watermark_text_size] ||= 10
257
221
  @options[:watermark_text_color] ||= 'white'
258
222
  @options[:watermark_text_shadow_color] ||= 'black'
259
223
  @options[:watermark_text_shadow_offset] ||= 1
260
- filter_text = Kernel.format("drawtext=text='%s':%s:fontfile=%s:fontsize=%s:fontcolor=%s@%f",
261
- wm_text, watermark_position_filter(true), Config[:watermark_font],
262
- @options[:watermark_text_size], @options[:watermark_text_color], @options[:watermark_opacity])
263
- if !(@options[:watermark_text_shadow_offset] == 0)
264
- filter_text += Kernel.format(':shadowcolor=%s@%f:shadowx=%d:shadowy=%d',
265
- @options[:watermark_text_shadow_color], @options[:watermark_opacity],
266
- @options[:watermark_text_shadow_offset], @options[:watermark_text_shadow_offset])
267
- end
268
- @options[:watermark_text_box] ||= 0
269
- if (@options[:watermark_text_box]).positive?
270
- filter_text += Kernel.format(':box=1:boxcolor=%s:boxborderw=%s',
271
- @options[:watermark_text_box_color], @options[:watermark_text_box_width])
272
- end
273
- if (@options[:watermark_blending])
274
- filter_text += Kernel.format(':alpha=%f', @options[:watermark_blending])
275
- end
224
+ filter_text = "drawtext=text='%s':%s:fontfile=%s:fontsize=%d:fontcolor=%s@%f" %
225
+ [
226
+ @options[:watermark_text],
227
+ watermark_position_text(true),
228
+ Config[:watermark_font],
229
+ @options[:watermark_text_size],
230
+ @options[:watermark_text_color],
231
+ @options[:watermark_opacity]
232
+ ]
233
+ filter_text += ':shadowcolor=%s@%f:shadowx=%d:shadowy=%d' %
234
+ [
235
+ @options[:watermark_text_shadow_color],
236
+ @options[:watermark_opacity],
237
+ @options[:watermark_text_shadow_offset],
238
+ @options[:watermark_text_shadow_offset]
239
+ ] if @options[:watermark_text_shadow_offset] > 0
276
240
  opts[:filter] << '-vf' << filter_text
277
241
  end
278
242
  opts[:output] << '-ac' << @options[:audio_channels] if @options[:audio_channels]
@@ -280,11 +244,11 @@ module Libis
280
244
  opts[:output] << '-c:v' << @options[:video_codec] if @options[:video_codec]
281
245
  opts[:output] << '-b:a' << @options[:audio_bitrate] if @options[:audio_bitrate]
282
246
  opts[:output] << '-b:v' << @options[:video_bitrate] if @options[:video_bitrate]
283
- opts[:output] << '-crf' << @options[:constant_rate_factor] if @options[:constant_rate_factor]
247
+ opts[:output] << '-crf' << @options[:crf] if @options[:crf]
284
248
  opts[:output] << '-map_metadata:g' << '0:g' # Copy global metadata
285
249
  opts[:output] << '-map_metadata:s:a' << '0:s:a' # Copy audio metadata
286
250
  opts[:output] << '-map_metadata:s:v' << '0:s:v' # Copy video metadata
287
- opts[:input] << '-accurate_seek' << (@options[:start].to_i.negative? ? '-sseof' : '-ss') << @options[:start] if @options[:start]
251
+ opts[:input] << '-accurate_seek' << (@options[:start].to_i < 0 ? '-sseof' : '-ss') << @options[:start] if @options[:start]
288
252
  opts[:input] << '-t' << @options[:duration] if @options[:duration]
289
253
  opts[:output] << '-qscale' << @options[:video_quality] if @options[:video_quality]
290
254
  opts[:output] << '-q:a' << @options[:audio_quality] if @options[:audio_quality]
@@ -306,25 +270,25 @@ module Libis
306
270
  target
307
271
  end
308
272
 
309
- def watermark_position_filter(for_text = false)
310
- margin_x = @options[:watermark_offset_x] || 10
311
- margin_y = @options[:watermark_offset_y] || 10
273
+ def watermark_position_text(for_text = false, margin = 10)
312
274
  w = for_text ? 'tw' : 'w'
313
275
  h = for_text ? 'th' : 'h'
314
276
  case @options[:watermark_position]
315
- when 'bottom_left'
316
- "x=#{margin_x}:y=H-#{h}-#{margin_y}"
317
- when 'top_left'
318
- "x=#{margin_x}:y=#{margin_y}"
319
- when 'bottom_right'
320
- "x=W-#{w}-#{margin_x}:y=H-#{h}-#{margin_y}"
321
- when 'top_right'
322
- "x=W-#{w}-#{margin_x}:y=#{margin_y}"
323
- else
324
- "x=#{margin_x}:y=H-#{h}-#{margin_y}"
277
+ when 'bottom_left'
278
+ "x=#{margin}:y=H-#{h}-#{margin}"
279
+ when 'top_left'
280
+ "x=#{margin}:y=#{margin}"
281
+ when 'bottom_right'
282
+ "x=W-#{w}-#{margin}:y=H-#{h}-#{margin}"
283
+ when 'top_right'
284
+ "x=W-#{w}-#{margin}:y=#{margin}"
285
+ else
286
+ "x=#{margin}:y=H-#{h}-#{margin}"
325
287
  end
326
288
  end
289
+
327
290
  end
291
+
328
292
  end
329
293
  end
330
- end
294
+ end
@@ -1,19 +1,18 @@
1
- # frozen_string_literal: true
2
-
3
1
  require_relative 'base'
4
2
 
5
3
  module Libis
6
4
  module Format
7
5
  module Converter
6
+
8
7
  class XsltConverter < Libis::Format::Converter::Base
8
+
9
9
  def self.input_types
10
10
  [:XML]
11
11
  end
12
12
 
13
13
  def self.output_types(format = nil)
14
- return [] if format && !input_types.include?(format)
15
-
16
- %i[XML HTML TXT]
14
+ return [] unless input_types.include?(format) if format
15
+ [:XML, :HTML, :TXT]
17
16
  end
18
17
 
19
18
  def xsl_file(file_path)
@@ -35,12 +34,12 @@ module Libis
35
34
 
36
35
  FileUtils.mkpath(File.dirname(target))
37
36
 
38
- if RUBY_PLATFORM == 'java'
37
+ if RUBY_PLATFORM == "java"
39
38
  require 'saxon-xslt'
40
39
  xsl = Saxon.XSLT(File.open(@options[:xsl_file]))
41
40
  xml = Saxon.XML(File.open(source))
42
41
  result = xsl.transform(xml)
43
- File.write(target, result.to_s)
42
+ File.open(target, 'w') {|f| f.write(result.to_s)}
44
43
  else
45
44
  require 'nokogiri'
46
45
 
@@ -87,17 +86,16 @@ module Libis
87
86
  error "Error transforming '#{source}' with '#{file}': #{e.message} @ #{e.backtrace[0]}"
88
87
  return nil
89
88
  ensure
90
- fp.close unless fp.nil? || fp.closed?
89
+ fp.close unless fp.nil? or fp.closed?
91
90
  end
92
91
 
93
- {
94
- command: { status: 0 },
95
- files: [target]
96
- }
97
-
92
+ target
98
93
  end
94
+
99
95
  end
96
+
100
97
  end
98
+
101
99
  end
102
100
  end
103
101
  end
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: true
1
+ # code utf-8
2
2
 
3
3
  module Libis
4
4
  module Format
@@ -1,4 +1,4 @@
1
- # frozen_string_literal: true
1
+ # encoding: utf-8
2
2
 
3
3
  require 'singleton'
4
4
  require 'pathname'
@@ -7,8 +7,9 @@ require 'libis-tools'
7
7
  require 'libis/tools/extend/hash'
8
8
  require 'libis/tools/extend/string'
9
9
  require 'libis/tools/extend/empty'
10
+ require 'nori/core_ext/object'
10
11
 
11
- require 'libis/format/type_database'
12
+ require 'libis/format/library'
12
13
 
13
14
  require_relative 'config'
14
15
  require_relative 'tool/fido'
@@ -18,10 +19,13 @@ require_relative 'tool/extension_identification'
18
19
 
19
20
  module Libis
20
21
  module Format
22
+
21
23
  class Identifier
22
24
  include ::Libis::Tools::Logger
23
25
  include Singleton
24
26
 
27
+ public
28
+
25
29
  def self.add_xml_validation(mimetype, xsd_file)
26
30
  instance.xml_validations[mimetype] = xsd_file
27
31
  end
@@ -37,28 +41,29 @@ module Libis
37
41
  attr_reader :xml_validations
38
42
 
39
43
  def get(file, options = {})
40
- options[:droid] = true unless options.keys.include?(:droid) || (options[:tool] && (options[:tool] != :droid))
41
- options[:fido] = true unless options.keys.include?(:fido) || (options[:tool] && (options[:tool] != :fido))
42
- options[:file] = true unless options.keys.include?(:file) || (options[:tool] && (options[:tool] != :file))
44
+
45
+ options[:droid] = true unless options.keys.include?(:droid) or (options[:tool] and options[:tool] != :droid)
46
+ options[:fido] = true unless options.keys.include?(:fido) or (options[:tool] and options[:tool] != :fido)
47
+ options[:file] = true unless options.keys.include?(:file) or (options[:tool] and options[:tool] != :file)
43
48
  options[:xml_validation] = true if options[:xml_validation].nil?
44
49
 
45
- result = { messages: [], output: {}, formats: {} }
50
+ result = {messages: [], output: {}, formats: {}}
46
51
 
47
52
  begin
48
53
  get_droid_identification(file, result, options) if options[:droid]
49
- rescue StandardError => e
54
+ rescue => e
50
55
  log_msg(result, :error, "Error running Droid: #{e.message} @ #{e.backtrace.first}")
51
56
  end
52
57
 
53
58
  begin
54
59
  get_fido_identification(file, result, options) if options[:fido]
55
- rescue StandardError => e
60
+ rescue => e
56
61
  log_msg(result, :error, "Error running Fido: #{e.message} @ #{e.backtrace.first}")
57
62
  end
58
63
 
59
64
  begin
60
65
  get_file_identification(file, result, options) if options[:file]
61
- rescue StandardError => e
66
+ rescue => e
62
67
  log_msg(result, :error, "Error running File: #{e.message} @ #{e.backtrace.first}")
63
68
  end
64
69
 
@@ -69,13 +74,14 @@ module Libis
69
74
  # Libis::Tools::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
70
75
  begin
71
76
  validate_against_xml_schema(result, options[:base_dir]) if options[:xml_validation]
72
- rescue StandardError => e
77
+ rescue => e
73
78
  log_msg(result, :error, "Error validating XML files: #{e.message} @ #{e.backtrace.first}")
74
79
  end
75
80
 
76
81
  process_results(result, !options[:keep_output])
77
82
 
78
83
  result
84
+
79
85
  end
80
86
 
81
87
  protected
@@ -91,7 +97,7 @@ module Libis
91
97
  end
92
98
 
93
99
  def get_fido_identification(file, result, options)
94
- output = ::Libis::Format::Tool::Fido.run(file, options[:recursive], **(options[:fido_options] || {}))
100
+ output = ::Libis::Format::Tool::Fido.run(file, options[:recursive], options[:fido_options])
95
101
  process_tool_output(output, result, options[:base_dir])
96
102
  output
97
103
  end
@@ -117,21 +123,18 @@ module Libis
117
123
  end
118
124
 
119
125
  def xml_validate(file, file_result, result, base_dir)
120
- return unless file_result[:mimetype] =~ %r{^(text|application)/xml$}
121
-
126
+ return unless file_result[:mimetype] =~ /^(text|application)\/xml$/
122
127
  filepath = base_dir ? File.join(base_dir, file) : file
123
128
  doc = ::Libis::Tools::XmlDocument.open filepath
124
129
  xml_validations.each do |mime, xsd_file|
125
130
  next unless xsd_file
126
-
127
131
  begin
128
132
  if doc.validates_against?(xsd_file)
129
133
  log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
130
- info = { mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation',
131
- format_version: '' }
132
- file_result.merge! Libis::Format::TypeDatabase.enrich(info, PUID: :puid, MIME: :mimetype, NAME: :format_name)
134
+ info = {mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation', format_version: '', }
135
+ file_result.merge! Libis::Format::Library.enrich(info, puid: :puid, mimetype: :mimetype, name: :format_name)
133
136
  end
134
- rescue StandardError => e
137
+ rescue => e
135
138
  # Do nothing - probably Nokogiri chrashed during validation. Could have many causes
136
139
  # (remote schema (firewall, network, link rot, ...), schema syntax error, corrupt XML,...)
137
140
  # so we log and continue.
@@ -139,7 +142,7 @@ module Libis
139
142
  "Error during XML validation of file #{file} against #{File.basename(xsd_file)}: #{e.message}")
140
143
  end
141
144
  end
142
- rescue StandardError => e
145
+ rescue => e
143
146
  # Not much we can do. probably Nokogiri chrashed opening the XML file. What caused this?
144
147
  # (XML not parsable, false XML identification, ???)
145
148
  # so we log and continue.
@@ -147,30 +150,30 @@ module Libis
147
150
  end
148
151
 
149
152
  def process_results(result, delete_output = true)
150
- result[:output].each_key do |file|
153
+ result[:output].keys.each do |file|
151
154
  output = result[:output][file]
152
155
  file_result = result[:formats][file] = {}
153
156
  if output.empty?
154
157
  log_msg(result, :warn, "Could not identify format of '#{file}'.")
155
- file_result.merge!(
156
- mimetype: 'application/octet-stream',
157
- puid: 'fmt/unknown',
158
- score: 0,
159
- tool: nil
160
- )
158
+ file_result = {
159
+ mimetype: 'application/octet-stream',
160
+ puid: 'fmt/unknown',
161
+ score: 0,
162
+ tool: nil
163
+ }
161
164
  else
162
- format_matches = output.group_by { |x| [x[:mimetype], x[:puid]] }
165
+ format_matches = output.group_by {|x| [x[:mimetype], x[:puid]]}
163
166
  format_matches.each do |match, group|
164
- format_matches[match] = group.group_by { |x| x[:score] }.sort.reverse.to_h
167
+ format_matches[match] = group.group_by {|x| x[:score]}.sort.reverse.to_h
165
168
  end
166
169
  case format_matches.count
167
- when 0
168
- # No this really cannot happen. If there are no hits, there will be at least a format [nil,nil]
169
- when 1
170
- # only one match, that's easy. The first of the highest score will be used
171
- file_result.merge!(get_best_result(output))
172
- else
173
- process_multiple_formats(file_result, format_matches, output)
170
+ when 0
171
+ # No this really cannot happen. If there are no hits, there will be at least a format [nil,nil]
172
+ when 1
173
+ # only one match, that's easy. The first of the highest score will be used
174
+ file_result.merge!(get_best_result(output))
175
+ else
176
+ process_multiple_formats(file_result, format_matches, output)
174
177
  end
175
178
  end
176
179
  end
@@ -181,13 +184,12 @@ module Libis
181
184
  # multiple matches. Let's select the highest score matches
182
185
  file_result.merge!(get_best_result(output))
183
186
  file_result[:alternatives] = []
184
- format_matches.each_key do |mime, puid|
187
+ format_matches.keys.each do |mime, puid|
185
188
  next if file_result[:mimetype] == mime && puid.nil?
186
-
187
- selection = output.select { |x| x[:mimetype] == mime && x[:puid] == puid }
189
+ selection = output.select {|x| x[:mimetype] == mime && x[:puid] == puid}
188
190
  file_result[:alternatives] << get_best_result(selection)
189
191
  end
190
- file_result[:alternatives] = file_result[:alternatives].sort_by { |x| x[:score] }.reverse
192
+ file_result[:alternatives] = file_result[:alternatives].sort_by {|x| x[:score]}.reverse
191
193
  file_result.delete(:alternatives) if file_result[:alternatives].size <= 1
192
194
  end
193
195
 
@@ -206,15 +208,15 @@ module Libis
206
208
  end
207
209
 
208
210
  def get_mimetype(puid)
209
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
210
- rescue StandardError
211
- nil
211
+ ::Libis::Format::Library.get_field_by(:puid, puid, :mimetypes)
212
212
  end
213
213
 
214
214
  def get_best_result(results)
215
- score = results.map { |x| x[:score] }.max
216
- results.select { |x| x[:score] == score }.reduce(:apply_defaults)
215
+ score = results.map {|x| x[:score]}.max
216
+ results.select {|x| x[:score] == score}.reduce(:apply_defaults)
217
217
  end
218
+
218
219
  end
220
+
219
221
  end
220
222
  end
@@ -0,0 +1,27 @@
1
+ module Libis
2
+ module Format
3
+ class Info
4
+ attr_reader :name, :category, :description, :puids, :mimetypes, :extensions
5
+
6
+ def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
7
+ @name = name
8
+ @category = category
9
+ @description = description
10
+ @puids = puids
11
+ @mimetypes = mimetypes
12
+ @extensions = extensions
13
+ end
14
+
15
+ def to_hash
16
+ {
17
+ name: name,
18
+ description: description.dup,
19
+ category: category,
20
+ puids: puids.dup,
21
+ mimetypes: mimetypes.dup,
22
+ extensions: extensions.dup
23
+ }
24
+ end
25
+ end
26
+ end
27
+ end