libis-format 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +15 -0
- data/.travis.yml +36 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +38 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/data/ISOcoated.icc +0 -0
- data/data/PDFA_def.ps +32 -0
- data/data/ead.xsd +2728 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +213 -0
- data/lib/libis/format/converter/base.rb +103 -0
- data/lib/libis/format/converter/chain.rb +80 -0
- data/lib/libis/format/converter/repository.rb +110 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +38 -0
- data/lib/libis/format/fido.rb +109 -0
- data/lib/libis/format/identifier.rb +185 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +12 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +30 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/identifier_spec.rb +59 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/argparselocal.pyc +0 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- metadata +342 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'os'
|
2
|
+
require 'csv'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
require 'libis/tools/extend/string'
|
6
|
+
require 'libis/tools/logger'
|
7
|
+
require 'libis/tools/command'
|
8
|
+
|
9
|
+
require 'libis/format/type_database'
|
10
|
+
|
11
|
+
module Libis
|
12
|
+
module Format
|
13
|
+
|
14
|
+
# Wrapper around the FIDO format identification scripts located in this
# gem's tools/fido directory. The class is a Singleton: the class-level
# methods delegate to the single instance.
class Fido
  include ::Libis::Tools::Logger
  include Singleton

  # MIME type values that do not count as a usable identification.
  BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream']

  # Format definition files passed to FIDO via its -loadformats option.
  # This reader has to be public: Fido.add_format and Fido.formats call it
  # from class-method context, where a protected reader raises NoMethodError.
  attr_reader :formats

  # Class-level shortcut for #run on the singleton instance.
  #
  # @param file [String] path of the file to identify
  # @param formats [Array<String>, String, nil] extra format definition file(s)
  # @return [Hash] see #run
  def self.run(file, formats = nil)
    instance.run(file, formats)
  end

  # Runs FIDO on a file and returns the single best match.
  #
  # Every row with status 'OK' and a usable MIME type is scored:
  # 5 points base, +5 for a 'signature' match, +2 for a 'container' match
  # whose type (looked up by PUID) lists the file's extension.
  #
  # @param file [String] path of the file to identify
  # @param xtra_formats [Array<String>, String, nil] format definition
  #   file(s) used for this call in addition to the registered ones
  # @return [Hash] the row of the unique highest-scoring hit (with an added
  #   :score key), or {} when there is no hit or the top score is shared
  def run(file, xtra_formats = nil)
    # Work on a copy so per-call extras never leak into the registered list.
    fmt_list = formats.dup
    case xtra_formats
    when Array
      fmt_list += xtra_formats
    when String
      fmt_list << xtra_formats
    end

    bin_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido'))
    cmd = File.join(bin_dir, OS.windows? ? 'fido.bat' : 'fido.sh')
    args = []
    args.push('-loadformats', fmt_list.join(',')) unless fmt_list.empty?
    args << file.escape_for_string
    fido = ::Libis::Tools::Command.run(cmd, *args)
    warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?

    # Names assigned, in order, to the columns of each CSV row FIDO printed.
    keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
    fido_output = CSV.parse(fido[:out].join("\n")).map { |row| Hash[keys.zip(row)] }
    debug "Fido output: #{fido_output}"

    # File.extname keeps the leading dot, while the extension lists in the
    # type database are compared without it (see TypeDatabase.ext_infos).
    # Without stripping the dot the container bonus could never be granted.
    extension = File.extname(file).sub(/\A\./, '')

    candidates = []
    fido_output.each do |hit|
      next unless hit[:status] == 'OK'
      # FIDO reported no MIME type: fall back to the type database.
      hit[:mimetype] = get_mimetype(hit[:puid]) if hit[:mimetype] == 'None'
      next if BAD_MIMETYPES.include?(hit[:mimetype])
      hit[:score] = 5
      case hit[:matchtype]
      when 'signature'
        hit[:score] += 5
      when 'container'
        typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(hit[:puid])
        hit[:score] += 2 if typeinfo && typeinfo[:EXTENSIONS].include?(extension)
      end
      candidates << hit
    end

    by_score = candidates.group_by { |hit| hit[:score] }

    debug "Fido results: #{by_score}"

    max_score = by_score.keys.max

    # Only a unique best-scoring hit is trusted; a tie at the top is
    # considered too ambiguous to return.
    return {} unless max_score && max_score >= 5 && by_score[max_score].size == 1

    by_score[max_score].first
  end

  # Registers an additional format definition file for all later runs.
  #
  # @param f [String] path of the format definition file
  # @return [Array<String>] the updated list
  def self.add_format(f)
    instance.formats << f
  end

  # @return [Array<String>] the registered format definition files
  def self.formats
    instance.formats
  end

  protected

  # Starts with the lias_formats.xml file from this gem's data directory.
  def initialize
    data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    @formats = [(File.join(data_dir, 'lias_formats.xml'))]
  end

  # First MIME type the type database lists for the PUID, or nil when the
  # lookup fails for any reason.
  def get_mimetype(puid)
    ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
  end

end
|
107
|
+
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
require 'libis-tools'
|
6
|
+
require 'libis/tools/extend/string'
|
7
|
+
require 'libis/tools/extend/empty'
|
8
|
+
|
9
|
+
require 'libis/format/type_database'
|
10
|
+
|
11
|
+
require_relative 'fido'
|
12
|
+
require_relative 'droid'
|
13
|
+
|
14
|
+
module Libis
|
15
|
+
module Format
|
16
|
+
|
17
|
+
# Identifies the format of a file by trying, in order: FIDO, DROID, the
# `file` command and the file extension. Each step is skipped when the
# result gathered so far is already acceptable (see #result_ok?). XML files
# are finally matched against the registered XML Schemas.
class Identifier
  include ::Libis::Tools::Logger
  include Singleton

  # MIME types that are too generic to accept; a later step is tried instead.
  RETRY_MIMETYPES = %w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES
  # MIME types for which a result is rejected when DROID is the one asking,
  # so that DROID gets to identify the file itself.
  FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet)

  # Hash of MIME type => XSD file path used by #validate_against_xml_schema.
  attr_reader :xml_validations

  protected

  def initialize
    data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    # NOTE(review): @fido_formats is not read anywhere in this class; the
    # Fido singleton keeps its own list. Kept for backward compatibility.
    @fido_formats = [(File.join(data_dir, 'lias_formats.xml'))]
    # noinspection RubyStringKeysInHashInspection
    @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
  end

  # Decides whether the result is good enough to skip further steps.
  # The result is enriched through the type database first; for a Hash
  # argument the enrichment happens in place.
  #
  # @param result [Hash, nil] identification gathered so far
  # @param who_is_asking [Symbol, nil] :DROID applies the FIDO_FAILURES check
  # @return [Boolean]
  def result_ok?(result, who_is_asking = nil)
    result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
    return false if result.empty?
    return true unless result[:TYPE].empty?
    return false if RETRY_MIMETYPES.include?(result[:mimetype])
    return false if FIDO_FAILURES.include?(result[:mimetype]) && who_is_asking == :DROID
    !(result[:mimetype].empty? && result[:puid].empty?)
  end

  # First PUID of the first type registered for the MIME type, or nil when
  # the lookup fails for any reason.
  def get_puid(mimetype)
    ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first rescue nil
  end

  public

  # Registers an extra FIDO format definition file.
  def self.add_fido_format(f)
    ::Libis::Format::Fido.add_format f
  end

  # Registers an XML Schema; XML files validating against it are reported
  # with the given MIME type.
  def self.add_xml_validation(mimetype, xsd_file)
    instance.xml_validations[mimetype] = xsd_file
  end

  # @return [Hash] MIME type => XSD file path
  def self.xml_validations
    instance.xml_validations
  end

  # Class-level shortcut for #get on the singleton instance.
  def self.get(file_path, options = nil)
    instance.get file_path, options
  end

  # Identifies a file.
  #
  # @param file [String] path of the file
  # @param options [Hash, nil] :formats - extra format file(s) for FIDO;
  #   :droid - when truthy the FIDO step is skipped
  # @return [Hash, nil] identification result (keys such as :mimetype,
  #   :puid and :method), or nil when the path is missing or a directory
  def get(file, options = nil)

    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    unless File.exist? file
      error 'File %s cannot be found.', file
      return nil
    end
    if File.directory? file
      error '%s is a directory.', file
      return nil
    end

    options ||= {}

    result = {}

    # use FIDO
    # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
    # NOTE(review): the original comment ended mid-sentence ("FIDO misses
    # should be registered in"); presumably FIDO_FAILURES - confirm.
    result = get_fido_identification(file, result, options[:formats]) unless options[:droid]

    # use DROID
    result = get_droid_identification file, result

    # use FILE
    result = get_file_identification(file, result)

    # Try file extension
    result = get_extension_identification(file, result)

    # determine XML type. Add custom types at runtime with
    # Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
    result = validate_against_xml_schema(file, result)

    # Every step returns a Hash, so testing the Hash itself would never
    # reach the warning; test for an actual MIME type instead.
    if result && !result[:mimetype].to_s.empty?
      info("Identification of '#{file}': '#{result}'")
    else
      warn("Could not identify MIME type of '#{file}'")
    end

    result
  end

  # FIDO step: merges FIDO's best hit into the result and tags the result
  # with method 'fido'.
  def get_fido_identification(file, result = {}, xtra_formats = nil)
    return result if result_ok? result

    fido_result = ::Libis::Format::Fido.run(file, xtra_formats)

    return result unless fido_result.is_a? Hash

    result.merge! fido_result
    result[:method] = 'fido'

    debug "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})" unless result.empty?
    result
  end

  # DROID step: replaces the content of the result with DROID's first match.
  # When DROID reports nothing the result is handed on untouched.
  def get_droid_identification(file, result = {})
    return result if result_ok? result, :DROID
    droid_output = ::Libis::Format::Droid.run file
    debug "DROID: #{droid_output}"
    # Without this guard an empty DROID answer raised NoMethodError on nil.
    return result if droid_output.nil? || droid_output.empty?
    warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
    result.clear
    match = droid_output.first
    # The mime_type field may hold several values; keep the first one that
    # looks like "type/subtype".
    result[:mimetype] = match[:mime_type].to_s.split(/[\s,]+/).find { |x| x =~ /.*\/.*/ }
    result[:matchtype] = match[:method]
    result[:puid] = match[:puid]
    result[:format_name] = match[:format_name]
    result[:format_version] = match[:format_version]
    result[:method] = 'droid'

    debug "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})"
    result
  end

  # `file` step: asks the file command for the MIME type.
  #
  # Fido#run shows that Command.run returns arrays of lines under :out and
  # :err. The previous code read :err and called String#strip on that
  # array; the resulting NoMethodError was swallowed, so this step never
  # produced anything.
  def get_file_identification(file, result = nil)
    return result if result_ok? result
    result = {}
    begin
      # NOTE(review): the path is wrapped in literal quotes here, whereas
      # Fido#run passes it unquoted - confirm which form Command.run needs.
      output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', "\"#{file.escape_for_string}\"")
      # Accept only a line shaped like a MIME type, so that a diagnostic
      # message printed by the tool is never mistaken for one.
      mimetype = Array(output[:out]).map { |line| line.to_s.strip }.find do |line|
        line =~ %r{\A[\w.+-]+/[\w.+-]+\z}
      end
      if mimetype
        debug "File result: '#{mimetype}'"
        result[:mimetype] = mimetype
        result[:puid] = get_puid(mimetype)
      end
      result[:method] = 'file'
    rescue StandardError => e
      # Best effort: a failing file command must not abort identification.
      debug "File identification failed: #{e.message}"
    end
    result
  end

  # Extension step: looks the file extension up in the type database.
  def get_extension_identification(file, result = nil)
    return result if result_ok? result
    result = {}
    info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
    debug "File extension info: #{info}"
    if info
      result[:mimetype] = info[:MIME].first rescue nil
      result[:puid] = info[:PUID].first rescue nil
    end
    result[:method] = 'extension'
    result
  end

  # For results with MIME type text/xml or application/xml: validates the
  # document against every registered XML Schema and adopts the MIME type
  # of each schema that validates (a later match overrides an earlier one).
  def validate_against_xml_schema(file, result)
    return result unless result[:mimetype] =~ /^(text|application)\/xml$/
    doc = ::Libis::Tools::XmlDocument.open file
    xml_validations.each do |mime, xsd_file|
      next unless xsd_file
      if doc.validates_against?(xsd_file)
        debug "XML file validated against XML Schema: #{xsd_file}"
        result[:mimetype] = mime
        result[:puid] = nil
        # NOTE(review): enrich only fills :TYPE when it is not set yet, so a
        # :TYPE left by an earlier step appears to survive here - confirm.
        result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
      end
    end
    result
  end

end
|
183
|
+
|
184
|
+
end
|
185
|
+
end
|
@@ -0,0 +1,170 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'singleton'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
require 'backports/rails/hash'
|
7
|
+
require 'libis/tools/logger'
|
8
|
+
require 'libis/tools/extend/string'
|
9
|
+
|
10
|
+
module Libis
|
11
|
+
module Format
|
12
|
+
|
13
|
+
# Singleton registry of file types. Each type is stored under its symbol
# name as a Hash with the keys :TYPE, :GROUP, :MIME, :EXTENSIONS and,
# when defined, :PUID. The registry is filled from data/types.yml on
# creation; more definitions can be added with #load_types.
class TypeDatabase
  include Singleton
  include ::Libis::Tools::Logger

  class << self

    # Info Hash of the given type, or {} when the type is unknown.
    def typeinfo(t)
      instance.types[t] || {}
    end

    # Completes an info Hash in place: derives the type from the PUID or,
    # failing that, the MIME type, then fills in the MIME type and PUID
    # when blank and sets the extensions and group of that type.
    #
    # @param info [Hash] data to enrich
    # @param map_keys [Hash] maps the standard keys (:PUID, :MIME, :TYPE,
    #   :EXTENSIONS, :GROUP) onto the keys used in info
    # @return [Hash] the info Hash itself, or {} when info is not a Hash
    def enrich(info, map_keys = {})
      return {} unless info.is_a? Hash
      # Unmapped keys map onto themselves.
      mapper = Hash.new { |hash, key| hash[key] = key }
      mapper.merge! map_keys
      puid = info[mapper[:PUID]]
      unless puid.blank?
        info[mapper[:TYPE]] ||= puid_infos(puid).first[:TYPE] rescue nil
      end
      mime = info[mapper[:MIME]]
      unless mime.blank?
        info[mapper[:TYPE]] ||= mime_infos(mime).first[:TYPE] rescue nil
      end
      type_name = info[mapper[:TYPE]]
      return info if type_name.nil?
      info[mapper[:MIME]] = type_mimetypes(type_name).first if info[mapper[:MIME]].blank?
      info[mapper[:PUID]] = type_puids(type_name).first if info[mapper[:PUID]].blank?
      info[mapper[:EXTENSIONS]] = type_extentions(type_name)
      info[mapper[:GROUP]] = type_group(type_name)
      info
    end

    # Group of the given type (nil when unknown).
    def type_group(t)
      typeinfo(t)[:GROUP]
    end

    # MIME types of the given type ([] when none).
    def type_mimetypes(t)
      typeinfo(t)[:MIME] || []
    end

    # PUIDs of the given type ([] when none).
    def type_puids(t)
      typeinfo(t)[:PUID] || []
    end

    # File extensions of the given type ([] when none).
    def type_extentions(t)
      typeinfo(t)[:EXTENSIONS] || []
    end

    # Names of all types that belong to the group.
    def group_types(group)
      members = instance.types.select { |_, details| details[:GROUP] == group.to_sym }
      members.keys
    end

    # Info Hashes of all types that list the PUID.
    def puid_infos(puid)
      matching_types(:PUID, puid).values
    end

    # Names of all types that list the PUID.
    def puid_types(puid)
      matching_types(:PUID, puid).keys
    end

    # Group of every type that lists the PUID.
    def puid_groups(puid)
      puid_types(puid).map { |t| type_group t }
    end

    # Info Hashes of all types that list the MIME type.
    def mime_infos(mime)
      matching_types(:MIME, mime).values
    end

    # Names of all types that list the MIME type.
    def mime_types(mime)
      matching_types(:MIME, mime).keys
    end

    # Group of every type that lists the MIME type.
    def mime_groups(mime)
      mime_types(mime).map { |t| type_group t }
    end

    # Info Hashes of all types that list the extension; a leading dot in
    # the argument is ignored.
    def ext_infos(ext)
      matching_types(:EXTENSIONS, ext.gsub(/^\./, '')).values
    end

    # Names of all types that list the extension; a leading dot in the
    # argument is ignored.
    def ext_types(ext)
      matching_types(:EXTENSIONS, ext.gsub(/^\./, '')).keys
    end

    # Info Hash of the first type that lists the PUID, or nil.
    def puid_typeinfo(puid)
      found = instance.types.find do |_, details|
        details[:PUID] && details[:PUID].include?(puid)
      end
      found ? found.last : nil
    end

    # true when at least one type lists the MIME type.
    def known_mime?(mime)
      instance.types.any? { |_, details| details[:MIME].include? mime }
    end

    private

    # Types whose list stored under +field+ contains +value+. An entry that
    # cannot be checked (for instance a missing list) counts as no match.
    def matching_types(field, value)
      instance.types.select do |_, details|
        details[field].include?(value) rescue false
      end
    end

  end

  # Hash of type name (Symbol) => info Hash.
  attr_reader :types

  # Loads type definitions, grouped as { group => { type_name => info } }.
  #
  # @param file_or_hash [Hash, String] the definitions or the path of a
  #   YAML file containing them
  # @param append [Boolean] when a type is already known: true puts the new
  #   list values after the existing ones and keeps existing scalar values;
  #   false puts them in front and lets new scalar values win
  def load_types(file_or_hash = {}, append = true)
    definitions = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
    # Turns "a, b c" into ['a', 'b', 'c'].
    to_list = ->(text) { text.strip.split(/[\s,]+/).map { |v| v.strip } }
    # noinspection RubyResolve
    definitions.each do |group, type_info|
      type_info.each do |type_name, info|
        type_key = type_name.to_sym
        info.symbolize_keys!
        info[:TYPE] = type_key
        info[:GROUP] = group.to_sym
        info[:MIME] = (to_list.call(info[:MIME]) rescue [])
        info[:EXTENSIONS] = (to_list.call(info[:EXTENSIONS]) rescue [])
        info[:PUID] = to_list.call(info[:PUID]) if info[:PUID]
        if @types.has_key?(type_key)
          warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
          # The receiver holds the new values, the argument the existing ones.
          info.merge!(@types[type_key]) do |_, v_new, v_old|
            if v_old.is_a?(Array)
              append ? v_old + v_new : v_new + v_old
            elsif v_old.is_a?(Hash)
              append ? v_new.merge(v_old) : v_old.merge(v_new)
            else
              append ? v_old : v_new
            end
          end
        end
        @types[type_key] = info
      end
    end
  end

  protected

  # Fills the registry from the types.yml file in this gem's data directory.
  def initialize
    @types = Hash.new
    gem_data = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    load_types(File.join(gem_data, 'types.yml'))
  end

end
|
168
|
+
|
169
|
+
end
|
170
|
+
end
|
data/lib/libis/format.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'libis/format/version'
|
2
|
+
|
3
|
+
module Libis
  # Namespace of the format services. The constants below are registered
  # for autoloading, so each file is only required on first use.
  module Format
    {
      TypeDatabase: 'libis/format/type_database',
      Identifier: 'libis/format/identifier',
      Fido: 'libis/format/fido',
      Droid: 'libis/format/droid',
      Converter: 'libis/format/converter'
    }.each do |const_name, path|
      autoload(const_name, path)
    end
  end
end
|
data/lib/libis-format.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'libis/format'
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
|
6
|
+
require 'libis/format/version'
|
7
|
+
|
8
|
+
# Gem specification of libis-format.
Gem::Specification.new do |spec|
  spec.name = 'libis-format'
  spec.version = Libis::Format::VERSION
  spec.authors = ['Kris Dekeyser']
  spec.email = ['kris.dekeyser@libis.be']
  # The summary used to read "File format format services" (duplicated word).
  spec.summary = %q{LIBIS File format services.}
  spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
  spec.homepage = ''
  spec.license = 'MIT'

  # Package every file tracked by git; executables and test files are
  # picked from that list by path.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.6'
  spec.add_development_dependency 'rake', '~> 10.3'
  spec.add_development_dependency 'rspec', '~> 3.1'
  spec.add_development_dependency 'simplecov', '~> 0.9'

  spec.add_runtime_dependency 'libis-tools', '~> 0.9'
  # NOTE(review): exact version pin - confirm this is intentional.
  spec.add_runtime_dependency 'os', '= 0.9.6'
end
|
Binary file
|
Binary file
|