libis-format 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +15 -0
- data/.travis.yml +36 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +38 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/data/ISOcoated.icc +0 -0
- data/data/PDFA_def.ps +32 -0
- data/data/ead.xsd +2728 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +213 -0
- data/lib/libis/format/converter/base.rb +103 -0
- data/lib/libis/format/converter/chain.rb +80 -0
- data/lib/libis/format/converter/repository.rb +110 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +38 -0
- data/lib/libis/format/fido.rb +109 -0
- data/lib/libis/format/identifier.rb +185 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +12 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +30 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/identifier_spec.rb +59 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/argparselocal.pyc +0 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- metadata +342 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
require 'os'
|
|
2
|
+
require 'csv'
|
|
3
|
+
require 'singleton'
|
|
4
|
+
|
|
5
|
+
require 'libis/tools/extend/string'
|
|
6
|
+
require 'libis/tools/logger'
|
|
7
|
+
require 'libis/tools/command'
|
|
8
|
+
|
|
9
|
+
require 'libis/format/type_database'
|
|
10
|
+
|
|
11
|
+
module Libis
|
|
12
|
+
module Format
|
|
13
|
+
|
|
14
|
+
class Fido
|
|
15
|
+
include ::Libis::Tools::Logger
|
|
16
|
+
include Singleton
|
|
17
|
+
|
|
18
|
+
# MIME type values reported by FIDO that are treated as "no usable answer".
# Frozen so the shared constant cannot be modified by accident.
BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream'].freeze
|
|
19
|
+
|
|
20
|
+
# Class-level shortcut: delegates to #run on the singleton instance.
#
# @param file [String] path of the file to identify
# @param formats [Array<String>, String, nil] extra format file(s), handed to #run unchanged
# @return [Hash] whatever the instance-level #run returns
def self.run(file, formats = nil)
  instance.run(file, formats)
end
|
|
23
|
+
|
|
24
|
+
# Identifies a file by running the bundled FIDO wrapper script and scoring its matches.
#
# Every output row with status 'OK' and a usable MIME type starts with a score of 5;
# a 'signature' match adds 5, a 'container' match adds 2 when the file's extension is
# listed for the matched PUID in the type database. A result is returned only when
# exactly one match holds the highest score.
#
# @param file [String] path of the file to identify
# @param xtra_formats [Array<String>, String, nil] additional FIDO format file(s) to load
#   on top of the registered ones
# @return [Hash] the winning match (:status, :time, :puid, :format_name, :signature_name,
#   :filesize, :filename, :mimetype, :matchtype, :score) or {} when there is no single winner
def run(file, xtra_formats = nil)
  fmt_list = formats.dup
  case xtra_formats
  when Array
    fmt_list += xtra_formats
  when String
    fmt_list << xtra_formats
  else
    # nothing extra to load
  end

  bin_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido'))
  cmd = File.join(bin_dir, OS.windows? ? 'fido.bat' : 'fido.sh')
  args = []
  args << '-loadformats' << fmt_list.join(',') unless fmt_list.empty?
  args << file.escape_for_string
  fido = ::Libis::Tools::Command.run(cmd, *args)
  warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?

  keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
  fido_output = CSV.parse(fido[:out].join("\n")).map { |row| Hash[keys.zip(row)] }
  debug "Fido output: #{fido_output}"

  # File.extname keeps the leading dot; the type database is queried with the dot
  # stripped (see TypeDatabase.ext_infos), so compare against the bare extension.
  ext = File.extname(file).sub(/\A\./, '')

  scored = []
  fido_output.each do |x|
    next unless x[:status] == 'OK'
    x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None'
    next if BAD_MIMETYPES.include? x[:mimetype]
    x[:score] = 5
    case x[:matchtype]
    when 'signature'
      x[:score] += 5
    when 'container'
      typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid])
      x[:score] += 2 if typeinfo && Array(typeinfo[:EXTENSIONS]).include?(ext)
    else
      # other match types keep the base score
    end
    scored << x
  end

  fido_results = scored.group_by { |x| x[:score] }
  debug "Fido results: #{fido_results}"

  max_score = fido_results.keys.max

  # Only a single hit at the top score is trusted; ties are treated as "unknown".
  return {} unless max_score && max_score >= 5 && fido_results[max_score].size == 1

  fido_results[max_score].first
end
|
|
84
|
+
|
|
85
|
+
# Registers an additional FIDO format definition file on the singleton instance.
#
# @param f [String] path of the format file to add
# @return [Array<String>] the updated list of format files
def self.add_format(f)
  # The #formats reader is declared under `protected`; with an explicit receiver it is
  # not callable from class scope (self is the class, not an instance), hence `send`.
  instance.send(:formats) << f
end
|
|
88
|
+
|
|
89
|
+
# Lists the FIDO format definition files registered on the singleton instance.
#
# @return [Array<String>] the live list held by the instance (not a copy)
def self.formats
  # The #formats reader is declared under `protected`; with an explicit receiver it is
  # not callable from class scope (self is the class, not an instance), hence `send`.
  instance.send(:formats)
end
|
|
92
|
+
|
|
93
|
+
protected
|
|
94
|
+
|
|
95
|
+
attr_reader :formats
|
|
96
|
+
|
|
97
|
+
# Seeds the format list with the lias_formats.xml file from the gem's data directory
# (three levels above this source file).
def initialize
  gem_root = File.join(File.dirname(__FILE__), '..', '..', '..')
  bundled = File.join(File.absolute_path(File.join(gem_root, 'data')), 'lias_formats.xml')
  @formats = [bundled]
end
|
|
101
|
+
|
|
102
|
+
# Looks up the first MIME type registered for a PUID in the type database.
#
# @param puid [String] PRONOM identifier
# @return [String, nil] the first MIME type, or nil when the PUID is unknown or has none
def get_mimetype(puid)
  typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(puid)
  # Explicit nil guard instead of a blanket `rescue nil`, so that genuine errors
  # raised by the type database are no longer silently discarded.
  return nil unless typeinfo
  Array(typeinfo[:MIME]).first
end
|
|
105
|
+
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'singleton'
|
|
4
|
+
|
|
5
|
+
require 'libis-tools'
|
|
6
|
+
require 'libis/tools/extend/string'
|
|
7
|
+
require 'libis/tools/extend/empty'
|
|
8
|
+
|
|
9
|
+
require 'libis/format/type_database'
|
|
10
|
+
|
|
11
|
+
require_relative 'fido'
|
|
12
|
+
require_relative 'droid'
|
|
13
|
+
|
|
14
|
+
module Libis
|
|
15
|
+
module Format
|
|
16
|
+
|
|
17
|
+
class Identifier
|
|
18
|
+
include ::Libis::Tools::Logger
|
|
19
|
+
include Singleton
|
|
20
|
+
|
|
21
|
+
# MIME types that result_ok? refuses to accept as a final answer when no type is known.
# Frozen so the shared constant cannot be modified by accident.
RETRY_MIMETYPES = (%w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES).freeze
# MIME types that result_ok? rejects when the check is made on behalf of DROID.
FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet).freeze
|
|
23
|
+
|
|
24
|
+
attr_reader :xml_validations
|
|
25
|
+
|
|
26
|
+
protected
|
|
27
|
+
|
|
28
|
+
# Prepares the singleton: remembers the bundled FIDO format file and registers the
# EAD schema from the gem's data directory for XML validation.
def initialize
  gem_root = File.join(File.dirname(__FILE__), '..', '..', '..')
  data_dir = File.absolute_path(File.join(gem_root, 'data'))
  @fido_formats = [File.join(data_dir, 'lias_formats.xml')]
  # MIME type => XSD file used by validate_against_xml_schema
  # noinspection RubyStringKeysInHashInspection
  @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
end
|
|
34
|
+
|
|
35
|
+
# Decides whether an identification result is good enough to stop trying other tools.
#
# The result is first passed through TypeDatabase.enrich (which updates the given hash
# in place). A result with a known :TYPE is accepted; otherwise it is rejected when its
# MIME type is in RETRY_MIMETYPES, or in FIDO_FAILURES while DROID is asking, and
# finally accepted only if it has a MIME type or a PUID.
#
# @param result [Hash, nil] identification result so far
# @param who_is_asking [Symbol, nil] :DROID when called on behalf of the DROID step
# @return [Boolean]
def result_ok?(result, who_is_asking = nil)
  result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
  if result.empty?
    false
  elsif !result[:TYPE].empty?
    true
  elsif RETRY_MIMETYPES.include?(result[:mimetype])
    false
  elsif FIDO_FAILURES.include?(result[:mimetype]) && who_is_asking == :DROID
    false
  else
    !result[:mimetype].empty? || !result[:puid].empty?
  end
end
|
|
43
|
+
|
|
44
|
+
# Looks up the first PUID of the first type registered for a MIME type.
#
# @param mimetype [String] MIME type to look up
# @return [String, nil] the PUID, or nil when the MIME type is unknown or the type has no PUID
def get_puid(mimetype)
  type_info = ::Libis::Format::TypeDatabase.mime_infos(mimetype).first
  # Explicit nil handling instead of a blanket `rescue nil`, so that genuine errors
  # raised by the type database are no longer silently discarded.
  return nil unless type_info
  Array(type_info[:PUID]).first
end
|
|
47
|
+
|
|
48
|
+
public
|
|
49
|
+
|
|
50
|
+
# Registers an additional FIDO format definition file by delegating to Fido.add_format.
#
# @param f [String] path of the format file
def self.add_fido_format(f)
  ::Libis::Format::Fido.add_format(f)
end
|
|
53
|
+
|
|
54
|
+
# Registers (or replaces) the XML Schema used to recognise a MIME type.
#
# @param mimetype [String] MIME type assigned to documents that validate
# @param xsd_file [String] path of the XML Schema file
# @return [String] the given xsd_file
def self.add_xml_validation(mimetype, xsd_file)
  validations = instance.xml_validations
  validations[mimetype] = xsd_file
end
|
|
57
|
+
|
|
58
|
+
# Exposes the MIME type => XSD file table held by the singleton instance.
#
# @return [Hash{String => String}] the live table (not a copy)
def self.xml_validations
  instance.xml_validations
end
|
|
61
|
+
|
|
62
|
+
# Class-level shortcut: identifies a file through the singleton instance.
#
# @param file_path [String] path of the file to identify
# @param options [Hash, nil] options handed to #get unchanged
# @return [Hash, nil] whatever the instance-level #get returns
def self.get(file_path, options = nil)
  instance.get(file_path, options)
end
|
|
65
|
+
|
|
66
|
+
# Identifies the format of a file by trying, in order: FIDO, DROID, the `file`
# command, the file extension, and finally XML Schema validation.
#
# Each step receives the result of the previous one and decides for itself whether it
# still needs to run.
#
# @param file [String] path of the file to identify
# @param options [Hash, nil] :formats => extra FIDO format file(s); :droid => truthy to skip FIDO
# @return [Hash, nil] the identification result, or nil when the file is missing or a directory
def get(file, options = nil)
  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? works everywhere.
  unless File.exist? file
    error 'File %s cannot be found.', file
    return nil
  end
  if File.directory? file
    error '%s is a directory.', file
    return nil
  end

  options ||= {}

  result = {}

  # use FIDO
  # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
  # NOTE(review): known FIDO misses are presumably meant to be listed in FIDO_FAILURES - confirm.
  result = get_fido_identification(file, result, options[:formats]) unless options[:droid]

  # use DROID
  result = get_droid_identification file, result

  # use FILE
  result = get_file_identification(file, result)

  # Try file extension
  result = get_extension_identification(file, result)

  # determine XML type. Add custom types at runtime with
  # Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
  result = validate_against_xml_schema(file, result)

  # The steps above always hand back a Hash, so test for an actual MIME type;
  # testing the hash itself would never reach the warning.
  if result && !result[:mimetype].to_s.empty?
    info("Identification of '#{file}': '#{result}'")
  else
    warn("Could not identify MIME type of '#{file}'")
  end

  result
end
|
|
103
|
+
|
|
104
|
+
# Tries to identify the file with FIDO unless the current result is already acceptable.
#
# @param file [String] path of the file to identify
# @param result [Hash] result so far; updated in place when FIDO finds a match
# @param xtra_formats [Array<String>, String, nil] extra FIDO format file(s)
# @return [Hash] the given result, merged with FIDO's match and :method => 'fido' on success
def get_fido_identification(file, result = {}, xtra_formats = nil)
  return result if result_ok? result

  fido_result = ::Libis::Format::Fido.run(file, xtra_formats)

  # Fido.run answers {} when it has no single confident match; in that case leave the
  # result untouched instead of labelling it as a FIDO identification.
  return result unless fido_result.is_a?(Hash) && !fido_result.empty?

  result.merge! fido_result
  result[:method] = 'fido'

  debug "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})"
  result
end
|
|
117
|
+
|
|
118
|
+
# Tries to identify the file with DROID unless the current result is already acceptable.
#
# When DROID reports several matches only the first one is used. On success the given
# result hash is emptied and refilled with DROID's answer.
#
# @param file [String] path of the file to identify
# @param result [Hash] result so far
# @return [Hash] the given result, replaced by DROID's match when there is one
def get_droid_identification(file, result = {})
  return result if result_ok? result, :DROID
  droid_output = ::Libis::Format::Droid.run file
  debug "DROID: #{droid_output}"

  # No match at all: keep what we have rather than failing on a nil entry below.
  return result if droid_output.nil? || droid_output.empty?

  warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
  match = droid_output.first
  result.clear
  # The MIME field may list several values; take the first token that looks like type/subtype.
  result[:mimetype] = match[:mime_type].to_s.split(/[\s,]+/).find { |x| x =~ /.*\/.*/ }
  result[:matchtype] = match[:method]
  result[:puid] = match[:puid]
  result[:format_name] = match[:format_name]
  result[:format_version] = match[:format_version]
  result[:method] = 'droid'

  debug "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})"
  result
end
|
|
135
|
+
|
|
136
|
+
# Tries to identify the file with the system `file` command unless the current result
# is already acceptable. This step is best effort: failures are logged and ignored.
#
# @param file [String] path of the file to identify
# @param result [Hash, nil] result so far
# @return [Hash] the given result when acceptable; otherwise a new hash with :mimetype
#   and :puid (when a MIME type was reported) and :method => 'file'
def get_file_identification(file, result = nil)
  return result if result_ok? result
  result = {}
  begin
    # NOTE(review): the path is wrapped in literal double quotes here while Fido#run
    # passes it unquoted - confirm which form Libis::Tools::Command.run expects.
    output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', "\"#{file.escape_for_string}\"")
    # The MIME type is printed on standard output; Command.run delivers the output as
    # lines (Fido#run joins them), so take the first line rather than the error stream.
    mimetype = Array(output[:out]).first.to_s.strip
    # Accept only something shaped like type/subtype, so that an error message printed
    # by `file` is not mistaken for a MIME type.
    if mimetype =~ %r{\A[\w.+-]+/[\w.+-]+\z}
      debug "File result: '#{mimetype}'"
      result[:mimetype] = mimetype
      result[:puid] = get_puid(mimetype)
    end
    result[:method] = 'file'
  rescue StandardError => e
    # Keep going with whatever was collected, but leave a trace of the failure.
    debug "File identification failed: #{e.message}"
  end
  result
end
|
|
153
|
+
|
|
154
|
+
# Last-resort identification based on the file extension, used unless the current
# result is already acceptable.
#
# @param file [String] path of the file to identify
# @param result [Hash, nil] result so far
# @return [Hash] the given result when acceptable; otherwise a new hash with :mimetype
#   and :puid of the first type registered for the extension (when there is one) and
#   :method => 'extension'
def get_extension_identification(file, result = nil)
  return result if result_ok? result
  result = {}
  # Named type_info so the local does not shadow the logger's #info method.
  type_info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
  debug "File extension info: #{type_info}"
  if type_info
    # Array() copes with a missing list without resorting to `rescue nil`.
    result[:mimetype] = Array(type_info[:MIME]).first
    result[:puid] = Array(type_info[:PUID]).first
  end
  result[:method] = 'extension'
  result
end
|
|
166
|
+
|
|
167
|
+
# Refines a generic XML identification by validating the document against every
# registered XML Schema.
#
# For each schema the document validates against, the result's MIME type is set to the
# schema's MIME type, its PUID is reset and the result is enriched again from the type
# database. All registered schemas are tried, also after a successful validation.
#
# @param file [String] path of the XML file
# @param result [Hash] result so far
# @return [Hash] the possibly refined result
def validate_against_xml_schema(file, result)
  if result[:mimetype] =~ /^(text|application)\/xml$/
    doc = ::Libis::Tools::XmlDocument.open file
    xml_validations.each_pair do |mime, xsd_file|
      next unless xsd_file && doc.validates_against?(xsd_file)
      debug "XML file validated against XML Schema: #{xsd_file}"
      result[:mimetype] = mime
      result[:puid] = nil
      result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
    end
  end
  result
end
|
|
181
|
+
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
end
|
|
185
|
+
end
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'singleton'
|
|
4
|
+
require 'yaml'
|
|
5
|
+
|
|
6
|
+
require 'backports/rails/hash'
|
|
7
|
+
require 'libis/tools/logger'
|
|
8
|
+
require 'libis/tools/extend/string'
|
|
9
|
+
|
|
10
|
+
module Libis
|
|
11
|
+
module Format
|
|
12
|
+
|
|
13
|
+
class TypeDatabase
|
|
14
|
+
include Singleton
|
|
15
|
+
include ::Libis::Tools::Logger
|
|
16
|
+
|
|
17
|
+
# Fetches the info record of a type.
#
# @param t [Symbol] type name
# @return [Hash] the record, or an empty hash for an unknown type
def self.typeinfo(t)
  record = instance.types[t]
  record || {}
end
|
|
20
|
+
|
|
21
|
+
# Completes an identification hash with data from the type database.
#
# The type is derived from the PUID or, failing that, the MIME type, unless one is
# already present. When a type is known, blank MIME and PUID entries are filled in and
# the extensions and group of the type are added. The given hash is updated in place.
#
# @param info [Hash] identification data
# @param map_keys [Hash] maps the logical keys :PUID, :MIME, :TYPE, :EXTENSIONS and
#   :GROUP onto the keys actually used in info; unmapped keys are used as they are
# @return [Hash] the updated info, or {} when info is not a Hash
def self.enrich(info, map_keys = {})
  return {} unless info.is_a? Hash
  mapper = Hash.new {|hash,key| hash[key] = key}
  mapper.merge! map_keys
  puid_key = mapper[:PUID]
  mime_key = mapper[:MIME]
  type_key = mapper[:TYPE]

  puid = info[puid_key]
  info[type_key] ||= (puid_infos(puid).first[:TYPE] rescue nil) unless puid.blank?

  mime = info[mime_key]
  info[type_key] ||= (mime_infos(mime).first[:TYPE] rescue nil) unless mime.blank?

  type_name = info[type_key]
  unless type_name.nil?
    info[mime_key] = type_mimetypes(type_name).first if info[mime_key].blank?
    info[puid_key] = type_puids(type_name).first if info[puid_key].blank?
    info[mapper[:EXTENSIONS]] = type_extentions(type_name)
    info[mapper[:GROUP]] = type_group(type_name)
  end
  info
end
|
|
39
|
+
|
|
40
|
+
# Tells which group a type belongs to.
#
# @param t [Symbol] type name
# @return [Symbol, nil] the group, nil for an unknown type
def self.type_group(t)
  record = typeinfo(t)
  record[:GROUP]
end
|
|
43
|
+
|
|
44
|
+
# Lists the MIME types registered for a type.
#
# @param t [Symbol] type name
# @return [Array<String>] empty when the type is unknown or has none
def self.type_mimetypes(t)
  mimetypes = typeinfo(t)[:MIME]
  mimetypes || []
end
|
|
47
|
+
|
|
48
|
+
# Lists the PUIDs registered for a type.
#
# @param t [Symbol] type name
# @return [Array<String>] empty when the type is unknown or has none
def self.type_puids(t)
  puids = typeinfo(t)[:PUID]
  puids || []
end
|
|
51
|
+
|
|
52
|
+
# Lists the file extensions registered for a type.
#
# @param t [Symbol] type name
# @return [Array<String>] empty when the type is unknown or has none
def self.type_extentions(t)
  extensions = typeinfo(t)[:EXTENSIONS]
  extensions || []
end
|
|
55
|
+
|
|
56
|
+
# Lists the names of all types that belong to a group.
#
# @param group [Symbol, String] group name (converted with to_sym)
# @return [Array<Symbol>] type names
def self.group_types(group)
  instance.types.each_with_object([]) do |(type_name, record), names|
    names << type_name if record[:GROUP] == group.to_sym
  end
end
|
|
61
|
+
|
|
62
|
+
# Collects the info records of all types that list the given PUID.
#
# @param puid [String] PRONOM identifier
# @return [Array<Hash>] matching records; types whose PUID list cannot be queried are skipped
def self.puid_infos(puid)
  instance.types.values.select do |record|
    begin
      record[:PUID].include?(puid)
    rescue StandardError
      false
    end
  end
end
|
|
67
|
+
|
|
68
|
+
# Collects the names of all types that list the given PUID.
#
# @param puid [String] PRONOM identifier
# @return [Array<Symbol>] matching type names; types whose PUID list cannot be queried are skipped
def self.puid_types(puid)
  instance.types.each_with_object([]) do |(type_name, record), names|
    listed = begin
      record[:PUID].include?(puid)
    rescue StandardError
      false
    end
    names << type_name if listed
  end
end
|
|
73
|
+
|
|
74
|
+
# Lists the group of every type that lists the given PUID (one entry per type).
#
# @param puid [String] PRONOM identifier
# @return [Array<Symbol>] groups, in the order of the matching types
def self.puid_groups(puid)
  matching_types = puid_types(puid)
  matching_types.map { |type_name| type_group(type_name) }
end
|
|
79
|
+
|
|
80
|
+
# Collects the info records of all types that list the given MIME type.
#
# @param mime [String] MIME type
# @return [Array<Hash>] matching records; types whose MIME list cannot be queried are skipped
def self.mime_infos(mime)
  instance.types.values.select do |record|
    begin
      record[:MIME].include?(mime)
    rescue StandardError
      false
    end
  end
end
|
|
85
|
+
|
|
86
|
+
# Collects the names of all types that list the given MIME type.
#
# @param mime [String] MIME type
# @return [Array<Symbol>] matching type names; types whose MIME list cannot be queried are skipped
def self.mime_types(mime)
  instance.types.each_with_object([]) do |(type_name, record), names|
    listed = begin
      record[:MIME].include?(mime)
    rescue StandardError
      false
    end
    names << type_name if listed
  end
end
|
|
91
|
+
|
|
92
|
+
# Lists the group of every type that lists the given MIME type (one entry per type).
#
# @param mime [String] MIME type
# @return [Array<Symbol>] groups, in the order of the matching types
def self.mime_groups(mime)
  matching_types = mime_types(mime)
  matching_types.map { |type_name| type_group(type_name) }
end
|
|
97
|
+
|
|
98
|
+
# Collects the info records of all types that list the given file extension.
#
# @param ext [String] file extension, with or without a leading dot
# @return [Array<Hash>] matching records; types whose extension list cannot be queried are skipped
def self.ext_infos(ext)
  bare_ext = ext.gsub(/^\./, '')
  instance.types.values.select do |record|
    begin
      record[:EXTENSIONS].include?(bare_ext)
    rescue StandardError
      false
    end
  end
end
|
|
104
|
+
|
|
105
|
+
# Collects the names of all types that list the given file extension.
#
# @param ext [String] file extension, with or without a leading dot
# @return [Array<Symbol>] matching type names; types whose extension list cannot be queried are skipped
def self.ext_types(ext)
  bare_ext = ext.gsub(/^\./, '')
  instance.types.each_with_object([]) do |(type_name, record), names|
    listed = begin
      record[:EXTENSIONS].include?(bare_ext)
    rescue StandardError
      false
    end
    names << type_name if listed
  end
end
|
|
111
|
+
|
|
112
|
+
# Finds the info record of the first type that lists the given PUID.
#
# @param puid [String] PRONOM identifier
# @return [Hash, nil] the record, or nil when no type lists the PUID
def self.puid_typeinfo(puid)
  instance.types.each_value.find do |record|
    record[:PUID] && record[:PUID].include?(puid)
  end
end
|
|
118
|
+
|
|
119
|
+
# Tells whether any type in the database lists the given MIME type.
#
# @param mime [String] MIME type
# @return [Boolean]
def self.known_mime?(mime)
  instance.types.each_value.any? { |record| record[:MIME].include?(mime) }
end
|
|
125
|
+
|
|
126
|
+
attr_reader :types
|
|
127
|
+
|
|
128
|
+
# Loads type definitions into the database.
#
# The input is organised as group => { type name => attributes }. The attributes MIME,
# EXTENSIONS and PUID may be given as a comma/whitespace separated string or as an
# array. A type that is already known is merged with the new definition.
#
# The supplied hash is not modified; the database stores its own copies.
#
# @param file_or_hash [Hash, String] the definitions, or the path of a YAML file holding them
# @param append [Boolean] when merging with an existing type: true puts existing list
#   entries first and keeps existing single values; false puts the new list entries
#   first and lets new single values win
# @return [Object] the definitions that were read (nil/false for an empty YAML document)
def load_types(file_or_hash = {}, append = true)
  hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
  # An empty YAML document does not load as a Hash; there is nothing to register then.
  return hash unless hash.respond_to?(:each_pair)

  # Turns 'a, b c' or ['a', 'b'] into a clean list of non-empty strings.
  to_list = lambda do |value|
    items = value.is_a?(Array) ? value : value.to_s.split(/[\s,]+/)
    items.map { |v| v.to_s.strip }.reject(&:empty?)
  end

  # noinspection RubyResolve
  hash.each do |group, type_info|
    (type_info || {}).each do |type_name, raw_info|
      type_key = type_name.to_sym
      # Work on a symbol-keyed copy so the caller's data stays untouched.
      info = (raw_info || {}).each_with_object({}) { |(k, v), copy| copy[k.to_sym] = v }
      info[:TYPE] = type_key
      info[:GROUP] = group.to_sym
      info[:MIME] = to_list.call(info[:MIME])
      info[:EXTENSIONS] = to_list.call(info[:EXTENSIONS])
      info[:PUID] = to_list.call(info[:PUID]) if info[:PUID]
      if @types.has_key?(type_key)
        warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
        info.merge!(@types[type_key]) do |_, v_new, v_old|
          case v_old
          when Array
            # uniq keeps repeated loads of the same definitions from piling up duplicates
            (append ? v_old + Array(v_new) : Array(v_new) + v_old).uniq
          when Hash
            append ? v_new.merge(v_old) : v_old.merge(v_new)
          else
            append ? v_old : v_new
          end
        end
      end
      @types[type_key] = info
    end
  end
  hash
end
|
|
157
|
+
|
|
158
|
+
protected
|
|
159
|
+
|
|
160
|
+
# Creates the empty database and fills it from the types.yml file in the gem's data
# directory (three levels above this source file).
def initialize
  @types = {}
  gem_root = File.join(File.dirname(__FILE__), '..', '..', '..')
  data_dir = File.absolute_path(File.join(gem_root, 'data'))
  load_types(File.join(data_dir, 'types.yml'))
end
|
|
166
|
+
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
end
|
|
170
|
+
end
|
data/lib/libis/format.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'libis/format/version'
|
|
2
|
+
|
|
3
|
+
module Libis
  # Namespace of the format services. Its main classes are registered for autoloading,
  # so each file is required only when its constant is first referenced.
  module Format
    {
      TypeDatabase: 'libis/format/type_database',
      Identifier: 'libis/format/identifier',
      Fido: 'libis/format/fido',
      Droid: 'libis/format/droid',
      Converter: 'libis/format/converter'
    }.each do |const_name, feature|
      autoload const_name, feature
    end
  end
end
|
data/lib/libis-format.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require 'libis/format'
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
|
+
|
|
6
|
+
require 'libis/format/version'
|
|
7
|
+
|
|
8
|
+
# Gem specification for libis-format.
Gem::Specification.new do |spec|
  spec.name = 'libis-format'
  spec.version = Libis::Format::VERSION
  spec.authors = ['Kris Dekeyser']
  spec.email = ['kris.dekeyser@libis.be']
  # Fixed the duplicated word in the summary ("format format").
  spec.summary = %q{LIBIS File format services.}
  spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
  spec.homepage = ''
  spec.license = 'MIT'

  # Package every file tracked by git; executables and test files are derived from that list.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.6'
  spec.add_development_dependency 'rake', '~> 10.3'
  spec.add_development_dependency 'rspec', '~> 3.1'
  spec.add_development_dependency 'simplecov', '~> 0.9'

  # NOTE(review): type_database.rb requires 'backports/rails/hash' but backports is not
  # declared here - presumably it arrives through libis-tools; confirm.
  spec.add_runtime_dependency 'libis-tools', '~> 0.9'
  spec.add_runtime_dependency 'os', '= 0.9.6'
end
|
|
Binary file
|
|
Binary file
|