libis-format 0.9.32 → 0.9.33
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/data/types.yml +30 -16
- data/lib/libis/format/config.rb +7 -18
- data/lib/libis/format/converter/image_converter.rb +6 -0
- data/lib/libis/format/droid.rb +82 -25
- data/lib/libis/format/extension_identification.rb +55 -0
- data/lib/libis/format/fido.rb +57 -72
- data/lib/libis/format/file_tool.rb +76 -0
- data/lib/libis/format/identification_tool.rb +174 -0
- data/lib/libis/format/identifier.rb +129 -117
- data/lib/libis/format/type_database.rb +36 -5
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +3 -0
- data/libis-format.gemspec +2 -1
- data/spec/converter_spec.rb +6 -4
- data/spec/identifier_spec.rb +125 -34
- metadata +21 -126
- data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
- data/tools/droid/container-signature-20170330.xml +0 -3584
- data/tools/droid/droid-command-line-6.3.jar +0 -0
- data/tools/droid/droid.bat +0 -152
- data/tools/droid/droid.sh +0 -152
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.10.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/droid-container-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-help-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-results-6.3.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.13.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -50
- data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
- data/tools/fido/conf/container-signature-20170330.xml +0 -3584
- data/tools/fido/conf/dc.xsd +0 -119
- data/tools/fido/conf/dcmitype.xsd +0 -53
- data/tools/fido/conf/dcterms.xsd +0 -383
- data/tools/fido/conf/fido-formats.xsd +0 -173
- data/tools/fido/conf/format_extension_template.xml +0 -105
- data/tools/fido/conf/format_extensions.xml +0 -484
- data/tools/fido/conf/formats-v90.xml +0 -48877
- data/tools/fido/conf/pronom-xml-v90.zip +0 -0
- data/tools/fido/conf/versions.xml +0 -8
- data/tools/fido/fido.bat +0 -4
- data/tools/fido/fido.py +0 -884
- data/tools/fido/fido.sh +0 -5
- data/tools/fido/package.py +0 -96
- data/tools/fido/prepare.py +0 -645
- data/tools/fido/pronomutils.py +0 -200
- data/tools/fido/toxml.py +0 -60
- data/tools/fido/update_signatures.py +0 -183
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a13d9df8f5e85ccc58758e46e556066d16935b0
|
4
|
+
data.tar.gz: 3a80e5cfc6d7ea70f4c0091420012446ec801b74
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6a45a701e5ec6e07ebf763ee2fe3d97eb327fdccc85ca9aaca3e006e5dee3c1c058081c9a33c311aa72a7f2272661986799ab4a3f731f35e6ed5053ac1a25b3
|
7
|
+
data.tar.gz: 6f7b02f7ab9a9fad169920f7b2dc04fdbc63a33fb8d5e70e458de9055fa1427acf453f88ca3937883d721a15d636af53ce943a6feeb68e980e0d28a04970222d
|
data/data/types.yml
CHANGED
@@ -10,53 +10,62 @@
|
|
10
10
|
IMAGE:
|
11
11
|
TIFF:
|
12
12
|
NAME: Tagged Image File Format (TIFF)
|
13
|
-
MIME: image/tiff
|
13
|
+
MIME: image/tiff image/x-tiff image/tif image/x-tif application/tiff application/x-tiff application/tif application/x-tif
|
14
14
|
PUID: fmt/353 fmt/154 fmt/153 fmt/156 fmt/155 fmt/152 x-fmt/399 x-fmt/388 x-fmt/387 fmt/202
|
15
15
|
EXTENSIONS: tif,TIF,tiff,tifx,dng,nef
|
16
16
|
|
17
17
|
JP2:
|
18
18
|
NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
|
19
|
-
MIME: image/jp2
|
20
|
-
PUID: fmt/
|
19
|
+
MIME: image/jp2, image/jpeg2000, image/jpeg2000-image, image/x-jpeg2000-image, image/jpx
|
20
|
+
PUID: fmt/463, fmt/151, x-fmt/392
|
21
21
|
EXTENSIONS: jp2
|
22
22
|
|
23
23
|
JPG:
|
24
24
|
NAME: Joint Photographic Experts Group (JPEG)
|
25
|
-
MIME: image/jpeg
|
25
|
+
MIME: image/jpeg, image/jpg, image/jp_, application/jpg, application/x-jpg, image/pjpeg, image/pipeg, image/vnd.swiftview-jpeg
|
26
26
|
PUID: fmt/42 fmt/43 fmt/44 fmt/41 x-fmt/398 x-fmt/390 x-fmt/391 fmt/645
|
27
27
|
EXTENSIONS: jpg,jpe,jpeg
|
28
28
|
|
29
29
|
PNG:
|
30
30
|
NAME: Portable Network Graphics (PNG)
|
31
|
-
MIME: image/png
|
31
|
+
MIME: image/png, application/png, application/x-png
|
32
|
+
PUID: fmt/11, fmt/12, fmt/13,
|
32
33
|
EXTENSIONS: png
|
33
34
|
|
35
|
+
APNG:
|
36
|
+
NAME: Animate Portable Network Graphics
|
37
|
+
MIME: image/vnd.mozilla.apng
|
38
|
+
PUID: fmt/935
|
39
|
+
EXTENSIONS: apng, png
|
40
|
+
|
34
41
|
BMP:
|
35
42
|
NAME: Device Independent Bitmap (DIP/BMP)
|
36
|
-
MIME: image/bmp,image/x-ms-bmp
|
43
|
+
MIME: image/bmp, image/x-bmp, image/x-bitmap, image/x-xbitmap, image/x-win-bitmap, image/x-windows-bmp, image/ms-bmp, image/x-ms-bmp, application/bmp, application/x-bmp, application/x-win-bitmap
|
44
|
+
PUID: fmt/115, fmt/118, fmt/119, fmt/114, fmt/116, fmt/117, x-fmt/270
|
37
45
|
EXTENSIONS: bmp
|
38
46
|
|
39
47
|
GIF:
|
40
48
|
NAME: Graphics Interchange Format (GIF)
|
41
49
|
MIME: image/gif
|
50
|
+
PUID: fmt/3, fmt/4
|
42
51
|
EXTENSIONS: gif
|
43
52
|
|
44
53
|
PBM:
|
45
54
|
NAME: Portable Bitmap Format (PBM)
|
46
|
-
|
47
|
-
|
55
|
+
MIME: image/x-portable-bitmap, image/pbm, image/x-pbm
|
56
|
+
PUID: fmt/409, x-fmt/164
|
48
57
|
EXTENSIONS: pbm
|
49
58
|
|
50
59
|
PGM:
|
51
60
|
NAME: Portable GrayMap Format (PGM)
|
52
|
-
PUID: fmt/406
|
53
|
-
MIME: image/x
|
61
|
+
PUID: fmt/406, fmt/407
|
62
|
+
MIME: image/x-portable-graymap, image/pgm, image/x-pgm
|
54
63
|
EXTENSIONS: pgm
|
55
64
|
|
56
65
|
PPM:
|
57
66
|
NAME: Portable Pixel Map (PPM)
|
58
|
-
PUID: fmt/408
|
59
|
-
MIME: image/x
|
67
|
+
PUID: fmt/408, x-fmt/178
|
68
|
+
MIME: image/x-portable-pixmap, application/ppm, application/x-ppm, image/ppm, image/x-ppm
|
60
69
|
EXTENSIONS: ppm
|
61
70
|
|
62
71
|
AUDIO:
|
@@ -133,7 +142,7 @@ TEXT:
|
|
133
142
|
RTF:
|
134
143
|
NAME: Rich Text Format (RTF)
|
135
144
|
PUID: fmt/45
|
136
|
-
MIME:
|
145
|
+
MIME: application/rtf text/rtf
|
137
146
|
EXTENSIONS: rtf
|
138
147
|
|
139
148
|
HTML:
|
@@ -143,8 +152,8 @@ TEXT:
|
|
143
152
|
|
144
153
|
MSDOC:
|
145
154
|
NAME: Microsoft Word Document (DOC)
|
146
|
-
PUID: fmt/609 fmt/39 x-fmt/
|
147
|
-
MIME: application/vnd.ms-word application/msword
|
155
|
+
PUID: fmt/609 fmt/39 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40 fmt/754
|
156
|
+
MIME: application/msword application/doc appl/text application/vnd.msword application/vnd.ms-word application/winword application/word application/x-msw6 application/x-msword
|
148
157
|
EXTENSIONS: doc
|
149
158
|
|
150
159
|
MSDOCX:
|
@@ -171,9 +180,14 @@ TEXT:
|
|
171
180
|
MIME: application/vnd.wordperfect
|
172
181
|
EXTENSIONS: wpd
|
173
182
|
|
183
|
+
OO_WRITER:
|
184
|
+
NAME: OpenDocument Text
|
185
|
+
MIME: application/vnd.oasis.opendocument.text, application/x-vnd.oasis.opendocument.text
|
186
|
+
PUID: fmt/136, fmt/290, fmt/291
|
187
|
+
|
174
188
|
XML:
|
175
189
|
NAME: Extensible Markup Language (XML)
|
176
|
-
MIME: text/xml application/xml
|
190
|
+
MIME: text/xml application/xml application/x-xml
|
177
191
|
PUID: fmt/101
|
178
192
|
EXTENSIONS: xml
|
179
193
|
|
data/lib/libis/format/config.rb
CHANGED
@@ -13,24 +13,13 @@ module Libis
|
|
13
13
|
Config[:j2kdriver] = 'j2kdriver'
|
14
14
|
Config[:soffice_path] = 'soffice'
|
15
15
|
Config[:ghostscript_path] = 'gs'
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
#
|
21
|
-
|
22
|
-
Config[:
|
23
|
-
File.absolute_path(
|
24
|
-
File.join(
|
25
|
-
File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid', OS.windows? ? 'droid.bat' : 'droid.sh'
|
26
|
-
)
|
27
|
-
)
|
28
|
-
Config[:fido_path] =
|
29
|
-
File.absolute_path(
|
30
|
-
File.join(
|
31
|
-
File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido', OS.windows? ? 'fido.bat' : 'fido.sh'
|
32
|
-
)
|
33
|
-
)
|
16
|
+
Config[:droid_path] = '/opt/droid/droid.sh'
|
17
|
+
Config[:fido_path] = '/usr/local/bin/fido'
|
18
|
+
data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
|
19
|
+
Config[:fido_formats] = [(File.join(data_dir, 'lias_formats.xml'))]
|
20
|
+
# noinspection RubyStringKeysInHashInspection
|
21
|
+
Config[:xml_validations] = [['archive/ead', File.join(data_dir, 'ead.xsd')]]
|
22
|
+
Config[:type_database] = File.join(data_dir, 'types.yml')
|
34
23
|
|
35
24
|
end
|
36
25
|
end
|
@@ -8,6 +8,12 @@ require 'fileutils'
|
|
8
8
|
|
9
9
|
MiniMagick.logger.level = ::Logger::ERROR
|
10
10
|
|
11
|
+
MiniMagick.configure do |config|
|
12
|
+
# config.cli = :graphicsmagick
|
13
|
+
config.validate_on_create = false
|
14
|
+
config.validate_on_write = false
|
15
|
+
end
|
16
|
+
|
11
17
|
module Libis
|
12
18
|
module Format
|
13
19
|
module Converter
|
data/lib/libis/format/droid.rb
CHANGED
@@ -3,45 +3,102 @@ require 'singleton'
|
|
3
3
|
require 'tempfile'
|
4
4
|
require 'csv'
|
5
5
|
|
6
|
-
require 'libis/tools/extend/string'
|
7
|
-
require 'libis/tools/logger'
|
8
|
-
require 'libis/tools/command'
|
9
|
-
|
10
6
|
require 'libis/format/config'
|
11
7
|
|
8
|
+
unless CSV::HeaderConverters.has_key?(:droid_headers)
|
9
|
+
CSV::HeaderConverters[:droid_headers] = lambda {|h|
|
10
|
+
h.encode(ConverterEncoding).downcase.strip.
|
11
|
+
gsub(/\W+/, "").to_sym
|
12
|
+
}
|
13
|
+
end
|
14
|
+
|
15
|
+
require_relative 'identification_tool'
|
16
|
+
|
12
17
|
module Libis
|
13
18
|
module Format
|
14
19
|
|
15
|
-
class Droid
|
16
|
-
|
17
|
-
|
20
|
+
class Droid < Libis::Format::IdentificationTool
|
21
|
+
|
22
|
+
def run_list(filelist)
|
23
|
+
runner(filelist)
|
24
|
+
end
|
18
25
|
|
19
|
-
def
|
20
|
-
|
26
|
+
def run_dir(dir, recursive = true)
|
27
|
+
profile = profile_file_name
|
28
|
+
report = result_file_name
|
29
|
+
create_profile(dir, profile, recursive)
|
30
|
+
create_report(profile, report)
|
31
|
+
parse_report(report)
|
21
32
|
end
|
22
33
|
|
23
34
|
def run(file)
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
+
runner(file)
|
36
|
+
end
|
37
|
+
|
38
|
+
protected
|
39
|
+
|
40
|
+
def runner(file_or_list)
|
41
|
+
profile = profile_file_name
|
42
|
+
report = result_file_name
|
43
|
+
create_profile(file_or_list, profile)
|
44
|
+
create_report(profile, report)
|
45
|
+
parse_report(report)
|
46
|
+
end
|
47
|
+
|
48
|
+
def parse_report(report)
|
49
|
+
keys = [
|
50
|
+
:id, :parent_id, :uri, :filepath, :filename, :matchtype, :status, :filesize, :type, :extension,
|
51
|
+
:mod_time, :ext_mismatch, :hash, :format_count, :puid, :mimetype, :format_name, :format_version]
|
52
|
+
result = CSV.parse(File.readlines(report).join)
|
53
|
+
.map {|a| Hash[keys.zip(a)]}
|
54
|
+
.select {|a| a[:type] == 'File'}
|
55
|
+
# File.delete report
|
56
|
+
result.each do |r|
|
57
|
+
r.delete(:id)
|
58
|
+
r.delete(:parent_id)
|
59
|
+
r.delete(:uri)
|
60
|
+
r.delete(:filename)
|
61
|
+
r.delete(:status)
|
62
|
+
r.delete(:filesize)
|
63
|
+
r.delete(:type)
|
64
|
+
r.delete(:extension)
|
65
|
+
r.delete(:mod_time)
|
66
|
+
r.delete(:hash)
|
67
|
+
r.delete(:format_count)
|
68
|
+
r[:source] = :droid
|
69
|
+
end
|
70
|
+
process_output(result)
|
71
|
+
end
|
72
|
+
|
73
|
+
def create_report(profile, report)
|
74
|
+
args = [
|
35
75
|
'-e', report,
|
36
76
|
'-p', profile,
|
37
77
|
'-q'
|
38
|
-
|
39
|
-
|
78
|
+
]
|
79
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
|
80
|
+
raise RuntimeError, "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
40
81
|
File.delete profile
|
41
|
-
result = CSV.read(report , headers: true, header_converters: [:downcase, :symbol])
|
42
|
-
File.delete report
|
43
|
-
result.map{|r|r.to_hash}
|
44
82
|
end
|
83
|
+
|
84
|
+
def create_profile(file_or_list, profile, recursive = false)
|
85
|
+
args = []
|
86
|
+
files = (file_or_list.is_a?(Array)) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
|
87
|
+
files.each { |file| args << '-a' << file}
|
88
|
+
args << '-p' << profile << '-q'
|
89
|
+
args << '-R' if recursive
|
90
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
|
91
|
+
raise RuntimeError, "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
92
|
+
end
|
93
|
+
|
94
|
+
def profile_file_name
|
95
|
+
File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
|
96
|
+
end
|
97
|
+
|
98
|
+
def result_file_name
|
99
|
+
File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
|
100
|
+
end
|
101
|
+
|
45
102
|
end
|
46
103
|
|
47
104
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require_relative 'identification_tool'
|
2
|
+
|
3
|
+
module Libis
|
4
|
+
module Format
|
5
|
+
|
6
|
+
class ExtensionIdentification < Libis::Format::IdentificationTool
|
7
|
+
|
8
|
+
def run_list(filelist)
|
9
|
+
|
10
|
+
output = runner(nil, filelist)
|
11
|
+
|
12
|
+
process_output(output)
|
13
|
+
|
14
|
+
end
|
15
|
+
|
16
|
+
def run_dir(dir, recursive = true)
|
17
|
+
|
18
|
+
filelist = find_files(dir, recursive)
|
19
|
+
|
20
|
+
output = runner(nil, filelist)
|
21
|
+
|
22
|
+
process_output(output)
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
def run(file)
|
27
|
+
|
28
|
+
output = runner(file)
|
29
|
+
|
30
|
+
process_output(output)
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
protected
|
35
|
+
|
36
|
+
def runner(*args)
|
37
|
+
|
38
|
+
args.map do |file|
|
39
|
+
info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
|
40
|
+
if info
|
41
|
+
{
|
42
|
+
filepath: file,
|
43
|
+
mimetype: (info[:MIME].first rescue nil),
|
44
|
+
puid: (info[:PUID].first rescue nil),
|
45
|
+
matchtype: 'extension',
|
46
|
+
source: :type_database
|
47
|
+
}
|
48
|
+
end
|
49
|
+
end.cleanup
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/libis/format/fido.rb
CHANGED
@@ -1,101 +1,86 @@
|
|
1
|
-
require 'csv'
|
2
|
-
|
3
|
-
require 'singleton'
|
4
1
|
require 'libis/tools/extend/string'
|
5
|
-
require 'libis/tools/logger'
|
6
2
|
require 'libis/tools/command'
|
7
3
|
|
4
|
+
require 'csv'
|
8
5
|
require 'libis/format/config'
|
9
|
-
|
6
|
+
|
7
|
+
require_relative 'identification_tool'
|
10
8
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
|
14
|
-
class Fido
|
15
|
-
include Singleton
|
16
|
-
include ::Libis::Tools::Logger
|
12
|
+
class Fido < Libis::Format::IdentificationTool
|
17
13
|
|
18
|
-
|
19
|
-
|
20
|
-
def self.run(file, formats = nil)
|
21
|
-
self.instance.run file, formats
|
14
|
+
def self.add_formats(formats_file)
|
15
|
+
self.instance.formats << formats_file unless self.instance.formats.include?(formats_file)
|
22
16
|
end
|
23
17
|
|
24
|
-
def
|
18
|
+
def self.del_formats(formats_file)
|
19
|
+
self.instance.formats.delete(formats_file)
|
20
|
+
end
|
25
21
|
|
26
|
-
|
22
|
+
attr_reader :formats
|
27
23
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
when String
|
33
|
-
fmt_list << xtra_formats
|
34
|
-
else
|
35
|
-
# do nothing
|
24
|
+
def run_list(filelist)
|
25
|
+
create_list_file(filelist) do |list_file|
|
26
|
+
output = runner(nil, '-input', list_file.escape_for_string)
|
27
|
+
process_output(output)
|
36
28
|
end
|
29
|
+
end
|
37
30
|
|
31
|
+
def run_dir(dir, recursive = true)
|
38
32
|
args = []
|
39
|
-
args << '-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
|
45
|
-
fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
|
46
|
-
|
47
|
-
fido_output.each do |x|
|
48
|
-
if x[:status] == 'OK'
|
49
|
-
x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None'
|
50
|
-
next if BAD_MIMETYPES.include? x[:mimetype]
|
51
|
-
x[:score] = 5
|
52
|
-
case x[:matchtype]
|
53
|
-
when 'signature'
|
54
|
-
x[:score] += 5
|
55
|
-
when 'container'
|
56
|
-
typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid])
|
57
|
-
ext = File.extname(file)
|
58
|
-
x[:score] += 2 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
59
|
-
else
|
60
|
-
# do nothing
|
61
|
-
end
|
62
|
-
fido_results << x
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
fido_results = fido_results.inject({}) do |result, value|
|
67
|
-
result[value[:score]] ||= []
|
68
|
-
result[value[:score]] << value
|
69
|
-
result
|
70
|
-
end
|
33
|
+
args << '-recurse' if recursive
|
34
|
+
output = runner(dir, *args)
|
35
|
+
process_output(output)
|
36
|
+
end
|
71
37
|
|
72
|
-
|
38
|
+
def run(file)
|
39
|
+
output = runner(file)
|
40
|
+
process_output(output)
|
41
|
+
end
|
73
42
|
|
74
|
-
|
75
|
-
return {} unless max_score and max_score >= 5 && fido_results[max_score].size == 1
|
43
|
+
protected
|
76
44
|
|
77
|
-
|
45
|
+
def initialize
|
46
|
+
super
|
47
|
+
@formats = Libis::Format::Config[:fido_formats].dup
|
48
|
+
bad_mimetype('application/vnd.oasis.opendocument.text')
|
49
|
+
bad_mimetype('application/vnd.oasis.opendocument.spreadsheet')
|
78
50
|
end
|
79
51
|
|
80
|
-
|
81
|
-
instance.formats << f
|
82
|
-
end
|
52
|
+
attr_writer :formats
|
83
53
|
|
84
|
-
def
|
85
|
-
|
86
|
-
|
54
|
+
def runner(filename, *args)
|
55
|
+
# Load custom format definitions if present
|
56
|
+
args << '-loadformats' << "#{formats.join(',')}" unless formats.empty?
|
87
57
|
|
88
|
-
|
58
|
+
# Workaround for Fido performance bug
|
59
|
+
args << '-bufsize' << '1000'
|
89
60
|
|
90
|
-
|
61
|
+
# Add filename to argument list (optional)
|
62
|
+
args << "#{filename.escape_for_string}" if filename
|
91
63
|
|
92
|
-
|
93
|
-
|
94
|
-
@formats = [(File.join(data_dir, 'lias_formats.xml'))]
|
95
|
-
end
|
64
|
+
# No header output
|
65
|
+
args << '-q'
|
96
66
|
|
97
|
-
|
98
|
-
::Libis::
|
67
|
+
# Run command and capture results
|
68
|
+
fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
|
69
|
+
|
70
|
+
# Raise an error if Fido reported any errors
|
71
|
+
raise RuntimeError, "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
|
72
|
+
|
73
|
+
# Parse output (CSV) text into array and return result
|
74
|
+
keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype]
|
75
|
+
result = CSV.parse(fido[:out].join("\n"))
|
76
|
+
.map {|a| Hash[keys.zip(a)]}
|
77
|
+
.select {|a| a[:status] == 'OK'}
|
78
|
+
result.each do |r|
|
79
|
+
r.delete(:time)
|
80
|
+
r.delete(:status)
|
81
|
+
r.delete(:filesize)
|
82
|
+
r[:source] = :fido
|
83
|
+
end
|
99
84
|
end
|
100
85
|
|
101
86
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require_relative 'identification_tool'
|
2
|
+
|
3
|
+
module Libis
|
4
|
+
module Format
|
5
|
+
|
6
|
+
class FileTool < Libis::Format::IdentificationTool
|
7
|
+
|
8
|
+
def run_list(filelist)
|
9
|
+
|
10
|
+
create_list_file(filelist) do |list_file|
|
11
|
+
|
12
|
+
output = runner(nil, '--files-from', list_file)
|
13
|
+
|
14
|
+
process_output(output)
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
def run_dir(dir, recursive = true)
|
21
|
+
|
22
|
+
filelist = find_files(dir, recursive)
|
23
|
+
|
24
|
+
create_list_file(filelist) do |list_file|
|
25
|
+
|
26
|
+
output = runner(nil, '--files-from', list_file)
|
27
|
+
|
28
|
+
process_output(output)
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
def run(file)
|
35
|
+
|
36
|
+
output = runner(file)
|
37
|
+
|
38
|
+
process_output(output)
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
protected
|
43
|
+
|
44
|
+
def runner(filename, *args)
|
45
|
+
|
46
|
+
# Create new argument list
|
47
|
+
opts = []
|
48
|
+
|
49
|
+
# Add fixed options
|
50
|
+
# -L : follow symlinks
|
51
|
+
# --mime-type : only print MIME type
|
52
|
+
opts << '-L' << '--mime-type'
|
53
|
+
|
54
|
+
# Append passed arguments
|
55
|
+
opts += args
|
56
|
+
|
57
|
+
# Finally add the filename to process
|
58
|
+
opts << filename.escape_for_string if filename
|
59
|
+
|
60
|
+
# Run the UNIX file command and capture the results
|
61
|
+
file_tool = ::Libis::Tools::Command.run('file', *opts)
|
62
|
+
|
63
|
+
raise RuntimeError, "File command errors: #{file_tool[:err].join("\n")}" unless file_tool[:err].empty?
|
64
|
+
|
65
|
+
|
66
|
+
# Parse output text into array and return result
|
67
|
+
file_tool[:out].map do |line|
|
68
|
+
r = line.split(/:\s+/)
|
69
|
+
{filepath: r[0], mimetype: r[1], matchtype: 'magic', source: :file}
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
end
|