libis-format 0.9.32 → 0.9.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data/types.yml +30 -16
- data/lib/libis/format/config.rb +7 -18
- data/lib/libis/format/converter/image_converter.rb +6 -0
- data/lib/libis/format/droid.rb +82 -25
- data/lib/libis/format/extension_identification.rb +55 -0
- data/lib/libis/format/fido.rb +57 -72
- data/lib/libis/format/file_tool.rb +76 -0
- data/lib/libis/format/identification_tool.rb +174 -0
- data/lib/libis/format/identifier.rb +129 -117
- data/lib/libis/format/type_database.rb +36 -5
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +3 -0
- data/libis-format.gemspec +2 -1
- data/spec/converter_spec.rb +6 -4
- data/spec/identifier_spec.rb +125 -34
- metadata +21 -126
- data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
- data/tools/droid/container-signature-20170330.xml +0 -3584
- data/tools/droid/droid-command-line-6.3.jar +0 -0
- data/tools/droid/droid.bat +0 -152
- data/tools/droid/droid.sh +0 -152
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.10.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/droid-container-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-6.3.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-6.3.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-help-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-6.3.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
- data/tools/droid/lib/droid-results-6.3.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
- data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.13.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -50
- data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
- data/tools/fido/conf/container-signature-20170330.xml +0 -3584
- data/tools/fido/conf/dc.xsd +0 -119
- data/tools/fido/conf/dcmitype.xsd +0 -53
- data/tools/fido/conf/dcterms.xsd +0 -383
- data/tools/fido/conf/fido-formats.xsd +0 -173
- data/tools/fido/conf/format_extension_template.xml +0 -105
- data/tools/fido/conf/format_extensions.xml +0 -484
- data/tools/fido/conf/formats-v90.xml +0 -48877
- data/tools/fido/conf/pronom-xml-v90.zip +0 -0
- data/tools/fido/conf/versions.xml +0 -8
- data/tools/fido/fido.bat +0 -4
- data/tools/fido/fido.py +0 -884
- data/tools/fido/fido.sh +0 -5
- data/tools/fido/package.py +0 -96
- data/tools/fido/prepare.py +0 -645
- data/tools/fido/pronomutils.py +0 -200
- data/tools/fido/toxml.py +0 -60
- data/tools/fido/update_signatures.py +0 -183
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5a13d9df8f5e85ccc58758e46e556066d16935b0
|
|
4
|
+
data.tar.gz: 3a80e5cfc6d7ea70f4c0091420012446ec801b74
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c6a45a701e5ec6e07ebf763ee2fe3d97eb327fdccc85ca9aaca3e006e5dee3c1c058081c9a33c311aa72a7f2272661986799ab4a3f731f35e6ed5053ac1a25b3
|
|
7
|
+
data.tar.gz: 6f7b02f7ab9a9fad169920f7b2dc04fdbc63a33fb8d5e70e458de9055fa1427acf453f88ca3937883d721a15d636af53ce943a6feeb68e980e0d28a04970222d
|
data/data/types.yml
CHANGED
|
@@ -10,53 +10,62 @@
|
|
|
10
10
|
IMAGE:
|
|
11
11
|
TIFF:
|
|
12
12
|
NAME: Tagged Image File Format (TIFF)
|
|
13
|
-
MIME: image/tiff
|
|
13
|
+
MIME: image/tiff image/x-tiff image/tif image/x-tif application/tiff application/x-tiff application/tif application/x-tif
|
|
14
14
|
PUID: fmt/353 fmt/154 fmt/153 fmt/156 fmt/155 fmt/152 x-fmt/399 x-fmt/388 x-fmt/387 fmt/202
|
|
15
15
|
EXTENSIONS: tif,TIF,tiff,tifx,dng,nef
|
|
16
16
|
|
|
17
17
|
JP2:
|
|
18
18
|
NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
|
|
19
|
-
MIME: image/jp2
|
|
20
|
-
PUID: fmt/
|
|
19
|
+
MIME: image/jp2, image/jpeg2000, image/jpeg2000-image, image/x-jpeg2000-image, image/jpx
|
|
20
|
+
PUID: fmt/463, fmt/151, x-fmt/392
|
|
21
21
|
EXTENSIONS: jp2
|
|
22
22
|
|
|
23
23
|
JPG:
|
|
24
24
|
NAME: Joint Photographic Experts Group (JPEG)
|
|
25
|
-
MIME: image/jpeg
|
|
25
|
+
MIME: image/jpeg, image/jpg, image/jp_, application/jpg, application/x-jpg, image/pjpeg, image/pipeg, image/vnd.swiftview-jpeg
|
|
26
26
|
PUID: fmt/42 fmt/43 fmt/44 fmt/41 x-fmt/398 x-fmt/390 x-fmt/391 fmt/645
|
|
27
27
|
EXTENSIONS: jpg,jpe,jpeg
|
|
28
28
|
|
|
29
29
|
PNG:
|
|
30
30
|
NAME: Portable Network Graphics (PNG)
|
|
31
|
-
MIME: image/png
|
|
31
|
+
MIME: image/png, application/png, application/x-png
|
|
32
|
+
PUID: fmt/11, fmt/12, fmt/13,
|
|
32
33
|
EXTENSIONS: png
|
|
33
34
|
|
|
35
|
+
APNG:
|
|
36
|
+
NAME: Animate Portable Network Graphics
|
|
37
|
+
MIME: image/vnd.mozilla.apng
|
|
38
|
+
PUID: fmt/935
|
|
39
|
+
EXTENSIONS: apng, png
|
|
40
|
+
|
|
34
41
|
BMP:
|
|
35
42
|
NAME: Device Independent Bitmap (DIP/BMP)
|
|
36
|
-
MIME: image/bmp,image/x-ms-bmp
|
|
43
|
+
MIME: image/bmp, image/x-bmp, image/x-bitmap, image/x-xbitmap, image/x-win-bitmap, image/x-windows-bmp, image/ms-bmp, image/x-ms-bmp, application/bmp, application/x-bmp, application/x-win-bitmap
|
|
44
|
+
PUID: fmt/115, fmt/118, fmt/119, fmt/114, fmt/116, fmt/117, x-fmt/270
|
|
37
45
|
EXTENSIONS: bmp
|
|
38
46
|
|
|
39
47
|
GIF:
|
|
40
48
|
NAME: Graphics Interchange Format (GIF)
|
|
41
49
|
MIME: image/gif
|
|
50
|
+
PUID: fmt/3, fmt/4
|
|
42
51
|
EXTENSIONS: gif
|
|
43
52
|
|
|
44
53
|
PBM:
|
|
45
54
|
NAME: Portable Bitmap Format (PBM)
|
|
46
|
-
|
|
47
|
-
|
|
55
|
+
MIME: image/x-portable-bitmap, image/pbm, image/x-pbm
|
|
56
|
+
PUID: fmt/409, x-fmt/164
|
|
48
57
|
EXTENSIONS: pbm
|
|
49
58
|
|
|
50
59
|
PGM:
|
|
51
60
|
NAME: Portable GrayMap Format (PGM)
|
|
52
|
-
PUID: fmt/406
|
|
53
|
-
MIME: image/x
|
|
61
|
+
PUID: fmt/406, fmt/407
|
|
62
|
+
MIME: image/x-portable-graymap, image/pgm, image/x-pgm
|
|
54
63
|
EXTENSIONS: pgm
|
|
55
64
|
|
|
56
65
|
PPM:
|
|
57
66
|
NAME: Portable Pixel Map (PPM)
|
|
58
|
-
PUID: fmt/408
|
|
59
|
-
MIME: image/x
|
|
67
|
+
PUID: fmt/408, x-fmt/178
|
|
68
|
+
MIME: image/x-portable-pixmap, application/ppm, application/x-ppm, image/ppm, image/x-ppm
|
|
60
69
|
EXTENSIONS: ppm
|
|
61
70
|
|
|
62
71
|
AUDIO:
|
|
@@ -133,7 +142,7 @@ TEXT:
|
|
|
133
142
|
RTF:
|
|
134
143
|
NAME: Rich Text Format (RTF)
|
|
135
144
|
PUID: fmt/45
|
|
136
|
-
MIME:
|
|
145
|
+
MIME: application/rtf text/rtf
|
|
137
146
|
EXTENSIONS: rtf
|
|
138
147
|
|
|
139
148
|
HTML:
|
|
@@ -143,8 +152,8 @@ TEXT:
|
|
|
143
152
|
|
|
144
153
|
MSDOC:
|
|
145
154
|
NAME: Microsoft Word Document (DOC)
|
|
146
|
-
PUID: fmt/609 fmt/39 x-fmt/
|
|
147
|
-
MIME: application/vnd.ms-word application/msword
|
|
155
|
+
PUID: fmt/609 fmt/39 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40 fmt/754
|
|
156
|
+
MIME: application/msword application/doc appl/text application/vnd.msword application/vnd.ms-word application/winword application/word application/x-msw6 application/x-msword
|
|
148
157
|
EXTENSIONS: doc
|
|
149
158
|
|
|
150
159
|
MSDOCX:
|
|
@@ -171,9 +180,14 @@ TEXT:
|
|
|
171
180
|
MIME: application/vnd.wordperfect
|
|
172
181
|
EXTENSIONS: wpd
|
|
173
182
|
|
|
183
|
+
OO_WRITER:
|
|
184
|
+
NAME: OpenDocument Text
|
|
185
|
+
MIME: application/vnd.oasis.opendocument.text, application/x-vnd.oasis.opendocument.text
|
|
186
|
+
PUID: fmt/136, fmt/290, fmt/291
|
|
187
|
+
|
|
174
188
|
XML:
|
|
175
189
|
NAME: Extensible Markup Language (XML)
|
|
176
|
-
MIME: text/xml application/xml
|
|
190
|
+
MIME: text/xml application/xml application/x-xml
|
|
177
191
|
PUID: fmt/101
|
|
178
192
|
EXTENSIONS: xml
|
|
179
193
|
|
data/lib/libis/format/config.rb
CHANGED
|
@@ -13,24 +13,13 @@ module Libis
|
|
|
13
13
|
Config[:j2kdriver] = 'j2kdriver'
|
|
14
14
|
Config[:soffice_path] = 'soffice'
|
|
15
15
|
Config[:ghostscript_path] = 'gs'
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
#
|
|
21
|
-
|
|
22
|
-
Config[:
|
|
23
|
-
File.absolute_path(
|
|
24
|
-
File.join(
|
|
25
|
-
File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid', OS.windows? ? 'droid.bat' : 'droid.sh'
|
|
26
|
-
)
|
|
27
|
-
)
|
|
28
|
-
Config[:fido_path] =
|
|
29
|
-
File.absolute_path(
|
|
30
|
-
File.join(
|
|
31
|
-
File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido', OS.windows? ? 'fido.bat' : 'fido.sh'
|
|
32
|
-
)
|
|
33
|
-
)
|
|
16
|
+
Config[:droid_path] = '/opt/droid/droid.sh'
|
|
17
|
+
Config[:fido_path] = '/usr/local/bin/fido'
|
|
18
|
+
data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
|
|
19
|
+
Config[:fido_formats] = [(File.join(data_dir, 'lias_formats.xml'))]
|
|
20
|
+
# noinspection RubyStringKeysInHashInspection
|
|
21
|
+
Config[:xml_validations] = [['archive/ead', File.join(data_dir, 'ead.xsd')]]
|
|
22
|
+
Config[:type_database] = File.join(data_dir, 'types.yml')
|
|
34
23
|
|
|
35
24
|
end
|
|
36
25
|
end
|
|
@@ -8,6 +8,12 @@ require 'fileutils'
|
|
|
8
8
|
|
|
9
9
|
MiniMagick.logger.level = ::Logger::ERROR
|
|
10
10
|
|
|
11
|
+
MiniMagick.configure do |config|
|
|
12
|
+
# config.cli = :graphicsmagick
|
|
13
|
+
config.validate_on_create = false
|
|
14
|
+
config.validate_on_write = false
|
|
15
|
+
end
|
|
16
|
+
|
|
11
17
|
module Libis
|
|
12
18
|
module Format
|
|
13
19
|
module Converter
|
data/lib/libis/format/droid.rb
CHANGED
|
@@ -3,45 +3,102 @@ require 'singleton'
|
|
|
3
3
|
require 'tempfile'
|
|
4
4
|
require 'csv'
|
|
5
5
|
|
|
6
|
-
require 'libis/tools/extend/string'
|
|
7
|
-
require 'libis/tools/logger'
|
|
8
|
-
require 'libis/tools/command'
|
|
9
|
-
|
|
10
6
|
require 'libis/format/config'
|
|
11
7
|
|
|
8
|
+
unless CSV::HeaderConverters.has_key?(:droid_headers)
|
|
9
|
+
CSV::HeaderConverters[:droid_headers] = lambda {|h|
|
|
10
|
+
h.encode(ConverterEncoding).downcase.strip.
|
|
11
|
+
gsub(/\W+/, "").to_sym
|
|
12
|
+
}
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
require_relative 'identification_tool'
|
|
16
|
+
|
|
12
17
|
module Libis
|
|
13
18
|
module Format
|
|
14
19
|
|
|
15
|
-
class Droid
|
|
16
|
-
|
|
17
|
-
|
|
20
|
+
class Droid < Libis::Format::IdentificationTool
|
|
21
|
+
|
|
22
|
+
def run_list(filelist)
|
|
23
|
+
runner(filelist)
|
|
24
|
+
end
|
|
18
25
|
|
|
19
|
-
def
|
|
20
|
-
|
|
26
|
+
def run_dir(dir, recursive = true)
|
|
27
|
+
profile = profile_file_name
|
|
28
|
+
report = result_file_name
|
|
29
|
+
create_profile(dir, profile, recursive)
|
|
30
|
+
create_report(profile, report)
|
|
31
|
+
parse_report(report)
|
|
21
32
|
end
|
|
22
33
|
|
|
23
34
|
def run(file)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
runner(file)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
protected
|
|
39
|
+
|
|
40
|
+
def runner(file_or_list)
|
|
41
|
+
profile = profile_file_name
|
|
42
|
+
report = result_file_name
|
|
43
|
+
create_profile(file_or_list, profile)
|
|
44
|
+
create_report(profile, report)
|
|
45
|
+
parse_report(report)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def parse_report(report)
|
|
49
|
+
keys = [
|
|
50
|
+
:id, :parent_id, :uri, :filepath, :filename, :matchtype, :status, :filesize, :type, :extension,
|
|
51
|
+
:mod_time, :ext_mismatch, :hash, :format_count, :puid, :mimetype, :format_name, :format_version]
|
|
52
|
+
result = CSV.parse(File.readlines(report).join)
|
|
53
|
+
.map {|a| Hash[keys.zip(a)]}
|
|
54
|
+
.select {|a| a[:type] == 'File'}
|
|
55
|
+
# File.delete report
|
|
56
|
+
result.each do |r|
|
|
57
|
+
r.delete(:id)
|
|
58
|
+
r.delete(:parent_id)
|
|
59
|
+
r.delete(:uri)
|
|
60
|
+
r.delete(:filename)
|
|
61
|
+
r.delete(:status)
|
|
62
|
+
r.delete(:filesize)
|
|
63
|
+
r.delete(:type)
|
|
64
|
+
r.delete(:extension)
|
|
65
|
+
r.delete(:mod_time)
|
|
66
|
+
r.delete(:hash)
|
|
67
|
+
r.delete(:format_count)
|
|
68
|
+
r[:source] = :droid
|
|
69
|
+
end
|
|
70
|
+
process_output(result)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def create_report(profile, report)
|
|
74
|
+
args = [
|
|
35
75
|
'-e', report,
|
|
36
76
|
'-p', profile,
|
|
37
77
|
'-q'
|
|
38
|
-
|
|
39
|
-
|
|
78
|
+
]
|
|
79
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
|
|
80
|
+
raise RuntimeError, "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
|
40
81
|
File.delete profile
|
|
41
|
-
result = CSV.read(report , headers: true, header_converters: [:downcase, :symbol])
|
|
42
|
-
File.delete report
|
|
43
|
-
result.map{|r|r.to_hash}
|
|
44
82
|
end
|
|
83
|
+
|
|
84
|
+
def create_profile(file_or_list, profile, recursive = false)
|
|
85
|
+
args = []
|
|
86
|
+
files = (file_or_list.is_a?(Array)) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
|
|
87
|
+
files.each { |file| args << '-a' << file}
|
|
88
|
+
args << '-p' << profile << '-q'
|
|
89
|
+
args << '-R' if recursive
|
|
90
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
|
|
91
|
+
raise RuntimeError, "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def profile_file_name
|
|
95
|
+
File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def result_file_name
|
|
99
|
+
File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
|
|
100
|
+
end
|
|
101
|
+
|
|
45
102
|
end
|
|
46
103
|
|
|
47
104
|
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
require_relative 'identification_tool'
|
|
2
|
+
|
|
3
|
+
module Libis
|
|
4
|
+
module Format
|
|
5
|
+
|
|
6
|
+
class ExtensionIdentification < Libis::Format::IdentificationTool
|
|
7
|
+
|
|
8
|
+
def run_list(filelist)
|
|
9
|
+
|
|
10
|
+
output = runner(nil, filelist)
|
|
11
|
+
|
|
12
|
+
process_output(output)
|
|
13
|
+
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def run_dir(dir, recursive = true)
|
|
17
|
+
|
|
18
|
+
filelist = find_files(dir, recursive)
|
|
19
|
+
|
|
20
|
+
output = runner(nil, filelist)
|
|
21
|
+
|
|
22
|
+
process_output(output)
|
|
23
|
+
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def run(file)
|
|
27
|
+
|
|
28
|
+
output = runner(file)
|
|
29
|
+
|
|
30
|
+
process_output(output)
|
|
31
|
+
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
protected
|
|
35
|
+
|
|
36
|
+
def runner(*args)
|
|
37
|
+
|
|
38
|
+
args.map do |file|
|
|
39
|
+
info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
|
|
40
|
+
if info
|
|
41
|
+
{
|
|
42
|
+
filepath: file,
|
|
43
|
+
mimetype: (info[:MIME].first rescue nil),
|
|
44
|
+
puid: (info[:PUID].first rescue nil),
|
|
45
|
+
matchtype: 'extension',
|
|
46
|
+
source: :type_database
|
|
47
|
+
}
|
|
48
|
+
end
|
|
49
|
+
end.cleanup
|
|
50
|
+
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
data/lib/libis/format/fido.rb
CHANGED
|
@@ -1,101 +1,86 @@
|
|
|
1
|
-
require 'csv'
|
|
2
|
-
|
|
3
|
-
require 'singleton'
|
|
4
1
|
require 'libis/tools/extend/string'
|
|
5
|
-
require 'libis/tools/logger'
|
|
6
2
|
require 'libis/tools/command'
|
|
7
3
|
|
|
4
|
+
require 'csv'
|
|
8
5
|
require 'libis/format/config'
|
|
9
|
-
|
|
6
|
+
|
|
7
|
+
require_relative 'identification_tool'
|
|
10
8
|
|
|
11
9
|
module Libis
|
|
12
10
|
module Format
|
|
13
11
|
|
|
14
|
-
class Fido
|
|
15
|
-
include Singleton
|
|
16
|
-
include ::Libis::Tools::Logger
|
|
12
|
+
class Fido < Libis::Format::IdentificationTool
|
|
17
13
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def self.run(file, formats = nil)
|
|
21
|
-
self.instance.run file, formats
|
|
14
|
+
def self.add_formats(formats_file)
|
|
15
|
+
self.instance.formats << formats_file unless self.instance.formats.include?(formats_file)
|
|
22
16
|
end
|
|
23
17
|
|
|
24
|
-
def
|
|
18
|
+
def self.del_formats(formats_file)
|
|
19
|
+
self.instance.formats.delete(formats_file)
|
|
20
|
+
end
|
|
25
21
|
|
|
26
|
-
|
|
22
|
+
attr_reader :formats
|
|
27
23
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
when String
|
|
33
|
-
fmt_list << xtra_formats
|
|
34
|
-
else
|
|
35
|
-
# do nothing
|
|
24
|
+
def run_list(filelist)
|
|
25
|
+
create_list_file(filelist) do |list_file|
|
|
26
|
+
output = runner(nil, '-input', list_file.escape_for_string)
|
|
27
|
+
process_output(output)
|
|
36
28
|
end
|
|
29
|
+
end
|
|
37
30
|
|
|
31
|
+
def run_dir(dir, recursive = true)
|
|
38
32
|
args = []
|
|
39
|
-
args << '-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
|
|
45
|
-
fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
|
|
46
|
-
|
|
47
|
-
fido_output.each do |x|
|
|
48
|
-
if x[:status] == 'OK'
|
|
49
|
-
x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None'
|
|
50
|
-
next if BAD_MIMETYPES.include? x[:mimetype]
|
|
51
|
-
x[:score] = 5
|
|
52
|
-
case x[:matchtype]
|
|
53
|
-
when 'signature'
|
|
54
|
-
x[:score] += 5
|
|
55
|
-
when 'container'
|
|
56
|
-
typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid])
|
|
57
|
-
ext = File.extname(file)
|
|
58
|
-
x[:score] += 2 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
|
59
|
-
else
|
|
60
|
-
# do nothing
|
|
61
|
-
end
|
|
62
|
-
fido_results << x
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
fido_results = fido_results.inject({}) do |result, value|
|
|
67
|
-
result[value[:score]] ||= []
|
|
68
|
-
result[value[:score]] << value
|
|
69
|
-
result
|
|
70
|
-
end
|
|
33
|
+
args << '-recurse' if recursive
|
|
34
|
+
output = runner(dir, *args)
|
|
35
|
+
process_output(output)
|
|
36
|
+
end
|
|
71
37
|
|
|
72
|
-
|
|
38
|
+
def run(file)
|
|
39
|
+
output = runner(file)
|
|
40
|
+
process_output(output)
|
|
41
|
+
end
|
|
73
42
|
|
|
74
|
-
|
|
75
|
-
return {} unless max_score and max_score >= 5 && fido_results[max_score].size == 1
|
|
43
|
+
protected
|
|
76
44
|
|
|
77
|
-
|
|
45
|
+
def initialize
|
|
46
|
+
super
|
|
47
|
+
@formats = Libis::Format::Config[:fido_formats].dup
|
|
48
|
+
bad_mimetype('application/vnd.oasis.opendocument.text')
|
|
49
|
+
bad_mimetype('application/vnd.oasis.opendocument.spreadsheet')
|
|
78
50
|
end
|
|
79
51
|
|
|
80
|
-
|
|
81
|
-
instance.formats << f
|
|
82
|
-
end
|
|
52
|
+
attr_writer :formats
|
|
83
53
|
|
|
84
|
-
def
|
|
85
|
-
|
|
86
|
-
|
|
54
|
+
def runner(filename, *args)
|
|
55
|
+
# Load custome format definitions if present
|
|
56
|
+
args << '-loadformats' << "#{formats.join(',')}" unless formats.empty?
|
|
87
57
|
|
|
88
|
-
|
|
58
|
+
# Workaround for Fido performance bug
|
|
59
|
+
args << '-bufsize' << '1000'
|
|
89
60
|
|
|
90
|
-
|
|
61
|
+
# Add filename to argument list (optional)
|
|
62
|
+
args << "#{filename.escape_for_string}" if filename
|
|
91
63
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
@formats = [(File.join(data_dir, 'lias_formats.xml'))]
|
|
95
|
-
end
|
|
64
|
+
# No header output
|
|
65
|
+
args << '-q'
|
|
96
66
|
|
|
97
|
-
|
|
98
|
-
::Libis::
|
|
67
|
+
# Run command and capture results
|
|
68
|
+
fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
|
|
69
|
+
|
|
70
|
+
# Log warning if needed
|
|
71
|
+
raise RuntimeError, "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
|
|
72
|
+
|
|
73
|
+
# Parse output (CSV) text into array and return result
|
|
74
|
+
keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype]
|
|
75
|
+
result = CSV.parse(fido[:out].join("\n"))
|
|
76
|
+
.map {|a| Hash[keys.zip(a)]}
|
|
77
|
+
.select {|a| a[:status] == 'OK'}
|
|
78
|
+
result.each do |r|
|
|
79
|
+
r.delete(:time)
|
|
80
|
+
r.delete(:status)
|
|
81
|
+
r.delete(:filesize)
|
|
82
|
+
r[:source] = :fido
|
|
83
|
+
end
|
|
99
84
|
end
|
|
100
85
|
|
|
101
86
|
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require_relative 'identification_tool'
|
|
2
|
+
|
|
3
|
+
module Libis
|
|
4
|
+
module Format
|
|
5
|
+
|
|
6
|
+
class FileTool < Libis::Format::IdentificationTool
|
|
7
|
+
|
|
8
|
+
def run_list(filelist)
|
|
9
|
+
|
|
10
|
+
create_list_file(filelist) do |list_file|
|
|
11
|
+
|
|
12
|
+
output = runner(nil, '--files-from', list_file)
|
|
13
|
+
|
|
14
|
+
process_output(output)
|
|
15
|
+
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def run_dir(dir, recursive = true)
|
|
21
|
+
|
|
22
|
+
filelist = find_files(dir, recursive)
|
|
23
|
+
|
|
24
|
+
create_list_file(filelist) do |list_file|
|
|
25
|
+
|
|
26
|
+
output = runner(nil, '--files-from', list_file)
|
|
27
|
+
|
|
28
|
+
process_output(output)
|
|
29
|
+
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def run(file)
|
|
35
|
+
|
|
36
|
+
output = runner(file)
|
|
37
|
+
|
|
38
|
+
process_output(output)
|
|
39
|
+
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
protected
|
|
43
|
+
|
|
44
|
+
def runner(filename, *args)
|
|
45
|
+
|
|
46
|
+
# Create new argument list
|
|
47
|
+
opts = []
|
|
48
|
+
|
|
49
|
+
# Add fixed options
|
|
50
|
+
# -L : follow symlinks
|
|
51
|
+
# --mime-type : only print MIME type
|
|
52
|
+
opts << '-L' << '--mime-type'
|
|
53
|
+
|
|
54
|
+
# Append passed arguments
|
|
55
|
+
opts += args
|
|
56
|
+
|
|
57
|
+
# Finally add the filename to process
|
|
58
|
+
opts << filename.escape_for_string if filename
|
|
59
|
+
|
|
60
|
+
# Run the UNIX file command and capture the results
|
|
61
|
+
file_tool = ::Libis::Tools::Command.run('file', *opts)
|
|
62
|
+
|
|
63
|
+
raise RuntimeError, "File command errors: #{file_tool[:err].join("\n")}" unless file_tool[:err].empty?
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Parse output text into array and return result
|
|
67
|
+
file_tool[:out].map do |line|
|
|
68
|
+
r = line.split(/:\s+/)
|
|
69
|
+
{filepath: r[0], mimetype: r[1], matchtype: 'magic', source: :file}
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
end
|
|
76
|
+
end
|