libis-format 0.9.32 → 0.9.33

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. checksums.yaml +4 -4
  2. data/data/types.yml +30 -16
  3. data/lib/libis/format/config.rb +7 -18
  4. data/lib/libis/format/converter/image_converter.rb +6 -0
  5. data/lib/libis/format/droid.rb +82 -25
  6. data/lib/libis/format/extension_identification.rb +55 -0
  7. data/lib/libis/format/fido.rb +57 -72
  8. data/lib/libis/format/file_tool.rb +76 -0
  9. data/lib/libis/format/identification_tool.rb +174 -0
  10. data/lib/libis/format/identifier.rb +129 -117
  11. data/lib/libis/format/type_database.rb +36 -5
  12. data/lib/libis/format/version.rb +1 -1
  13. data/lib/libis/format.rb +3 -0
  14. data/libis-format.gemspec +2 -1
  15. data/spec/converter_spec.rb +6 -4
  16. data/spec/identifier_spec.rb +125 -34
  17. metadata +21 -126
  18. data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
  19. data/tools/droid/container-signature-20170330.xml +0 -3584
  20. data/tools/droid/droid-command-line-6.3.jar +0 -0
  21. data/tools/droid/droid.bat +0 -152
  22. data/tools/droid/droid.sh +0 -152
  23. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  24. data/tools/droid/lib/activation-1.1.jar +0 -0
  25. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  26. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  27. data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
  28. data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
  29. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  30. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  31. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  32. data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
  33. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  34. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  35. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  36. data/tools/droid/lib/commons-codec-1.10.jar +0 -0
  37. data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
  38. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  39. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  40. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  41. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  42. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  43. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  44. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  45. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  46. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  47. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  48. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  49. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  50. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  51. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  52. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  53. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  54. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  55. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  56. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  57. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  58. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  59. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  60. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  61. data/tools/droid/lib/droid-container-6.3.jar +0 -0
  62. data/tools/droid/lib/droid-core-6.3.jar +0 -0
  63. data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
  64. data/tools/droid/lib/droid-export-6.3.jar +0 -0
  65. data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
  66. data/tools/droid/lib/droid-help-6.3.jar +0 -0
  67. data/tools/droid/lib/droid-report-6.3.jar +0 -0
  68. data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
  69. data/tools/droid/lib/droid-results-6.3.jar +0 -0
  70. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  71. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  72. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  73. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  74. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  75. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  76. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  77. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  78. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  79. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  80. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  81. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  82. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  83. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  84. data/tools/droid/lib/jta-1.1.jar +0 -0
  85. data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
  86. data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
  87. data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
  88. data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
  89. data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
  90. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  91. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  92. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  93. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  94. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  95. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  96. data/tools/droid/lib/poi-3.13.jar +0 -0
  97. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  98. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  99. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  100. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  101. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  102. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  103. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  104. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  105. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  106. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  107. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  108. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  109. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  110. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  111. data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
  112. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  113. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  114. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  115. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  116. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  117. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  118. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  119. data/tools/droid/lib/xz-1.0.jar +0 -0
  120. data/tools/fido/__init__.py +0 -50
  121. data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
  122. data/tools/fido/conf/container-signature-20170330.xml +0 -3584
  123. data/tools/fido/conf/dc.xsd +0 -119
  124. data/tools/fido/conf/dcmitype.xsd +0 -53
  125. data/tools/fido/conf/dcterms.xsd +0 -383
  126. data/tools/fido/conf/fido-formats.xsd +0 -173
  127. data/tools/fido/conf/format_extension_template.xml +0 -105
  128. data/tools/fido/conf/format_extensions.xml +0 -484
  129. data/tools/fido/conf/formats-v90.xml +0 -48877
  130. data/tools/fido/conf/pronom-xml-v90.zip +0 -0
  131. data/tools/fido/conf/versions.xml +0 -8
  132. data/tools/fido/fido.bat +0 -4
  133. data/tools/fido/fido.py +0 -884
  134. data/tools/fido/fido.sh +0 -5
  135. data/tools/fido/package.py +0 -96
  136. data/tools/fido/prepare.py +0 -645
  137. data/tools/fido/pronomutils.py +0 -200
  138. data/tools/fido/toxml.py +0 -60
  139. data/tools/fido/update_signatures.py +0 -183
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 11f5b2cb05e4748b97fef65f805e8222e9857354
4
- data.tar.gz: 49c667a039c6174b41aaabbb2800955989884fa0
3
+ metadata.gz: 5a13d9df8f5e85ccc58758e46e556066d16935b0
4
+ data.tar.gz: 3a80e5cfc6d7ea70f4c0091420012446ec801b74
5
5
  SHA512:
6
- metadata.gz: 9bbb1f1f21742ece1b4e34ce22a295327436b0c59b996a018bfba9c828377e5a79a1cefb9f4c2a01e080dbdc15e8c6586dc15b5ab66eca936550c840eeba380d
7
- data.tar.gz: 9214406a8a4b638801d8c4258e14401a3b65fb1283299e76664115148514476f3738832be3718b7d124583a3ea21e53b9a949f7557c7a63d42203f97e4381b14
6
+ metadata.gz: c6a45a701e5ec6e07ebf763ee2fe3d97eb327fdccc85ca9aaca3e006e5dee3c1c058081c9a33c311aa72a7f2272661986799ab4a3f731f35e6ed5053ac1a25b3
7
+ data.tar.gz: 6f7b02f7ab9a9fad169920f7b2dc04fdbc63a33fb8d5e70e458de9055fa1427acf453f88ca3937883d721a15d636af53ce943a6feeb68e980e0d28a04970222d
data/data/types.yml CHANGED
@@ -10,53 +10,62 @@
10
10
  IMAGE:
11
11
  TIFF:
12
12
  NAME: Tagged Image File Format (TIFF)
13
- MIME: image/tiff
13
+ MIME: image/tiff image/x-tiff image/tif image/x-tif application/tiff application/x-tiff application/tif application/x-tif
14
14
  PUID: fmt/353 fmt/154 fmt/153 fmt/156 fmt/155 fmt/152 x-fmt/399 x-fmt/388 x-fmt/387 fmt/202
15
15
  EXTENSIONS: tif,TIF,tiff,tifx,dng,nef
16
16
 
17
17
  JP2:
18
18
  NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
19
- MIME: image/jp2
20
- PUID: fmt/406
19
+ MIME: image/jp2, image/jpeg2000, image/jpeg2000-image, image/x-jpeg2000-image, image/jpx
20
+ PUID: fmt/463, fmt/151, x-fmt/392
21
21
  EXTENSIONS: jp2
22
22
 
23
23
  JPG:
24
24
  NAME: Joint Photographic Experts Group (JPEG)
25
- MIME: image/jpeg
25
+ MIME: image/jpeg, image/jpg, image/jp_, application/jpg, application/x-jpg, image/pjpeg, image/pipeg, image/vnd.swiftview-jpeg
26
26
  PUID: fmt/42 fmt/43 fmt/44 fmt/41 x-fmt/398 x-fmt/390 x-fmt/391 fmt/645
27
27
  EXTENSIONS: jpg,jpe,jpeg
28
28
 
29
29
  PNG:
30
30
  NAME: Portable Network Graphics (PNG)
31
- MIME: image/png
31
+ MIME: image/png, application/png, application/x-png
32
+ PUID: fmt/11, fmt/12, fmt/13,
32
33
  EXTENSIONS: png
33
34
 
35
+ APNG:
36
+ NAME: Animate Portable Network Graphics
37
+ MIME: image/vnd.mozilla.apng
38
+ PUID: fmt/935
39
+ EXTENSIONS: apng, png
40
+
34
41
  BMP:
35
42
  NAME: Device Independent Bitmap (DIP/BMP)
36
- MIME: image/bmp,image/x-ms-bmp
43
+ MIME: image/bmp, image/x-bmp, image/x-bitmap, image/x-xbitmap, image/x-win-bitmap, image/x-windows-bmp, image/ms-bmp, image/x-ms-bmp, application/bmp, application/x-bmp, application/x-win-bitmap
44
+ PUID: fmt/115, fmt/118, fmt/119, fmt/114, fmt/116, fmt/117, x-fmt/270
37
45
  EXTENSIONS: bmp
38
46
 
39
47
  GIF:
40
48
  NAME: Graphics Interchange Format (GIF)
41
49
  MIME: image/gif
50
+ PUID: fmt/3, fmt/4
42
51
  EXTENSIONS: gif
43
52
 
44
53
  PBM:
45
54
  NAME: Portable Bitmap Format (PBM)
46
- PUID: fmt/409
47
- MIME: image/x‑portable‑bitmap
55
+ MIME: image/x-portable-bitmap, image/pbm, image/x-pbm
56
+ PUID: fmt/409, x-fmt/164
48
57
  EXTENSIONS: pbm
49
58
 
50
59
  PGM:
51
60
  NAME: Portable GrayMap Format (PGM)
52
- PUID: fmt/406
53
- MIME: image/xportablegraymap
61
+ PUID: fmt/406, fmt/407
62
+ MIME: image/x-portable-graymap, image/pgm, image/x-pgm
54
63
  EXTENSIONS: pgm
55
64
 
56
65
  PPM:
57
66
  NAME: Portable Pixel Map (PPM)
58
- PUID: fmt/408
59
- MIME: image/xportablepixmap
67
+ PUID: fmt/408, x-fmt/178
68
+ MIME: image/x-portable-pixmap, application/ppm, application/x-ppm, image/ppm, image/x-ppm
60
69
  EXTENSIONS: ppm
61
70
 
62
71
  AUDIO:
@@ -133,7 +142,7 @@ TEXT:
133
142
  RTF:
134
143
  NAME: Rich Text Format (RTF)
135
144
  PUID: fmt/45
136
- MIME: text/rtf application/rtf
145
+ MIME: application/rtf text/rtf
137
146
  EXTENSIONS: rtf
138
147
 
139
148
  HTML:
@@ -143,8 +152,8 @@ TEXT:
143
152
 
144
153
  MSDOC:
145
154
  NAME: Microsoft Word Document (DOC)
146
- PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
147
- MIME: application/vnd.ms-word application/msword
155
+ PUID: fmt/609 fmt/39 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40 fmt/754
156
+ MIME: application/msword application/doc appl/text application/vnd.msword application/vnd.ms-word application/winword application/word application/x-msw6 application/x-msword
148
157
  EXTENSIONS: doc
149
158
 
150
159
  MSDOCX:
@@ -171,9 +180,14 @@ TEXT:
171
180
  MIME: application/vnd.wordperfect
172
181
  EXTENSIONS: wpd
173
182
 
183
+ OO_WRITER:
184
+ NAME: OpenDocument Text
185
+ MIME: application/vnd.oasis.opendocument.text, application/x-vnd.oasis.opendocument.text
186
+ PUID: fmt/136, fmt/290, fmt/291
187
+
174
188
  XML:
175
189
  NAME: Extensible Markup Language (XML)
176
- MIME: text/xml application/xml
190
+ MIME: text/xml application/xml application/x-xml
177
191
  PUID: fmt/101
178
192
  EXTENSIONS: xml
179
193
 
@@ -13,24 +13,13 @@ module Libis
13
13
  Config[:j2kdriver] = 'j2kdriver'
14
14
  Config[:soffice_path] = 'soffice'
15
15
  Config[:ghostscript_path] = 'gs'
16
- # Config[:pdfa_path] =
17
- # File.absolute_path(
18
- # File.join(
19
- # File.dirname(__FILE__), '..', '..', '..', 'tools', 'pdf', 'pdfa', 'pdfa'
20
- # )
21
- # )
22
- Config[:droid_path] =
23
- File.absolute_path(
24
- File.join(
25
- File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid', OS.windows? ? 'droid.bat' : 'droid.sh'
26
- )
27
- )
28
- Config[:fido_path] =
29
- File.absolute_path(
30
- File.join(
31
- File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido', OS.windows? ? 'fido.bat' : 'fido.sh'
32
- )
33
- )
16
+ Config[:droid_path] = '/opt/droid/droid.sh'
17
+ Config[:fido_path] = '/usr/local/bin/fido'
18
+ data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
19
+ Config[:fido_formats] = [(File.join(data_dir, 'lias_formats.xml'))]
20
+ # noinspection RubyStringKeysInHashInspection
21
+ Config[:xml_validations] = [['archive/ead', File.join(data_dir, 'ead.xsd')]]
22
+ Config[:type_database] = File.join(data_dir, 'types.yml')
34
23
 
35
24
  end
36
25
  end
@@ -8,6 +8,12 @@ require 'fileutils'
8
8
 
9
9
  MiniMagick.logger.level = ::Logger::ERROR
10
10
 
11
+ MiniMagick.configure do |config|
12
+ # config.cli = :graphicsmagick
13
+ config.validate_on_create = false
14
+ config.validate_on_write = false
15
+ end
16
+
11
17
  module Libis
12
18
  module Format
13
19
  module Converter
@@ -3,45 +3,102 @@ require 'singleton'
3
3
  require 'tempfile'
4
4
  require 'csv'
5
5
 
6
- require 'libis/tools/extend/string'
7
- require 'libis/tools/logger'
8
- require 'libis/tools/command'
9
-
10
6
  require 'libis/format/config'
11
7
 
8
+ unless CSV::HeaderConverters.has_key?(:droid_headers)
9
+ CSV::HeaderConverters[:droid_headers] = lambda {|h|
10
+ h.encode(ConverterEncoding).downcase.strip.
11
+ gsub(/\W+/, "").to_sym
12
+ }
13
+ end
14
+
15
+ require_relative 'identification_tool'
16
+
12
17
  module Libis
13
18
  module Format
14
19
 
15
- class Droid
16
- include Singleton
17
- include ::Libis::Tools::Logger
20
+ class Droid < Libis::Format::IdentificationTool
21
+
22
+ def run_list(filelist)
23
+ runner(filelist)
24
+ end
18
25
 
19
- def self.run(file)
20
- self.instance.run file
26
+ def run_dir(dir, recursive = true)
27
+ profile = profile_file_name
28
+ report = result_file_name
29
+ create_profile(dir, profile, recursive)
30
+ create_report(profile, report)
31
+ parse_report(report)
21
32
  end
22
33
 
23
34
  def run(file)
24
- profile = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
25
- report = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
26
- result = Libis::Tools::Command.run(
27
- Libis::Format::Config[:droid_path],
28
- '-a', file.escape_for_string,
29
- '-p', profile,
30
- '-q',
31
- )
32
- warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
33
- result = Libis::Tools::Command.run(
34
- Libis::Format::Config[:droid_path],
35
+ runner(file)
36
+ end
37
+
38
+ protected
39
+
40
+ def runner(file_or_list)
41
+ profile = profile_file_name
42
+ report = result_file_name
43
+ create_profile(file_or_list, profile)
44
+ create_report(profile, report)
45
+ parse_report(report)
46
+ end
47
+
48
+ def parse_report(report)
49
+ keys = [
50
+ :id, :parent_id, :uri, :filepath, :filename, :matchtype, :status, :filesize, :type, :extension,
51
+ :mod_time, :ext_mismatch, :hash, :format_count, :puid, :mimetype, :format_name, :format_version]
52
+ result = CSV.parse(File.readlines(report).join)
53
+ .map {|a| Hash[keys.zip(a)]}
54
+ .select {|a| a[:type] == 'File'}
55
+ # File.delete report
56
+ result.each do |r|
57
+ r.delete(:id)
58
+ r.delete(:parent_id)
59
+ r.delete(:uri)
60
+ r.delete(:filename)
61
+ r.delete(:status)
62
+ r.delete(:filesize)
63
+ r.delete(:type)
64
+ r.delete(:extension)
65
+ r.delete(:mod_time)
66
+ r.delete(:hash)
67
+ r.delete(:format_count)
68
+ r[:source] = :droid
69
+ end
70
+ process_output(result)
71
+ end
72
+
73
+ def create_report(profile, report)
74
+ args = [
35
75
  '-e', report,
36
76
  '-p', profile,
37
77
  '-q'
38
- )
39
- warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
78
+ ]
79
+ result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
80
+ raise RuntimeError, "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
40
81
  File.delete profile
41
- result = CSV.read(report , headers: true, header_converters: [:downcase, :symbol])
42
- File.delete report
43
- result.map{|r|r.to_hash}
44
82
  end
83
+
84
+ def create_profile(file_or_list, profile, recursive = false)
85
+ args = []
86
+ files = (file_or_list.is_a?(Array)) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
87
+ files.each { |file| args << '-a' << file}
88
+ args << '-p' << profile << '-q'
89
+ args << '-R' if recursive
90
+ result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
91
+ raise RuntimeError, "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
92
+ end
93
+
94
+ def profile_file_name
95
+ File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
96
+ end
97
+
98
+ def result_file_name
99
+ File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
100
+ end
101
+
45
102
  end
46
103
 
47
104
  end
@@ -0,0 +1,55 @@
1
+ require_relative 'identification_tool'
2
+
3
+ module Libis
4
+ module Format
5
+
6
+ class ExtensionIdentification < Libis::Format::IdentificationTool
7
+
8
+ def run_list(filelist)
9
+
10
+ output = runner(nil, filelist)
11
+
12
+ process_output(output)
13
+
14
+ end
15
+
16
+ def run_dir(dir, recursive = true)
17
+
18
+ filelist = find_files(dir, recursive)
19
+
20
+ output = runner(nil, filelist)
21
+
22
+ process_output(output)
23
+
24
+ end
25
+
26
+ def run(file)
27
+
28
+ output = runner(file)
29
+
30
+ process_output(output)
31
+
32
+ end
33
+
34
+ protected
35
+
36
+ def runner(*args)
37
+
38
+ args.map do |file|
39
+ info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
40
+ if info
41
+ {
42
+ filepath: file,
43
+ mimetype: (info[:MIME].first rescue nil),
44
+ puid: (info[:PUID].first rescue nil),
45
+ matchtype: 'extension',
46
+ source: :type_database
47
+ }
48
+ end
49
+ end.cleanup
50
+
51
+ end
52
+
53
+ end
54
+ end
55
+ end
@@ -1,101 +1,86 @@
1
- require 'csv'
2
-
3
- require 'singleton'
4
1
  require 'libis/tools/extend/string'
5
- require 'libis/tools/logger'
6
2
  require 'libis/tools/command'
7
3
 
4
+ require 'csv'
8
5
  require 'libis/format/config'
9
- require 'libis/format/type_database'
6
+
7
+ require_relative 'identification_tool'
10
8
 
11
9
  module Libis
12
10
  module Format
13
11
 
14
- class Fido
15
- include Singleton
16
- include ::Libis::Tools::Logger
12
+ class Fido < Libis::Format::IdentificationTool
17
13
 
18
- BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream']
19
-
20
- def self.run(file, formats = nil)
21
- self.instance.run file, formats
14
+ def self.add_formats(formats_file)
15
+ self.instance.formats << formats_file unless self.instance.formats.include?(formats_file)
22
16
  end
23
17
 
24
- def run(file, xtra_formats = nil)
18
+ def self.del_formats(formats_file)
19
+ self.instance.formats.delete(formats_file)
20
+ end
25
21
 
26
- fido_results = []
22
+ attr_reader :formats
27
23
 
28
- fmt_list = formats.dup
29
- case xtra_formats
30
- when Array
31
- fmt_list += xtra_formats
32
- when String
33
- fmt_list << xtra_formats
34
- else
35
- # do nothing
24
+ def run_list(filelist)
25
+ create_list_file(filelist) do |list_file|
26
+ output = runner(nil, '-input', list_file.escape_for_string)
27
+ process_output(output)
36
28
  end
29
+ end
37
30
 
31
+ def run_dir(dir, recursive = true)
38
32
  args = []
39
- args << '-loadformats' << "#{fmt_list.join(',')}" unless fmt_list.empty?
40
- args << "#{file.escape_for_string}"
41
- fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
42
- warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
43
-
44
- keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
45
- fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
46
-
47
- fido_output.each do |x|
48
- if x[:status] == 'OK'
49
- x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None'
50
- next if BAD_MIMETYPES.include? x[:mimetype]
51
- x[:score] = 5
52
- case x[:matchtype]
53
- when 'signature'
54
- x[:score] += 5
55
- when 'container'
56
- typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid])
57
- ext = File.extname(file)
58
- x[:score] += 2 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
59
- else
60
- # do nothing
61
- end
62
- fido_results << x
63
- end
64
- end
65
-
66
- fido_results = fido_results.inject({}) do |result, value|
67
- result[value[:score]] ||= []
68
- result[value[:score]] << value
69
- result
70
- end
33
+ args << '-recurse' if recursive
34
+ output = runner(dir, *args)
35
+ process_output(output)
36
+ end
71
37
 
72
- max_score = fido_results.keys.max
38
+ def run(file)
39
+ output = runner(file)
40
+ process_output(output)
41
+ end
73
42
 
74
- # Only if we find a single hit of type 'signature' or 'container', we are confident enough to return a result
75
- return {} unless max_score and max_score >= 5 && fido_results[max_score].size == 1
43
+ protected
76
44
 
77
- fido_results[max_score].first
45
+ def initialize
46
+ super
47
+ @formats = Libis::Format::Config[:fido_formats].dup
48
+ bad_mimetype('application/vnd.oasis.opendocument.text')
49
+ bad_mimetype('application/vnd.oasis.opendocument.spreadsheet')
78
50
  end
79
51
 
80
- def self.add_format(f)
81
- instance.formats << f
82
- end
52
+ attr_writer :formats
83
53
 
84
- def self.formats
85
- instance.formats
86
- end
54
+ def runner(filename, *args)
55
+ # Load custom format definitions if present
56
+ args << '-loadformats' << "#{formats.join(',')}" unless formats.empty?
87
57
 
88
- protected
58
+ # Workaround for Fido performance bug
59
+ args << '-bufsize' << '1000'
89
60
 
90
- attr_reader :formats
61
+ # Add filename to argument list (optional)
62
+ args << "#{filename.escape_for_string}" if filename
91
63
 
92
- def initialize
93
- data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
94
- @formats = [(File.join(data_dir, 'lias_formats.xml'))]
95
- end
64
+ # No header output
65
+ args << '-q'
96
66
 
97
- def get_mimetype(puid)
98
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
67
+ # Run command and capture results
68
+ fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
69
+
70
+ # Log warning if needed
71
+ raise RuntimeError, "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
72
+
73
+ # Parse output (CSV) text into array and return result
74
+ keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype]
75
+ result = CSV.parse(fido[:out].join("\n"))
76
+ .map {|a| Hash[keys.zip(a)]}
77
+ .select {|a| a[:status] == 'OK'}
78
+ result.each do |r|
79
+ r.delete(:time)
80
+ r.delete(:status)
81
+ r.delete(:filesize)
82
+ r[:source] = :fido
83
+ end
99
84
  end
100
85
 
101
86
  end
@@ -0,0 +1,76 @@
1
+ require_relative 'identification_tool'
2
+
3
+ module Libis
4
+ module Format
5
+
6
+ class FileTool < Libis::Format::IdentificationTool
7
+
8
+ def run_list(filelist)
9
+
10
+ create_list_file(filelist) do |list_file|
11
+
12
+ output = runner(nil, '--files-from', list_file)
13
+
14
+ process_output(output)
15
+
16
+ end
17
+
18
+ end
19
+
20
+ def run_dir(dir, recursive = true)
21
+
22
+ filelist = find_files(dir, recursive)
23
+
24
+ create_list_file(filelist) do |list_file|
25
+
26
+ output = runner(nil, '--files-from', list_file)
27
+
28
+ process_output(output)
29
+
30
+ end
31
+
32
+ end
33
+
34
+ def run(file)
35
+
36
+ output = runner(file)
37
+
38
+ process_output(output)
39
+
40
+ end
41
+
42
+ protected
43
+
44
+ def runner(filename, *args)
45
+
46
+ # Create new argument list
47
+ opts = []
48
+
49
+ # Add fixed options
50
+ # -L : follow symlinks
51
+ # --mime-type : only print MIME type
52
+ opts << '-L' << '--mime-type'
53
+
54
+ # Append passed arguments
55
+ opts += args
56
+
57
+ # Finally add the filename to process
58
+ opts << filename.escape_for_string if filename
59
+
60
+ # Run the UNIX file command and capture the results
61
+ file_tool = ::Libis::Tools::Command.run('file', *opts)
62
+
63
+ raise RuntimeError, "File command errors: #{file_tool[:err].join("\n")}" unless file_tool[:err].empty?
64
+
65
+
66
+ # Parse output text into array and return result
67
+ file_tool[:out].map do |line|
68
+ r = line.split(/:\s+/)
69
+ {filepath: r[0], mimetype: r[1], matchtype: 'magic', source: :file}
70
+ end
71
+ end
72
+
73
+ end
74
+
75
+ end
76
+ end