libis-format 0.9.32 → 0.9.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. checksums.yaml +4 -4
  2. data/data/types.yml +30 -16
  3. data/lib/libis/format/config.rb +7 -18
  4. data/lib/libis/format/converter/image_converter.rb +6 -0
  5. data/lib/libis/format/droid.rb +82 -25
  6. data/lib/libis/format/extension_identification.rb +55 -0
  7. data/lib/libis/format/fido.rb +57 -72
  8. data/lib/libis/format/file_tool.rb +76 -0
  9. data/lib/libis/format/identification_tool.rb +174 -0
  10. data/lib/libis/format/identifier.rb +129 -117
  11. data/lib/libis/format/type_database.rb +36 -5
  12. data/lib/libis/format/version.rb +1 -1
  13. data/lib/libis/format.rb +3 -0
  14. data/libis-format.gemspec +2 -1
  15. data/spec/converter_spec.rb +6 -4
  16. data/spec/identifier_spec.rb +125 -34
  17. metadata +21 -126
  18. data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
  19. data/tools/droid/container-signature-20170330.xml +0 -3584
  20. data/tools/droid/droid-command-line-6.3.jar +0 -0
  21. data/tools/droid/droid.bat +0 -152
  22. data/tools/droid/droid.sh +0 -152
  23. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  24. data/tools/droid/lib/activation-1.1.jar +0 -0
  25. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  26. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  27. data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
  28. data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
  29. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  30. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  31. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  32. data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
  33. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  34. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  35. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  36. data/tools/droid/lib/commons-codec-1.10.jar +0 -0
  37. data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
  38. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  39. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  40. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  41. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  42. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  43. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  44. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  45. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  46. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  47. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  48. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  49. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  50. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  51. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  52. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  53. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  54. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  55. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  56. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  57. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  58. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  59. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  60. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  61. data/tools/droid/lib/droid-container-6.3.jar +0 -0
  62. data/tools/droid/lib/droid-core-6.3.jar +0 -0
  63. data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
  64. data/tools/droid/lib/droid-export-6.3.jar +0 -0
  65. data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
  66. data/tools/droid/lib/droid-help-6.3.jar +0 -0
  67. data/tools/droid/lib/droid-report-6.3.jar +0 -0
  68. data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
  69. data/tools/droid/lib/droid-results-6.3.jar +0 -0
  70. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  71. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  72. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  73. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  74. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  75. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  76. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  77. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  78. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  79. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  80. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  81. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  82. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  83. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  84. data/tools/droid/lib/jta-1.1.jar +0 -0
  85. data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
  86. data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
  87. data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
  88. data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
  89. data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
  90. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  91. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  92. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  93. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  94. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  95. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  96. data/tools/droid/lib/poi-3.13.jar +0 -0
  97. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  98. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  99. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  100. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  101. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  102. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  103. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  104. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  105. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  106. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  107. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  108. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  109. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  110. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  111. data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
  112. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  113. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  114. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  115. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  116. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  117. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  118. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  119. data/tools/droid/lib/xz-1.0.jar +0 -0
  120. data/tools/fido/__init__.py +0 -50
  121. data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
  122. data/tools/fido/conf/container-signature-20170330.xml +0 -3584
  123. data/tools/fido/conf/dc.xsd +0 -119
  124. data/tools/fido/conf/dcmitype.xsd +0 -53
  125. data/tools/fido/conf/dcterms.xsd +0 -383
  126. data/tools/fido/conf/fido-formats.xsd +0 -173
  127. data/tools/fido/conf/format_extension_template.xml +0 -105
  128. data/tools/fido/conf/format_extensions.xml +0 -484
  129. data/tools/fido/conf/formats-v90.xml +0 -48877
  130. data/tools/fido/conf/pronom-xml-v90.zip +0 -0
  131. data/tools/fido/conf/versions.xml +0 -8
  132. data/tools/fido/fido.bat +0 -4
  133. data/tools/fido/fido.py +0 -884
  134. data/tools/fido/fido.sh +0 -5
  135. data/tools/fido/package.py +0 -96
  136. data/tools/fido/prepare.py +0 -645
  137. data/tools/fido/pronomutils.py +0 -200
  138. data/tools/fido/toxml.py +0 -60
  139. data/tools/fido/update_signatures.py +0 -183
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 11f5b2cb05e4748b97fef65f805e8222e9857354
4
- data.tar.gz: 49c667a039c6174b41aaabbb2800955989884fa0
3
+ metadata.gz: 5a13d9df8f5e85ccc58758e46e556066d16935b0
4
+ data.tar.gz: 3a80e5cfc6d7ea70f4c0091420012446ec801b74
5
5
  SHA512:
6
- metadata.gz: 9bbb1f1f21742ece1b4e34ce22a295327436b0c59b996a018bfba9c828377e5a79a1cefb9f4c2a01e080dbdc15e8c6586dc15b5ab66eca936550c840eeba380d
7
- data.tar.gz: 9214406a8a4b638801d8c4258e14401a3b65fb1283299e76664115148514476f3738832be3718b7d124583a3ea21e53b9a949f7557c7a63d42203f97e4381b14
6
+ metadata.gz: c6a45a701e5ec6e07ebf763ee2fe3d97eb327fdccc85ca9aaca3e006e5dee3c1c058081c9a33c311aa72a7f2272661986799ab4a3f731f35e6ed5053ac1a25b3
7
+ data.tar.gz: 6f7b02f7ab9a9fad169920f7b2dc04fdbc63a33fb8d5e70e458de9055fa1427acf453f88ca3937883d721a15d636af53ce943a6feeb68e980e0d28a04970222d
data/data/types.yml CHANGED
@@ -10,53 +10,62 @@
10
10
  IMAGE:
11
11
  TIFF:
12
12
  NAME: Tagged Image File Format (TIFF)
13
- MIME: image/tiff
13
+ MIME: image/tiff image/x-tiff image/tif image/x-tif application/tiff application/x-tiff application/tif application/x-tif
14
14
  PUID: fmt/353 fmt/154 fmt/153 fmt/156 fmt/155 fmt/152 x-fmt/399 x-fmt/388 x-fmt/387 fmt/202
15
15
  EXTENSIONS: tif,TIF,tiff,tifx,dng,nef
16
16
 
17
17
  JP2:
18
18
  NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
19
- MIME: image/jp2
20
- PUID: fmt/406
19
+ MIME: image/jp2, image/jpeg2000, image/jpeg2000-image, image/x-jpeg2000-image, image/jpx
20
+ PUID: fmt/463, fmt/151, x-fmt/392
21
21
  EXTENSIONS: jp2
22
22
 
23
23
  JPG:
24
24
  NAME: Joint Photographic Experts Group (JPEG)
25
- MIME: image/jpeg
25
+ MIME: image/jpeg, image/jpg, image/jp_, application/jpg, application/x-jpg, image/pjpeg, image/pipeg, image/vnd.swiftview-jpeg
26
26
  PUID: fmt/42 fmt/43 fmt/44 fmt/41 x-fmt/398 x-fmt/390 x-fmt/391 fmt/645
27
27
  EXTENSIONS: jpg,jpe,jpeg
28
28
 
29
29
  PNG:
30
30
  NAME: Portable Network Graphics (PNG)
31
- MIME: image/png
31
+ MIME: image/png, application/png, application/x-png
32
+ PUID: fmt/11, fmt/12, fmt/13,
32
33
  EXTENSIONS: png
33
34
 
35
+ APNG:
36
+ NAME: Animate Portable Network Graphics
37
+ MIME: image/vnd.mozilla.apng
38
+ PUID: fmt/935
39
+ EXTENSIONS: apng, png
40
+
34
41
  BMP:
35
42
  NAME: Device Independent Bitmap (DIP/BMP)
36
- MIME: image/bmp,image/x-ms-bmp
43
+ MIME: image/bmp, image/x-bmp, image/x-bitmap, image/x-xbitmap, image/x-win-bitmap, image/x-windows-bmp, image/ms-bmp, image/x-ms-bmp, application/bmp, application/x-bmp, application/x-win-bitmap
44
+ PUID: fmt/115, fmt/118, fmt/119, fmt/114, fmt/116, fmt/117, x-fmt/270
37
45
  EXTENSIONS: bmp
38
46
 
39
47
  GIF:
40
48
  NAME: Graphics Interchange Format (GIF)
41
49
  MIME: image/gif
50
+ PUID: fmt/3, fmt/4
42
51
  EXTENSIONS: gif
43
52
 
44
53
  PBM:
45
54
  NAME: Portable Bitmap Format (PBM)
46
- PUID: fmt/409
47
- MIME: image/x‑portable‑bitmap
55
+ MIME: image/x-portable-bitmap, image/pbm, image/x-pbm
56
+ PUID: fmt/409, x-fmt/164
48
57
  EXTENSIONS: pbm
49
58
 
50
59
  PGM:
51
60
  NAME: Portable GrayMap Format (PGM)
52
- PUID: fmt/406
53
- MIME: image/xportablegraymap
61
+ PUID: fmt/406, fmt/407
62
+ MIME: image/x-portable-graymap, image/pgm, image/x-pgm
54
63
  EXTENSIONS: pgm
55
64
 
56
65
  PPM:
57
66
  NAME: Portable Pixel Map (PPM)
58
- PUID: fmt/408
59
- MIME: image/xportablepixmap
67
+ PUID: fmt/408, x-fmt/178
68
+ MIME: image/x-portable-pixmap, application/ppm, application/x-ppm, image/ppm, image/x-ppm
60
69
  EXTENSIONS: ppm
61
70
 
62
71
  AUDIO:
@@ -133,7 +142,7 @@ TEXT:
133
142
  RTF:
134
143
  NAME: Rich Text Format (RTF)
135
144
  PUID: fmt/45
136
- MIME: text/rtf application/rtf
145
+ MIME: application/rtf text/rtf
137
146
  EXTENSIONS: rtf
138
147
 
139
148
  HTML:
@@ -143,8 +152,8 @@ TEXT:
143
152
 
144
153
  MSDOC:
145
154
  NAME: Microsoft Word Document (DOC)
146
- PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
147
- MIME: application/vnd.ms-word application/msword
155
+ PUID: fmt/609 fmt/39 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40 fmt/754
156
+ MIME: application/msword application/doc appl/text application/vnd.msword application/vnd.ms-word application/winword application/word application/x-msw6 application/x-msword
148
157
  EXTENSIONS: doc
149
158
 
150
159
  MSDOCX:
@@ -171,9 +180,14 @@ TEXT:
171
180
  MIME: application/vnd.wordperfect
172
181
  EXTENSIONS: wpd
173
182
 
183
+ OO_WRITER:
184
+ NAME: OpenDocument Text
185
+ MIME: application/vnd.oasis.opendocument.text, application/x-vnd.oasis.opendocument.text
186
+ PUID: fmt/136, fmt/290, fmt/291
187
+
174
188
  XML:
175
189
  NAME: Extensible Markup Language (XML)
176
- MIME: text/xml application/xml
190
+ MIME: text/xml application/xml application/x-xml
177
191
  PUID: fmt/101
178
192
  EXTENSIONS: xml
179
193
 
data/lib/libis/format/config.rb CHANGED
@@ -13,24 +13,13 @@ module Libis
13
13
  Config[:j2kdriver] = 'j2kdriver'
14
14
  Config[:soffice_path] = 'soffice'
15
15
  Config[:ghostscript_path] = 'gs'
16
- # Config[:pdfa_path] =
17
- # File.absolute_path(
18
- # File.join(
19
- # File.dirname(__FILE__), '..', '..', '..', 'tools', 'pdf', 'pdfa', 'pdfa'
20
- # )
21
- # )
22
- Config[:droid_path] =
23
- File.absolute_path(
24
- File.join(
25
- File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid', OS.windows? ? 'droid.bat' : 'droid.sh'
26
- )
27
- )
28
- Config[:fido_path] =
29
- File.absolute_path(
30
- File.join(
31
- File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido', OS.windows? ? 'fido.bat' : 'fido.sh'
32
- )
33
- )
16
+ Config[:droid_path] = '/opt/droid/droid.sh'
17
+ Config[:fido_path] = '/usr/local/bin/fido'
18
+ data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
19
+ Config[:fido_formats] = [(File.join(data_dir, 'lias_formats.xml'))]
20
+ # noinspection RubyStringKeysInHashInspection
21
+ Config[:xml_validations] = [['archive/ead', File.join(data_dir, 'ead.xsd')]]
22
+ Config[:type_database] = File.join(data_dir, 'types.yml')
34
23
 
35
24
  end
36
25
  end
data/lib/libis/format/converter/image_converter.rb CHANGED
@@ -8,6 +8,12 @@ require 'fileutils'
8
8
 
9
9
  MiniMagick.logger.level = ::Logger::ERROR
10
10
 
11
+ MiniMagick.configure do |config|
12
+ # config.cli = :graphicsmagick
13
+ config.validate_on_create = false
14
+ config.validate_on_write = false
15
+ end
16
+
11
17
  module Libis
12
18
  module Format
13
19
  module Converter
data/lib/libis/format/droid.rb CHANGED
@@ -3,45 +3,102 @@ require 'singleton'
3
3
  require 'tempfile'
4
4
  require 'csv'
5
5
 
6
- require 'libis/tools/extend/string'
7
- require 'libis/tools/logger'
8
- require 'libis/tools/command'
9
-
10
6
  require 'libis/format/config'
11
7
 
8
+ unless CSV::HeaderConverters.has_key?(:droid_headers)
9
+ CSV::HeaderConverters[:droid_headers] = lambda {|h|
10
+ h.encode(ConverterEncoding).downcase.strip.
11
+ gsub(/\W+/, "").to_sym
12
+ }
13
+ end
14
+
15
+ require_relative 'identification_tool'
16
+
12
17
  module Libis
13
18
  module Format
14
19
 
15
- class Droid
16
- include Singleton
17
- include ::Libis::Tools::Logger
20
+ class Droid < Libis::Format::IdentificationTool
21
+
22
+ def run_list(filelist)
23
+ runner(filelist)
24
+ end
18
25
 
19
- def self.run(file)
20
- self.instance.run file
26
+ def run_dir(dir, recursive = true)
27
+ profile = profile_file_name
28
+ report = result_file_name
29
+ create_profile(dir, profile, recursive)
30
+ create_report(profile, report)
31
+ parse_report(report)
21
32
  end
22
33
 
23
34
  def run(file)
24
- profile = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
25
- report = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
26
- result = Libis::Tools::Command.run(
27
- Libis::Format::Config[:droid_path],
28
- '-a', file.escape_for_string,
29
- '-p', profile,
30
- '-q',
31
- )
32
- warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
33
- result = Libis::Tools::Command.run(
34
- Libis::Format::Config[:droid_path],
35
+ runner(file)
36
+ end
37
+
38
+ protected
39
+
40
+ def runner(file_or_list)
41
+ profile = profile_file_name
42
+ report = result_file_name
43
+ create_profile(file_or_list, profile)
44
+ create_report(profile, report)
45
+ parse_report(report)
46
+ end
47
+
48
+ def parse_report(report)
49
+ keys = [
50
+ :id, :parent_id, :uri, :filepath, :filename, :matchtype, :status, :filesize, :type, :extension,
51
+ :mod_time, :ext_mismatch, :hash, :format_count, :puid, :mimetype, :format_name, :format_version]
52
+ result = CSV.parse(File.readlines(report).join)
53
+ .map {|a| Hash[keys.zip(a)]}
54
+ .select {|a| a[:type] == 'File'}
55
+ # File.delete report
56
+ result.each do |r|
57
+ r.delete(:id)
58
+ r.delete(:parent_id)
59
+ r.delete(:uri)
60
+ r.delete(:filename)
61
+ r.delete(:status)
62
+ r.delete(:filesize)
63
+ r.delete(:type)
64
+ r.delete(:extension)
65
+ r.delete(:mod_time)
66
+ r.delete(:hash)
67
+ r.delete(:format_count)
68
+ r[:source] = :droid
69
+ end
70
+ process_output(result)
71
+ end
72
+
73
+ def create_report(profile, report)
74
+ args = [
35
75
  '-e', report,
36
76
  '-p', profile,
37
77
  '-q'
38
- )
39
- warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
78
+ ]
79
+ result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
80
+ raise RuntimeError, "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
40
81
  File.delete profile
41
- result = CSV.read(report , headers: true, header_converters: [:downcase, :symbol])
42
- File.delete report
43
- result.map{|r|r.to_hash}
44
82
  end
83
+
84
+ def create_profile(file_or_list, profile, recursive = false)
85
+ args = []
86
+ files = (file_or_list.is_a?(Array)) ? file_or_list.map(&:escape_for_string) : [file_or_list.escape_for_string]
87
+ files.each { |file| args << '-a' << file}
88
+ args << '-p' << profile << '-q'
89
+ args << '-R' if recursive
90
+ result = Libis::Tools::Command.run(Libis::Format::Config[:droid_path], *args)
91
+ raise RuntimeError, "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
92
+ end
93
+
94
+ def profile_file_name
95
+ File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
96
+ end
97
+
98
+ def result_file_name
99
+ File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
100
+ end
101
+
45
102
  end
46
103
 
47
104
  end
data/lib/libis/format/extension_identification.rb ADDED
@@ -0,0 +1,55 @@
1
+ require_relative 'identification_tool'
2
+
3
+ module Libis
4
+ module Format
5
+
6
+ class ExtensionIdentification < Libis::Format::IdentificationTool
7
+
8
+ def run_list(filelist)
9
+
10
+ output = runner(nil, filelist)
11
+
12
+ process_output(output)
13
+
14
+ end
15
+
16
+ def run_dir(dir, recursive = true)
17
+
18
+ filelist = find_files(dir, recursive)
19
+
20
+ output = runner(nil, filelist)
21
+
22
+ process_output(output)
23
+
24
+ end
25
+
26
+ def run(file)
27
+
28
+ output = runner(file)
29
+
30
+ process_output(output)
31
+
32
+ end
33
+
34
+ protected
35
+
36
+ def runner(*args)
37
+
38
+ args.map do |file|
39
+ info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
40
+ if info
41
+ {
42
+ filepath: file,
43
+ mimetype: (info[:MIME].first rescue nil),
44
+ puid: (info[:PUID].first rescue nil),
45
+ matchtype: 'extension',
46
+ source: :type_database
47
+ }
48
+ end
49
+ end.cleanup
50
+
51
+ end
52
+
53
+ end
54
+ end
55
+ end
data/lib/libis/format/fido.rb CHANGED
@@ -1,101 +1,86 @@
1
- require 'csv'
2
-
3
- require 'singleton'
4
1
  require 'libis/tools/extend/string'
5
- require 'libis/tools/logger'
6
2
  require 'libis/tools/command'
7
3
 
4
+ require 'csv'
8
5
  require 'libis/format/config'
9
- require 'libis/format/type_database'
6
+
7
+ require_relative 'identification_tool'
10
8
 
11
9
  module Libis
12
10
  module Format
13
11
 
14
- class Fido
15
- include Singleton
16
- include ::Libis::Tools::Logger
12
+ class Fido < Libis::Format::IdentificationTool
17
13
 
18
- BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream']
19
-
20
- def self.run(file, formats = nil)
21
- self.instance.run file, formats
14
+ def self.add_formats(formats_file)
15
+ self.instance.formats << formats_file unless self.instance.formats.include?(formats_file)
22
16
  end
23
17
 
24
- def run(file, xtra_formats = nil)
18
+ def self.del_formats(formats_file)
19
+ self.instance.formats.delete(formats_file)
20
+ end
25
21
 
26
- fido_results = []
22
+ attr_reader :formats
27
23
 
28
- fmt_list = formats.dup
29
- case xtra_formats
30
- when Array
31
- fmt_list += xtra_formats
32
- when String
33
- fmt_list << xtra_formats
34
- else
35
- # do nothing
24
+ def run_list(filelist)
25
+ create_list_file(filelist) do |list_file|
26
+ output = runner(nil, '-input', list_file.escape_for_string)
27
+ process_output(output)
36
28
  end
29
+ end
37
30
 
31
+ def run_dir(dir, recursive = true)
38
32
  args = []
39
- args << '-loadformats' << "#{fmt_list.join(',')}" unless fmt_list.empty?
40
- args << "#{file.escape_for_string}"
41
- fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
42
- warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
43
-
44
- keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
45
- fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
46
-
47
- fido_output.each do |x|
48
- if x[:status] == 'OK'
49
- x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None'
50
- next if BAD_MIMETYPES.include? x[:mimetype]
51
- x[:score] = 5
52
- case x[:matchtype]
53
- when 'signature'
54
- x[:score] += 5
55
- when 'container'
56
- typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid])
57
- ext = File.extname(file)
58
- x[:score] += 2 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
59
- else
60
- # do nothing
61
- end
62
- fido_results << x
63
- end
64
- end
65
-
66
- fido_results = fido_results.inject({}) do |result, value|
67
- result[value[:score]] ||= []
68
- result[value[:score]] << value
69
- result
70
- end
33
+ args << '-recurse' if recursive
34
+ output = runner(dir, *args)
35
+ process_output(output)
36
+ end
71
37
 
72
- max_score = fido_results.keys.max
38
+ def run(file)
39
+ output = runner(file)
40
+ process_output(output)
41
+ end
73
42
 
74
- # Only if we find a single hit of type 'signature' or 'container', we are confident enough to return a result
75
- return {} unless max_score and max_score >= 5 && fido_results[max_score].size == 1
43
+ protected
76
44
 
77
- fido_results[max_score].first
45
+ def initialize
46
+ super
47
+ @formats = Libis::Format::Config[:fido_formats].dup
48
+ bad_mimetype('application/vnd.oasis.opendocument.text')
49
+ bad_mimetype('application/vnd.oasis.opendocument.spreadsheet')
78
50
  end
79
51
 
80
- def self.add_format(f)
81
- instance.formats << f
82
- end
52
+ attr_writer :formats
83
53
 
84
- def self.formats
85
- instance.formats
86
- end
54
+ def runner(filename, *args)
55
+ # Load custome format definitions if present
56
+ args << '-loadformats' << "#{formats.join(',')}" unless formats.empty?
87
57
 
88
- protected
58
+ # Workaround for Fido performance bug
59
+ args << '-bufsize' << '1000'
89
60
 
90
- attr_reader :formats
61
+ # Add filename to argument list (optional)
62
+ args << "#{filename.escape_for_string}" if filename
91
63
 
92
- def initialize
93
- data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
94
- @formats = [(File.join(data_dir, 'lias_formats.xml'))]
95
- end
64
+ # No header output
65
+ args << '-q'
96
66
 
97
- def get_mimetype(puid)
98
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
67
+ # Run command and capture results
68
+ fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
69
+
70
+ # Log warning if needed
71
+ raise RuntimeError, "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
72
+
73
+ # Parse output (CSV) text into array and return result
74
+ keys = [:status, :time, :puid, :format_name, :format_version, :filesize, :filepath, :mimetype, :matchtype]
75
+ result = CSV.parse(fido[:out].join("\n"))
76
+ .map {|a| Hash[keys.zip(a)]}
77
+ .select {|a| a[:status] == 'OK'}
78
+ result.each do |r|
79
+ r.delete(:time)
80
+ r.delete(:status)
81
+ r.delete(:filesize)
82
+ r[:source] = :fido
83
+ end
99
84
  end
100
85
 
101
86
  end
data/lib/libis/format/file_tool.rb ADDED
@@ -0,0 +1,76 @@
1
+ require_relative 'identification_tool'
2
+
3
+ module Libis
4
+ module Format
5
+
6
+ class FileTool < Libis::Format::IdentificationTool
7
+
8
+ def run_list(filelist)
9
+
10
+ create_list_file(filelist) do |list_file|
11
+
12
+ output = runner(nil, '--files-from', list_file)
13
+
14
+ process_output(output)
15
+
16
+ end
17
+
18
+ end
19
+
20
+ def run_dir(dir, recursive = true)
21
+
22
+ filelist = find_files(dir, recursive)
23
+
24
+ create_list_file(filelist) do |list_file|
25
+
26
+ output = runner(nil, '--files-from', list_file)
27
+
28
+ process_output(output)
29
+
30
+ end
31
+
32
+ end
33
+
34
+ def run(file)
35
+
36
+ output = runner(file)
37
+
38
+ process_output(output)
39
+
40
+ end
41
+
42
+ protected
43
+
44
+ def runner(filename, *args)
45
+
46
+ # Create new argument list
47
+ opts = []
48
+
49
+ # Add fixed options
50
+ # -L : follow symlinks
51
+ # --mime-type : only print MIME type
52
+ opts << '-L' << '--mime-type'
53
+
54
+ # Append passed arguments
55
+ opts += args
56
+
57
+ # Finally add the filename to process
58
+ opts << filename.escape_for_string if filename
59
+
60
+ # Run the UNIX file command and capture the results
61
+ file_tool = ::Libis::Tools::Command.run('file', *opts)
62
+
63
+ raise RuntimeError, "File command errors: #{file_tool[:err].join("\n")}" unless file_tool[:err].empty?
64
+
65
+
66
+ # Parse output text into array and return result
67
+ file_tool[:out].map do |line|
68
+ r = line.split(/:\s+/)
69
+ {filepath: r[0], mimetype: r[1], matchtype: 'magic', source: :file}
70
+ end
71
+ end
72
+
73
+ end
74
+
75
+ end
76
+ end