libis-format 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +15 -0
  4. data/.travis.yml +36 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +38 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/data/ISOcoated.icc +0 -0
  12. data/data/PDFA_def.ps +32 -0
  13. data/data/ead.xsd +2728 -0
  14. data/data/lias_formats.xml +106 -0
  15. data/data/types.yml +213 -0
  16. data/lib/libis/format/converter/base.rb +103 -0
  17. data/lib/libis/format/converter/chain.rb +80 -0
  18. data/lib/libis/format/converter/repository.rb +110 -0
  19. data/lib/libis/format/converter.rb +11 -0
  20. data/lib/libis/format/droid.rb +38 -0
  21. data/lib/libis/format/fido.rb +109 -0
  22. data/lib/libis/format/identifier.rb +185 -0
  23. data/lib/libis/format/type_database.rb +170 -0
  24. data/lib/libis/format/version.rb +5 -0
  25. data/lib/libis/format.rb +12 -0
  26. data/lib/libis-format.rb +1 -0
  27. data/libis-format.gemspec +30 -0
  28. data/spec/data/Cevennes2.bmp +0 -0
  29. data/spec/data/Cevennes2.jp2 +0 -0
  30. data/spec/data/Cevennes2.ppm +22492 -0
  31. data/spec/data/test-ead.xml +392 -0
  32. data/spec/data/test-jpg.tif +0 -0
  33. data/spec/data/test-lzw.tif +0 -0
  34. data/spec/data/test.bmp +0 -0
  35. data/spec/data/test.doc +0 -0
  36. data/spec/data/test.docx +0 -0
  37. data/spec/data/test.gif +0 -0
  38. data/spec/data/test.ods +0 -0
  39. data/spec/data/test.odt +0 -0
  40. data/spec/data/test.pdf +0 -0
  41. data/spec/data/test.png +0 -0
  42. data/spec/data/test.ps +8631 -0
  43. data/spec/data/test.psd +0 -0
  44. data/spec/data/test.rtf +1455 -0
  45. data/spec/data/test.tif +0 -0
  46. data/spec/data/test.txt +12 -0
  47. data/spec/data/test.xcf +0 -0
  48. data/spec/data/test.xls +0 -0
  49. data/spec/data/test.xlsx +0 -0
  50. data/spec/data/test.xml +4 -0
  51. data/spec/identifier_spec.rb +59 -0
  52. data/spec/spec_helper.rb +9 -0
  53. data/spec/test_types.yml +12 -0
  54. data/spec/type_database_spec.rb +140 -0
  55. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  56. data/tools/droid/container-signature-20150307.xml +2235 -0
  57. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  58. data/tools/droid/droid.bat +154 -0
  59. data/tools/droid/droid.sh +138 -0
  60. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  61. data/tools/droid/lib/activation-1.1.jar +0 -0
  62. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  63. data/tools/droid/lib/antlr-3.2.jar +0 -0
  64. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  65. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  66. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  67. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  68. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  69. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  70. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  71. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  72. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  73. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  74. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  75. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  76. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  77. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  78. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  79. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  80. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  81. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  82. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  83. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  84. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  85. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  86. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  87. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  88. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  89. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  90. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  91. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  92. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  93. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  94. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  95. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  96. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  97. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  98. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  99. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  100. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  101. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  102. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  103. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  104. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  105. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  106. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  107. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  108. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  109. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  110. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  111. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  112. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  113. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  114. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  115. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  116. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  117. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  118. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  119. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  120. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  121. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  122. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  123. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  124. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  125. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  126. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  127. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  128. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  129. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  130. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  131. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  132. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  133. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  134. data/tools/droid/lib/jta-1.1.jar +0 -0
  135. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  136. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  137. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  138. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  139. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  140. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  141. data/tools/droid/lib/poi-3.7.jar +0 -0
  142. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  143. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  144. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  145. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  146. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  147. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  148. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  149. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  150. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  151. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  152. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  153. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  154. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  155. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  156. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  157. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  158. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  159. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  160. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  161. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  162. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  163. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  164. data/tools/droid/lib/xz-1.0.jar +0 -0
  165. data/tools/fido/__init__.py +0 -0
  166. data/tools/fido/argparselocal.py +2355 -0
  167. data/tools/fido/argparselocal.pyc +0 -0
  168. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  169. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  170. data/tools/fido/conf/dc.xsd +119 -0
  171. data/tools/fido/conf/dcmitype.xsd +53 -0
  172. data/tools/fido/conf/dcterms.xsd +383 -0
  173. data/tools/fido/conf/fido-formats.xsd +173 -0
  174. data/tools/fido/conf/format_extension_template.xml +105 -0
  175. data/tools/fido/conf/format_extensions.xml +498 -0
  176. data/tools/fido/conf/formats-v81.xml +38355 -0
  177. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  178. data/tools/fido/conf/versions.xml +8 -0
  179. data/tools/fido/fido.bat +4 -0
  180. data/tools/fido/fido.py +854 -0
  181. data/tools/fido/fido.sh +5 -0
  182. data/tools/fido/prepare.py +616 -0
  183. data/tools/fido/pronomutils.py +115 -0
  184. data/tools/fido/toxml.py +52 -0
  185. data/tools/fido/update_signatures.py +171 -0
  186. metadata +342 -0
@@ -0,0 +1,109 @@
1
+ require 'os'
2
+ require 'csv'
3
+ require 'singleton'
4
+
5
+ require 'libis/tools/extend/string'
6
+ require 'libis/tools/logger'
7
+ require 'libis/tools/command'
8
+
9
+ require 'libis/format/type_database'
10
+
11
module Libis
  module Format

    # Singleton wrapper around the FIDO command-line tool found in the gem's tools/fido directory.
    #
    # FIDO is run on a single file, its CSV output is parsed and every usable candidate is given a
    # score. A result is only returned when exactly one candidate holds the highest score.
    class Fido
      include ::Libis::Tools::Logger
      include Singleton

      # MIME type values that are treated as "no usable identification".
      BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream']

      # Format files passed to FIDO through its -loadformats option.
      # Public because the class-level helpers below read it through +instance+; a protected reader
      # cannot be called from the class object and would raise NoMethodError.
      attr_reader :formats

      # Run FIDO on +file+ using the singleton instance.
      #
      # @param file [String] path of the file to identify
      # @param formats [Array<String>, String, nil] extra format files for this run only
      # @return [Hash] see {#run}
      def self.run(file, formats = nil)
        instance.run file, formats
      end

      # Register an additional format file for all subsequent runs.
      #
      # @param f [String] path of the format file
      # @return [Array<String>] the updated list of format files
      def self.add_format(f)
        instance.formats << f
      end

      # @return [Array<String>] the format files currently registered
      def self.formats
        instance.formats
      end

      # Run FIDO on +file+ and pick the best candidate.
      #
      # @param file [String] path of the file to identify
      # @param xtra_formats [Array<String>, String, nil] extra format files for this run only;
      #   any other value is ignored
      # @return [Hash] the winning FIDO record (keys :status, :time, :puid, :format_name,
      #   :signature_name, :filesize, :filename, :mimetype, :matchtype and :score), or an empty
      #   Hash when there is no candidate or the top score is shared by several candidates
      def run(file, xtra_formats = nil)
        # Work on a copy so per-run extras never leak into the registered list.
        fmt_list = formats.dup
        case xtra_formats
        when Array
          fmt_list += xtra_formats
        when String
          fmt_list << xtra_formats
        end

        bin_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido'))
        cmd = File.join(bin_dir, OS.windows? ? 'fido.bat' : 'fido.sh')
        args = []
        args << '-loadformats' << fmt_list.join(',') unless fmt_list.empty?
        args << file.escape_for_string
        fido = ::Libis::Tools::Command.run(cmd, *args)
        warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?

        keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
        fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
        debug "Fido output: #{fido_output}"

        # TypeDatabase.ext_infos strips the leading dot before comparing against EXTENSIONS, so the
        # extension has to be compared without it here as well.
        ext = File.extname(file).sub(/\A\./, '')

        fido_results = []
        fido_output.each do |x|
          next unless x[:status] == 'OK'
          # FIDO reports 'None' when it has no MIME type; fall back on the type database.
          x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None'
          next if BAD_MIMETYPES.include? x[:mimetype]
          x[:score] = 5
          case x[:matchtype]
          when 'signature'
            x[:score] += 5
          when 'container'
            typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid])
            x[:score] += 2 if typeinfo && typeinfo[:EXTENSIONS].include?(ext)
          end
          fido_results << x
        end

        by_score = fido_results.group_by { |x| x[:score] }
        debug "Fido results: #{by_score}"

        max_score = by_score.keys.max

        # Only a single candidate at the top score is considered trustworthy enough to return.
        return {} unless max_score && max_score >= 5 && by_score[max_score].size == 1

        by_score[max_score].first
      end

      protected

      # Start with the format file shipped in the gem's data directory.
      def initialize
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        @formats = [File.join(data_dir, 'lias_formats.xml')]
      end

      # Look up the first MIME type the type database knows for +puid+.
      #
      # @param puid [String] PRONOM identifier
      # @return [String, nil] nil when the PUID is unknown or has no MIME type
      def get_mimetype(puid)
        typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(puid)
        mimetypes = typeinfo && typeinfo[:MIME]
        mimetypes ? mimetypes.first : nil
      end

    end

  end
end
@@ -0,0 +1,185 @@
1
+ # encoding: utf-8
2
+
3
+ require 'singleton'
4
+
5
+ require 'libis-tools'
6
+ require 'libis/tools/extend/string'
7
+ require 'libis/tools/extend/empty'
8
+
9
+ require 'libis/format/type_database'
10
+
11
+ require_relative 'fido'
12
+ require_relative 'droid'
13
+
14
module Libis
  module Format

    # Singleton that identifies the format of a file by trying, in order: FIDO, DROID, the `file`
    # command, the file extension and finally XML Schema validation for XML files.
    #
    # Every step receives the result so far and leaves it untouched when it is already good enough
    # (see #result_ok?).
    class Identifier
      include ::Libis::Tools::Logger
      include Singleton

      # MIME types for which the next identification method should be tried.
      RETRY_MIMETYPES = %w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES
      # MIME types for which a previous result is not accepted when DROID is the one asking.
      FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet)

      # Map of MIME type => XSD file used by #validate_against_xml_schema.
      attr_reader :xml_validations

      # Register an extra format file with FIDO.
      def self.add_fido_format(f)
        ::Libis::Format::Fido.add_format f
      end

      # Register an XML Schema; XML files validating against it get +mimetype+ assigned.
      def self.add_xml_validation(mimetype, xsd_file)
        instance.xml_validations[mimetype] = xsd_file
      end

      # @return [Hash] the registered MIME type => XSD file map
      def self.xml_validations
        instance.xml_validations
      end

      # Identify +file_path+ using the singleton instance; see #get.
      def self.get(file_path, options = nil)
        instance.get file_path, options
      end

      # Identify the format of +file+.
      #
      # @param file [String] path of the file
      # @param options [Hash, nil] :droid skips FIDO when truthy; :formats is passed to FIDO as
      #   extra format files
      # @return [Hash, nil] identification info (:mimetype, :puid, :method, ...), or nil when the
      #   file does not exist or is a directory
      def get(file, options = nil)
        # File.exists? is deprecated and was removed in Ruby 3.2.
        unless File.exist? file
          error 'File %s cannot be found.', file
          return nil
        end
        if File.directory? file
          error '%s is a directory.', file
          return nil
        end

        options ||= {}
        result = {}

        # FIDO does not always do a good job, mainly due to lacking container inspection.
        result = get_fido_identification(file, result, options[:formats]) unless options[:droid]
        result = get_droid_identification file, result
        result = get_file_identification(file, result)
        result = get_extension_identification(file, result)

        # Determine the XML type. Custom types can be added at runtime with add_xml_validation.
        result = validate_against_xml_schema(file, result)

        # The result is always a Hash at this point, so success is decided on the MIME type.
        if result[:mimetype].to_s.empty?
          warn("Could not identify MIME type of '#{file}'")
        else
          info("Identification of '#{file}': '#{result}'")
        end

        result
      end

      # Try FIDO unless +result+ is already good enough.
      #
      # @return [Hash] +result+, merged with FIDO's record and tagged with method 'fido'
      def get_fido_identification(file, result = {}, xtra_formats = nil)
        return result if result_ok? result

        fido_result = ::Libis::Format::Fido.run(file, xtra_formats)
        return result unless fido_result.is_a? Hash

        result.merge! fido_result
        result[:method] = 'fido'

        debug "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})" unless result.empty?
        result
      end

      # Try DROID unless +result+ is already good enough.
      #
      # @return [Hash] +result+, replaced by DROID's first match when DROID reported any
      def get_droid_identification(file, result = {})
        return result if result_ok? result, :DROID

        droid_output = ::Libis::Format::Droid.run file
        debug "DROID: #{droid_output}"
        # Without any match there is nothing to take over; leave the result for the next method.
        return result if droid_output.nil? || droid_output.empty?

        warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
        match = droid_output.first

        result.clear
        # DROID may list several values; keep the first one that looks like a MIME type.
        result[:mimetype] = match[:mime_type].to_s.split(/[\s,]+/).find { |x| x =~ /.*\/.*/ }
        result[:matchtype] = match[:method]
        result[:puid] = match[:puid]
        result[:format_name] = match[:format_name]
        result[:format_version] = match[:format_version]
        result[:method] = 'droid'

        debug "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})"
        result
      end

      # Try the `file` command unless +result+ is already good enough.
      #
      # @return [Hash] a fresh result tagged with method 'file' (empty when the command failed)
      def get_file_identification(file, result = nil)
        return result if result_ok? result

        result = {}
        begin
          # NOTE(review): the extra quotes differ from how Fido passes the file name to
          # Command.run - confirm how Command.run hands arguments to the process.
          output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', "\"#{file.escape_for_string}\"")[:out]
          # The command output is a list of lines; only accept something shaped like a MIME type
          # so that an error message is never mistaken for one.
          mimetype = Array(output).join(' ').split.find { |token| token =~ %r{\A[\w.+-]+/[\w.+-]+\z} }
          if mimetype
            debug "File result: '#{mimetype}'"
            result[:mimetype] = mimetype
            result[:puid] = get_puid(mimetype)
          end
          result[:method] = 'file'
        rescue StandardError => e
          # Best effort: a failing `file` command must not abort the identification.
          debug "File identification failed: #{e.message}"
        end
        result
      end

      # Fall back on the file extension unless +result+ is already good enough.
      #
      # @return [Hash] a fresh result tagged with method 'extension'
      def get_extension_identification(file, result = nil)
        return result if result_ok? result

        result = {}
        info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
        debug "File extension info: #{info}"
        if info
          result[:mimetype] = Array(info[:MIME]).first
          result[:puid] = Array(info[:PUID]).first
        end
        result[:method] = 'extension'
        result
      end

      # For generic XML files, assign the MIME type of every registered schema the document
      # validates against (a later matching schema overrides an earlier one).
      #
      # @return [Hash] +result+, possibly with a more specific MIME type
      def validate_against_xml_schema(file, result)
        return result unless result[:mimetype] =~ /^(text|application)\/xml$/

        doc = ::Libis::Tools::XmlDocument.open file
        xml_validations.each do |mime, xsd_file|
          next unless xsd_file
          next unless doc.validates_against?(xsd_file)
          debug "XML file validated against XML Schema: #{xsd_file}"
          result[:mimetype] = mime
          # The PUID of the generic XML type no longer applies; let enrich look up a new one.
          result[:puid] = nil
          result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
        end
        result
      end

      protected

      def initialize
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        # Not read anywhere in this class; FIDO formats are registered through add_fido_format.
        @fido_formats = [File.join(data_dir, 'lias_formats.xml')]
        # noinspection RubyStringKeysInHashInspection
        @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
      end

      # Decide whether +result+ is good enough to skip the remaining identification methods.
      # The result is enriched with type database info as a side effect.
      #
      # @param result [Hash, nil] identification info so far
      # @param who_is_asking [Symbol, nil] :DROID makes FIDO_FAILURES MIME types unacceptable
      # @return [Boolean]
      def result_ok?(result, who_is_asking = nil)
        result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
        return false if result.empty?
        return true unless result[:TYPE].empty?
        return false if RETRY_MIMETYPES.include? result[:mimetype]
        return false if FIDO_FAILURES.include?(result[:mimetype]) && who_is_asking == :DROID
        !(result[:mimetype].empty? && result[:puid].empty?)
      end

      # @return [String, nil] the first PUID the type database lists for +mimetype+
      def get_puid(mimetype)
        info = ::Libis::Format::TypeDatabase.mime_infos(mimetype).first
        puids = info && info[:PUID]
        puids ? puids.first : nil
      end

    end

  end
end
@@ -0,0 +1,170 @@
1
+ # coding: utf-8
2
+
3
+ require 'singleton'
4
+ require 'yaml'
5
+
6
+ require 'backports/rails/hash'
7
+ require 'libis/tools/logger'
8
+ require 'libis/tools/extend/string'
9
+
10
module Libis
  module Format

    # Singleton registry of known file types, loaded from the gem's data/types.yml.
    #
    # Every type is stored under its symbolized name as a Hash with at least :TYPE, :GROUP,
    # :MIME (Array), :EXTENSIONS (Array) and, when given, :PUID (Array).
    class TypeDatabase
      include Singleton
      include ::Libis::Tools::Logger

      # @return [Hash] the info Hash for type +t+, or an empty Hash when unknown
      def self.typeinfo(t)
        instance.types[t] || {}
      end

      # Complete +info+ in place with what the database knows about its PUID, MIME type or type.
      #
      # @param info [Hash] identification info; anything else yields an empty Hash
      # @param map_keys [Hash] maps the database keys (:PUID, :MIME, :TYPE, :EXTENSIONS, :GROUP)
      #   onto the keys used in +info+; unmapped keys are used as-is
      # @return [Hash] the same +info+ object, enriched
      def self.enrich(info, map_keys = {})
        return {} unless info.is_a? Hash
        mapper = Hash.new { |hash, key| hash[key] = key }
        mapper.merge! map_keys
        type_key = mapper[:TYPE]

        # The PUID is consulted first; the MIME type only fills in when the type is still unknown.
        unless (puid = info[mapper[:PUID]]).blank?
          match = puid_infos(puid).first
          info[type_key] ||= match && match[:TYPE]
        end
        unless (mime = info[mapper[:MIME]]).blank?
          match = mime_infos(mime).first
          info[type_key] ||= match && match[:TYPE]
        end
        unless (type_name = info[type_key]).nil?
          info[mapper[:MIME]] = type_mimetypes(type_name).first if info[mapper[:MIME]].blank?
          info[mapper[:PUID]] = type_puids(type_name).first if info[mapper[:PUID]].blank?
          info[mapper[:EXTENSIONS]] = type_extentions(type_name)
          info[mapper[:GROUP]] = type_group(type_name)
        end
        info
      end

      # @return [Symbol, nil] the group type +t+ belongs to
      def self.type_group(t)
        typeinfo(t)[:GROUP]
      end

      # @return [Array<String>] MIME types of type +t+
      def self.type_mimetypes(t)
        typeinfo(t)[:MIME] || []
      end

      # @return [Array<String>] PUIDs of type +t+
      def self.type_puids(t)
        typeinfo(t)[:PUID] || []
      end

      # @return [Array<String>] file extensions of type +t+
      def self.type_extentions(t)
        typeinfo(t)[:EXTENSIONS] || []
      end

      # @return [Array<Symbol>] names of the types in +group+
      def self.group_types(group)
        wanted = group.to_sym
        instance.types.select { |_, v| v[:GROUP] == wanted }.keys
      end

      # @return [Array<Hash>] info of every type listing +puid+
      def self.puid_infos(puid)
        types_listing(:PUID, puid).values
      end

      # @return [Array<Symbol>] names of every type listing +puid+
      def self.puid_types(puid)
        types_listing(:PUID, puid).keys
      end

      # @return [Array<Symbol>] groups of every type listing +puid+
      def self.puid_groups(puid)
        puid_types(puid).map { |t| type_group t }
      end

      # @return [Array<Hash>] info of every type listing +mime+
      def self.mime_infos(mime)
        types_listing(:MIME, mime).values
      end

      # @return [Array<Symbol>] names of every type listing +mime+
      def self.mime_types(mime)
        types_listing(:MIME, mime).keys
      end

      # @return [Array<Symbol>] groups of every type listing +mime+
      def self.mime_groups(mime)
        mime_types(mime).map { |t| type_group t }
      end

      # @param ext [String] file extension, with or without the leading dot
      # @return [Array<Hash>] info of every type listing the extension
      def self.ext_infos(ext)
        types_listing(:EXTENSIONS, ext.gsub(/^\./, '')).values
      end

      # @param ext [String] file extension, with or without the leading dot
      # @return [Array<Symbol>] names of every type listing the extension
      def self.ext_types(ext)
        types_listing(:EXTENSIONS, ext.gsub(/^\./, '')).keys
      end

      # @return [Hash, nil] info of the first type listing +puid+
      def self.puid_typeinfo(puid)
        instance.types.values.find { |v| v[:PUID] && v[:PUID].include?(puid) }
      end

      # @return [Boolean] whether any type lists +mime+
      def self.known_mime?(mime)
        instance.types.values.any? { |v| v[:MIME].include? mime }
      end

      # Select the types whose list stored under +key+ contains +value+.
      # Types without that list (e.g. no :PUID) simply do not match.
      def self.types_listing(key, value)
        instance.types.select { |_, v| v[key] && v[key].include?(value) }
      end
      private_class_method :types_listing

      # All known types, keyed by symbolized type name.
      attr_reader :types

      # Load type definitions, grouped as { group => { type_name => info } }.
      #
      # :MIME, :EXTENSIONS and :PUID may be given as a whitespace/comma separated String or as a
      # list. A type that is already known is merged with the existing definition.
      #
      # @param file_or_hash [String, Hash] path of a YAML file, or the definitions themselves
      # @param append [Boolean] on conflicts the existing values come first (lists) or win
      #   (scalars) when true; the newly loaded values do when false
      def load_types(file_or_hash = {}, append = true)
        hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
        # noinspection RubyResolve
        hash.each do |group, type_info|
          type_info.each do |type_name, info|
            type_key = type_name.to_sym
            info.symbolize_keys!
            info[:TYPE] = type_key
            info[:GROUP] = group.to_sym
            info[:MIME] = split_list(info[:MIME])
            info[:EXTENSIONS] = split_list(info[:EXTENSIONS])
            # :PUID stays absent when not given; lookups treat a missing list as "no match".
            info[:PUID] = split_list(info[:PUID]) if info[:PUID]
            if @types.key?(type_key)
              warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
              # The block only runs for keys present on both sides: v_new is the freshly loaded
              # value, v_old the one already in the database.
              info.merge!(@types[type_key]) do |_, v_new, v_old|
                case v_old
                when Array
                  append ? v_old + v_new : v_new + v_old
                when Hash
                  append ? v_new.merge(v_old) : v_old.merge(v_new)
                else
                  append ? v_old : v_new
                end
              end
            end
            @types[type_key] = info
          end
        end
      end

      protected

      # Load the type definitions shipped in the gem's data directory.
      def initialize
        @types = {}
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        load_types(File.join(data_dir, 'types.yml'))
      end

      # Turn a String ("a, b c") or a list into an Array of stripped Strings.
      #
      # @return [Array<String>] empty when +value+ is nil or false
      def split_list(value)
        return [] unless value
        return value.map { |v| v.to_s.strip } if value.is_a?(Array)
        value.to_s.strip.split(/[\s,]+/)
      end

    end

  end
end
@@ -0,0 +1,5 @@
1
module Libis
  module Format
    # Version of the libis-format gem; frozen so the constant cannot be mutated in place.
    VERSION = '0.9.1'.freeze
  end
end
@@ -0,0 +1,12 @@
1
+ require 'libis/format/version'
2
+
3
module Libis
  module Format
    # Constants of this namespace are loaded lazily, on first reference, from these files.
    {
      TypeDatabase: 'libis/format/type_database',
      Identifier: 'libis/format/identifier',
      Fido: 'libis/format/fido',
      Droid: 'libis/format/droid',
      Converter: 'libis/format/converter'
    }.each do |const_name, feature|
      autoload const_name, feature
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'libis/format'
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+
6
+ require 'libis/format/version'
7
+
8
# Gem specification for libis-format; the version is taken from Libis::Format::VERSION.
Gem::Specification.new do |gem|
  gem.name        = 'libis-format'
  gem.version     = Libis::Format::VERSION
  gem.authors     = ['Kris Dekeyser']
  gem.email       = ['kris.dekeyser@libis.be']
  gem.summary     = %q{LIBIS File format format services.}
  gem.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
  gem.homepage    = ''
  gem.license     = 'MIT'

  # Package every file tracked by git; executables and test files are derived from that list.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/[^/]+$}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  {
    'bundler' => '~> 1.6',
    'rake' => '~> 10.3',
    'rspec' => '~> 3.1',
    'simplecov' => '~> 0.9'
  }.each do |dependency, requirement|
    gem.add_development_dependency dependency, requirement
  end

  gem.add_runtime_dependency 'libis-tools', '~> 0.9'
  gem.add_runtime_dependency 'os', '= 0.9.6'
end
Binary file
Binary file