libis-format 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +15 -0
  4. data/.travis.yml +36 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +38 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/data/ISOcoated.icc +0 -0
  12. data/data/PDFA_def.ps +32 -0
  13. data/data/ead.xsd +2728 -0
  14. data/data/lias_formats.xml +106 -0
  15. data/data/types.yml +213 -0
  16. data/lib/libis/format/converter/base.rb +103 -0
  17. data/lib/libis/format/converter/chain.rb +80 -0
  18. data/lib/libis/format/converter/repository.rb +110 -0
  19. data/lib/libis/format/converter.rb +11 -0
  20. data/lib/libis/format/droid.rb +38 -0
  21. data/lib/libis/format/fido.rb +109 -0
  22. data/lib/libis/format/identifier.rb +185 -0
  23. data/lib/libis/format/type_database.rb +170 -0
  24. data/lib/libis/format/version.rb +5 -0
  25. data/lib/libis/format.rb +12 -0
  26. data/lib/libis-format.rb +1 -0
  27. data/libis-format.gemspec +30 -0
  28. data/spec/data/Cevennes2.bmp +0 -0
  29. data/spec/data/Cevennes2.jp2 +0 -0
  30. data/spec/data/Cevennes2.ppm +22492 -0
  31. data/spec/data/test-ead.xml +392 -0
  32. data/spec/data/test-jpg.tif +0 -0
  33. data/spec/data/test-lzw.tif +0 -0
  34. data/spec/data/test.bmp +0 -0
  35. data/spec/data/test.doc +0 -0
  36. data/spec/data/test.docx +0 -0
  37. data/spec/data/test.gif +0 -0
  38. data/spec/data/test.ods +0 -0
  39. data/spec/data/test.odt +0 -0
  40. data/spec/data/test.pdf +0 -0
  41. data/spec/data/test.png +0 -0
  42. data/spec/data/test.ps +8631 -0
  43. data/spec/data/test.psd +0 -0
  44. data/spec/data/test.rtf +1455 -0
  45. data/spec/data/test.tif +0 -0
  46. data/spec/data/test.txt +12 -0
  47. data/spec/data/test.xcf +0 -0
  48. data/spec/data/test.xls +0 -0
  49. data/spec/data/test.xlsx +0 -0
  50. data/spec/data/test.xml +4 -0
  51. data/spec/identifier_spec.rb +59 -0
  52. data/spec/spec_helper.rb +9 -0
  53. data/spec/test_types.yml +12 -0
  54. data/spec/type_database_spec.rb +140 -0
  55. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  56. data/tools/droid/container-signature-20150307.xml +2235 -0
  57. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  58. data/tools/droid/droid.bat +154 -0
  59. data/tools/droid/droid.sh +138 -0
  60. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  61. data/tools/droid/lib/activation-1.1.jar +0 -0
  62. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  63. data/tools/droid/lib/antlr-3.2.jar +0 -0
  64. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  65. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  66. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  67. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  68. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  69. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  70. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  71. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  72. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  73. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  74. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  75. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  76. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  77. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  78. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  79. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  80. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  81. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  82. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  83. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  84. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  85. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  86. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  87. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  88. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  89. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  90. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  91. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  92. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  93. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  94. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  95. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  96. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  97. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  98. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  99. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  100. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  101. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  102. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  103. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  104. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  105. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  106. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  107. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  108. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  109. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  110. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  111. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  112. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  113. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  114. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  115. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  116. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  117. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  118. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  119. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  120. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  121. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  122. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  123. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  124. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  125. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  126. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  127. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  128. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  129. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  130. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  131. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  132. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  133. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  134. data/tools/droid/lib/jta-1.1.jar +0 -0
  135. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  136. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  137. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  138. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  139. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  140. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  141. data/tools/droid/lib/poi-3.7.jar +0 -0
  142. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  143. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  144. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  145. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  146. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  147. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  148. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  149. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  150. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  151. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  152. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  153. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  154. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  155. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  156. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  157. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  158. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  159. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  160. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  161. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  162. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  163. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  164. data/tools/droid/lib/xz-1.0.jar +0 -0
  165. data/tools/fido/__init__.py +0 -0
  166. data/tools/fido/argparselocal.py +2355 -0
  167. data/tools/fido/argparselocal.pyc +0 -0
  168. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  169. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  170. data/tools/fido/conf/dc.xsd +119 -0
  171. data/tools/fido/conf/dcmitype.xsd +53 -0
  172. data/tools/fido/conf/dcterms.xsd +383 -0
  173. data/tools/fido/conf/fido-formats.xsd +173 -0
  174. data/tools/fido/conf/format_extension_template.xml +105 -0
  175. data/tools/fido/conf/format_extensions.xml +498 -0
  176. data/tools/fido/conf/formats-v81.xml +38355 -0
  177. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  178. data/tools/fido/conf/versions.xml +8 -0
  179. data/tools/fido/fido.bat +4 -0
  180. data/tools/fido/fido.py +854 -0
  181. data/tools/fido/fido.sh +5 -0
  182. data/tools/fido/prepare.py +616 -0
  183. data/tools/fido/pronomutils.py +115 -0
  184. data/tools/fido/toxml.py +52 -0
  185. data/tools/fido/update_signatures.py +171 -0
  186. metadata +342 -0
@@ -0,0 +1,109 @@
1
+ require 'os'
2
+ require 'csv'
3
+ require 'singleton'
4
+
5
+ require 'libis/tools/extend/string'
6
+ require 'libis/tools/logger'
7
+ require 'libis/tools/command'
8
+
9
+ require 'libis/format/type_database'
10
+
11
+ module Libis
12
+ module Format
13
+
14
# Runs the FIDO command-line tool that ships in the gem's tools/fido directory
# and picks the best-scoring identification for a file.
#
# The class is a Singleton; use the class-level entry points Fido.run,
# Fido.add_format and Fido.formats.
class Fido
  include ::Libis::Tools::Logger
  include Singleton

  # MIME type values that are treated as "no usable identification".
  BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream']

  # Identify a file with the singleton instance.
  #
  # @param file [String] path of the file to identify
  # @param formats [Array<String>, String, nil] extra FIDO format file(s) for this run only
  # @return [Hash] see #run
  def self.run(file, formats = nil)
    instance.run file, formats
  end

  # Register an additional FIDO format file for all subsequent runs.
  #
  # @param f [String] path of the format file
  def self.add_format(f)
    instance.formats << f
  end

  # @return [Array<String>] format files passed to FIDO on every run
  def self.formats
    instance.formats
  end

  # Format files passed to FIDO via -loadformats.
  #
  # This reader has to be public: the class-level helpers above call it with
  # an explicit receiver from class-method context, where a protected method
  # is not callable.
  attr_reader :formats

  # Run FIDO on +file+ and return the single best match.
  #
  # Every 'OK' record with a usable MIME type starts with score 5; a
  # 'signature' match adds 5 and a 'container' match whose type lists the
  # file's extension adds 2.
  #
  # @param file [String] path of the file to identify
  # @param xtra_formats [Array<String>, String, nil] extra format file(s) for this run only
  # @return [Hash] the winning FIDO record (keys :status, :time, :puid,
  #   :format_name, :signature_name, :filesize, :filename, :mimetype,
  #   :matchtype and :score), or an empty Hash when there is no match or
  #   the highest score is shared by several records
  def run(file, xtra_formats = nil)
    fmt_list = formats.dup
    case xtra_formats
    when Array
      fmt_list += xtra_formats
    when String
      fmt_list << xtra_formats
    end

    bin_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido'))
    cmd = File.join(bin_dir, OS.windows? ? 'fido.bat' : 'fido.sh')
    args = []
    args << '-loadformats' << fmt_list.join(',') unless fmt_list.empty?
    args << file.escape_for_string
    fido = ::Libis::Tools::Command.run(cmd, *args)
    warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?

    # Column order of the CSV records printed by FIDO.
    keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
    fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
    debug "Fido output: #{fido_output}"

    # TypeDatabase.ext_infos strips the leading dot before comparing against
    # :EXTENSIONS, so the extension is compared without its dot here as well.
    ext = File.extname(file).sub(/\A\./, '')

    candidates = []
    fido_output.each do |x|
      next unless x[:status] == 'OK'
      x[:mimetype] = get_mimetype(x[:puid]) if x[:mimetype] == 'None'
      next if BAD_MIMETYPES.include? x[:mimetype]
      x[:score] = 5
      case x[:matchtype]
      when 'signature'
        x[:score] += 5
      when 'container'
        typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(x[:puid])
        x[:score] += 2 if typeinfo && Array(typeinfo[:EXTENSIONS]).include?(ext)
      end
      candidates << x
    end

    fido_results = candidates.group_by { |x| x[:score] }

    debug "Fido results: #{fido_results}"

    max_score = fido_results.keys.max

    # Only a single hit at the highest score is considered reliable enough to return.
    return {} unless max_score && max_score >= 5 && fido_results[max_score].size == 1

    fido_results[max_score].first
  end

  protected

  # Start with the format file that ships in the gem's data directory.
  def initialize
    data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    @formats = [File.join(data_dir, 'lias_formats.xml')]
  end

  # Look up the first MIME type registered for a PUID in the type database.
  #
  # @param puid [String] PRONOM identifier
  # @return [String, nil] nil when the PUID is unknown or the lookup fails
  def get_mimetype(puid)
    ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
  rescue StandardError
    nil
  end
end
107
+
108
+ end
109
+ end
@@ -0,0 +1,185 @@
1
+ # encoding: utf-8
2
+
3
+ require 'singleton'
4
+
5
+ require 'libis-tools'
6
+ require 'libis/tools/extend/string'
7
+ require 'libis/tools/extend/empty'
8
+
9
+ require 'libis/format/type_database'
10
+
11
+ require_relative 'fido'
12
+ require_relative 'droid'
13
+
14
+ module Libis
15
+ module Format
16
+
17
# Determines the MIME type and PRONOM PUID of a file by trying a chain of
# detectors in order: FIDO, DROID, the 'file' command and finally the file
# extension. XML files are additionally validated against registered XML
# Schemas to derive a more specific type.
#
# The class is a Singleton; use Identifier.get and the other class-level helpers.
class Identifier
  include ::Libis::Tools::Logger
  include Singleton

  # MIME types that make the next detector in the chain have another try.
  RETRY_MIMETYPES = %w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES
  # MIME types for which a result is not accepted when DROID is the one asking.
  FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet)

  # @return [Hash{String => String}] MIME type => XSD file used by #validate_against_xml_schema
  attr_reader :xml_validations

  # Register an additional FIDO format file (delegates to Fido.add_format).
  def self.add_fido_format(f)
    ::Libis::Format::Fido.add_format f
  end

  # Register an XML Schema; XML files that validate against it get +mimetype+.
  def self.add_xml_validation(mimetype, xsd_file)
    instance.xml_validations[mimetype] = xsd_file
  end

  # @return [Hash{String => String}] the registered XML validations
  def self.xml_validations
    instance.xml_validations
  end

  # Identify a file with the singleton instance; see #get.
  def self.get(file_path, options = nil)
    instance.get file_path, options
  end

  # Identify +file+.
  #
  # @param file [String] path of the file to identify
  # @param options [Hash, nil] :formats => extra FIDO format file(s);
  #   :droid => truthy to skip FIDO
  # @return [Hash, nil] identification result (:mimetype, :puid, :method, ...)
  #   or nil when the path does not exist or is a directory
  def get(file, options = nil)
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    unless File.exist? file
      error 'File %s cannot be found.', file
      return nil
    end
    if File.directory? file
      error '%s is a directory.', file
      return nil
    end

    options ||= {}

    result = {}

    # use FIDO
    # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
    # NOTE(review): known FIDO misses are presumably meant to be listed in FIDO_FAILURES - confirm.
    result = get_fido_identification(file, result, options[:formats]) unless options[:droid]

    # use DROID
    result = get_droid_identification file, result

    # use FILE
    result = get_file_identification(file, result)

    # Try file extension
    result = get_extension_identification(file, result)

    # determine XML type. Add custom types at runtime with
    # Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
    result = validate_against_xml_schema(file, result)

    # The result is always a Hash at this point, so test for an actual MIME type.
    if result && !result[:mimetype].to_s.empty?
      info "Identification of '#{file}': '#{result}'"
    else
      warn "Could not identify MIME type of '#{file}'"
    end

    result
  end

  # Merge FIDO's identification into +result+ unless +result+ is already good enough.
  #
  # @return [Hash] +result+, updated in place
  def get_fido_identification(file, result = {}, xtra_formats = nil)
    return result if result_ok? result

    fido_result = ::Libis::Format::Fido.run(file, xtra_formats)

    return result unless fido_result.is_a? Hash

    result.merge! fido_result
    result[:method] = 'fido'

    debug "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})"
    result
  end

  # Replace +result+ with DROID's first match unless +result+ is already good enough.
  #
  # @return [Hash] +result+; left untouched when DROID reports no match
  def get_droid_identification(file, result = {})
    return result if result_ok? result, :DROID
    droid_output = ::Libis::Format::Droid.run file
    debug "DROID: #{droid_output}"
    # Keep whatever was found so far instead of failing on a missing first row.
    return result if droid_output.nil? || droid_output.empty?
    warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
    match = droid_output.first
    result.clear
    # DROID may report several values in the MIME column; keep the first that looks like type/subtype.
    result[:mimetype] = match[:mime_type].to_s.split(/[\s,]+/).find { |x| x =~ /.*\/.*/ }
    result[:matchtype] = match[:method]
    result[:puid] = match[:puid]
    result[:format_name] = match[:format_name]
    result[:format_version] = match[:format_version]
    result[:method] = 'droid'

    debug "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})"
    result
  end

  # Fall back on the 'file' command unless +result+ is already good enough.
  #
  # @return [Hash] the given +result+ when it is OK, otherwise a new Hash
  def get_file_identification(file, result = nil)
    return result if result_ok? result
    result = {}
    begin
      # 'file' prints its answer on standard output.
      # NOTE(review): the path is passed with literal double quotes, unlike the FIDO call;
      # confirm how Libis::Tools::Command.run handles arguments.
      output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', "\"#{file.escape_for_string}\"")[:out]
      mimetype = Array(output).first.to_s.split.first
      # Only accept a token that looks like type/subtype (not an error message).
      if mimetype && mimetype.include?('/')
        debug "File result: '#{mimetype}'"
        result[:mimetype] = mimetype
        result[:puid] = get_puid(mimetype)
      end
      result[:method] = 'file'
    rescue StandardError => e
      # Best effort: a failing 'file' command must not abort identification.
      debug "File identification failed: #{e.message}"
    end
    result
  end

  # Fall back on the file extension unless +result+ is already good enough.
  #
  # @return [Hash] the given +result+ when it is OK, otherwise a new Hash
  def get_extension_identification(file, result = nil)
    return result if result_ok? result
    result = {}
    type_info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
    debug "File extension info: #{type_info}"
    if type_info
      result[:mimetype] = Array(type_info[:MIME]).first
      result[:puid] = Array(type_info[:PUID]).first
    end
    result[:method] = 'extension'
    result
  end

  # For generic XML results, try every registered XML Schema and adopt the
  # MIME type of each schema the document validates against (the last one wins).
  #
  # @return [Hash] the possibly re-typed result
  def validate_against_xml_schema(file, result)
    return result unless result[:mimetype] =~ /^(text|application)\/xml$/
    doc = ::Libis::Tools::XmlDocument.open file
    xml_validations.each do |mime, xsd_file|
      next unless xsd_file
      next unless doc.validates_against?(xsd_file)
      debug "XML file validated against XML Schema: #{xsd_file}"
      result[:mimetype] = mime
      result[:puid] = nil
      result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
    end
    result
  end

  protected

  # Set up the default FIDO format file and XML validations from the gem's data directory.
  def initialize
    data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    # NOTE(review): @fido_formats is not read anywhere in this class.
    @fido_formats = [(File.join(data_dir, 'lias_formats.xml'))]
    # noinspection RubyStringKeysInHashInspection
    @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
  end

  # Decide whether +result+ is good enough to stop trying further detectors.
  # The result is enriched with type database info as a side effect.
  #
  # @param who_is_asking [Symbol, nil] :DROID makes FIDO_FAILURES types unacceptable
  # @return [Boolean]
  def result_ok?(result, who_is_asking = nil)
    result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
    return false if result.empty?
    return true unless result[:TYPE].empty?
    return false if RETRY_MIMETYPES.include? result[:mimetype]
    return false if FIDO_FAILURES.include?(result[:mimetype]) && who_is_asking == :DROID
    !(result[:mimetype].empty? && result[:puid].empty?)
  end

  # Look up the first PUID registered for a MIME type.
  #
  # @return [String, nil] nil when the MIME type is unknown or the lookup fails
  def get_puid(mimetype)
    ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
  rescue StandardError
    nil
  end
end
183
+
184
+ end
185
+ end
@@ -0,0 +1,170 @@
1
+ # coding: utf-8
2
+
3
+ require 'singleton'
4
+ require 'yaml'
5
+
6
+ require 'backports/rails/hash'
7
+ require 'libis/tools/logger'
8
+ require 'libis/tools/extend/string'
9
+
10
+ module Libis
11
+ module Format
12
+
13
# In-memory database of known file types, loaded from data/types.yml.
#
# Each entry is keyed by a type name (Symbol) and holds a Hash with at least
# :TYPE, :GROUP, :MIME (Array) and :EXTENSIONS (Array), plus :PUID (Array) when
# PUIDs are configured. The class is a Singleton; lookups go through the class
# methods.
class TypeDatabase
  include Singleton
  include ::Libis::Tools::Logger

  # @return [Hash] info for type +t+, or an empty Hash when unknown
  def self.typeinfo(t)
    instance.types[t] || {}
  end

  # Complete an identification Hash with data from the type database.
  #
  # +map_keys+ translates the canonical keys (:PUID, :MIME, :TYPE,
  # :EXTENSIONS, :GROUP) into the keys actually used in +info+; unmapped keys
  # are used as-is. +info+ is modified in place.
  #
  # @param info [Hash] identification data
  # @param map_keys [Hash{Symbol => Object}] canonical key => key in +info+
  # @return [Hash] +info+, or an empty Hash when +info+ is not a Hash
  def self.enrich(info, map_keys = {})
    return {} unless info.is_a? Hash
    mapper = Hash.new { |hash, key| hash[key] = key }
    mapper.merge! map_keys
    # Lookups are best effort: an unknown PUID or MIME type leaves the type nil.
    unless (puid = info[mapper[:PUID]]).blank?
      info[mapper[:TYPE]] ||= puid_infos(puid).first[:TYPE] rescue nil
    end
    unless (mime = info[mapper[:MIME]]).blank?
      info[mapper[:TYPE]] ||= mime_infos(mime).first[:TYPE] rescue nil
    end
    unless (type_name = info[mapper[:TYPE]]).nil?
      info[mapper[:MIME]] = type_mimetypes(type_name).first if info[mapper[:MIME]].blank?
      info[mapper[:PUID]] = type_puids(type_name).first if info[mapper[:PUID]].blank?
      info[mapper[:EXTENSIONS]] = type_extentions(type_name)
      info[mapper[:GROUP]] = type_group(type_name)
    end
    info
  end

  # @return [Symbol, nil] group of type +t+
  def self.type_group(t)
    typeinfo(t)[:GROUP]
  end

  # @return [Array<String>] MIME types of type +t+
  def self.type_mimetypes(t)
    typeinfo(t)[:MIME] || []
  end

  # @return [Array<String>] PUIDs of type +t+
  def self.type_puids(t)
    typeinfo(t)[:PUID] || []
  end

  # @return [Array<String>] file extensions of type +t+
  def self.type_extensions(t)
    typeinfo(t)[:EXTENSIONS] || []
  end

  # Historical (misspelled) name of type_extensions, kept for existing callers.
  def self.type_extentions(t)
    type_extensions(t)
  end

  # @return [Array<Symbol>] names of all types in +group+
  def self.group_types(group)
    instance.types.select { |_, v| v[:GROUP] == group.to_sym }.keys
  end

  # @return [Array<Hash>] infos of all types registered for +puid+
  def self.puid_infos(puid)
    select_types(:PUID, puid).values
  end

  # @return [Array<Symbol>] names of all types registered for +puid+
  def self.puid_types(puid)
    select_types(:PUID, puid).keys
  end

  # @return [Array<Symbol, nil>] group of every type registered for +puid+
  def self.puid_groups(puid)
    puid_types(puid).map { |t| type_group t }
  end

  # @return [Array<Hash>] infos of all types registered for +mime+
  def self.mime_infos(mime)
    select_types(:MIME, mime).values
  end

  # @return [Array<Symbol>] names of all types registered for +mime+
  def self.mime_types(mime)
    select_types(:MIME, mime).keys
  end

  # @return [Array<Symbol, nil>] group of every type registered for +mime+
  def self.mime_groups(mime)
    mime_types(mime).map { |t| type_group t }
  end

  # @param ext [String] file extension, with or without leading dot
  # @return [Array<Hash>] infos of all types registered for the extension
  def self.ext_infos(ext)
    select_types(:EXTENSIONS, ext.gsub(/^\./, '')).values
  end

  # @param ext [String] file extension, with or without leading dot
  # @return [Array<Symbol>] names of all types registered for the extension
  def self.ext_types(ext)
    select_types(:EXTENSIONS, ext.gsub(/^\./, '')).keys
  end

  # @return [Hash, nil] info of the first type registered for +puid+
  def self.puid_typeinfo(puid)
    instance.types.each_value.find { |v| v[:PUID] && v[:PUID].include?(puid) }
  end

  # @return [Boolean] whether any type lists +mime+
  def self.known_mime?(mime)
    instance.types.each_value.any? { |v| v[:MIME].include? mime }
  end

  # Entries whose +key+ list contains +value+; entries without such a list never match.
  def self.select_types(key, value)
    instance.types.select { |_, v| (v[key].include?(value) rescue false) }
  end
  private_class_method :select_types

  # @return [Hash{Symbol => Hash}] all type infos keyed by type name
  attr_reader :types

  # Load type definitions, given as { group => { type_name => attributes } }.
  #
  # Empty files, empty groups and types without attributes are tolerated.
  # List attributes (MIME, EXTENSIONS, PUID) may be given as a whitespace/comma
  # separated String or as an Array. Attribute hashes are modified in place.
  #
  # When a type already exists the definitions are merged: lists are
  # concatenated and for other values the existing one wins when +append+ is
  # true, the new one otherwise.
  #
  # @param file_or_hash [String, Hash] path of a YAML file or an already loaded Hash
  # @param append [Boolean] see above
  def load_types(file_or_hash = {}, append = true)
    hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
    # noinspection RubyResolve
    (hash || {}).each do |group, type_info|
      (type_info || {}).each do |type_name, info|
        type_key = type_name.to_sym
        info ||= {}
        info.symbolize_keys!
        info[:TYPE] = type_key
        info[:GROUP] = group.to_sym
        info[:MIME] = split_list(info[:MIME])
        info[:EXTENSIONS] = split_list(info[:EXTENSIONS])
        # :PUID stays absent when not configured.
        info[:PUID] = split_list(info[:PUID]) if info[:PUID]
        if @types.has_key?(type_key)
          warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
          # Hash#merge! yields (key, receiver value, argument value), i.e. (key, new, old).
          info.merge!(@types[type_key]) do |_, v_new, v_old|
            case v_old
            when Array
              append ? v_old + v_new : v_new + v_old
            when Hash
              append ? v_new.merge(v_old) : v_old.merge(v_new)
            else
              append ? v_old : v_new
            end
          end
        end
        @types[type_key] = info
      end
    end
  end

  protected

  # Load the type database that ships in the gem's data directory.
  def initialize
    @types = Hash.new
    data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    type_database = File.join(data_dir, 'types.yml')
    load_types(type_database)
  end

  # Turn a whitespace/comma separated String (or an Array) into an Array of stripped Strings.
  def split_list(value)
    return value.map { |v| v.to_s.strip } if value.is_a?(Array)
    value.to_s.strip.split(/[\s,]+/)
  end
end
168
+
169
+ end
170
+ end
@@ -0,0 +1,5 @@
1
# Namespace shared by the LIBIS gems.
module Libis
  # Namespace of the libis-format gem.
  module Format
    # Version string of the libis-format gem; read by the gemspec.
    VERSION = '0.9.1'
  end
end
@@ -0,0 +1,12 @@
1
+ require 'libis/format/version'
2
+
3
# Namespace shared by the LIBIS gems.
module Libis
  # Entry point of the libis-format gem. The classes below are registered for
  # autoloading, so each file is only required on first use of its constant.
  module Format
    {
      TypeDatabase: 'libis/format/type_database',
      Identifier: 'libis/format/identifier',
      Fido: 'libis/format/fido',
      Droid: 'libis/format/droid',
      Converter: 'libis/format/converter'
    }.each do |const_name, feature|
      autoload const_name, feature
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'libis/format'
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+
6
+ require 'libis/format/version'
7
+
8
# Gem specification for libis-format.
Gem::Specification.new do |spec|
  spec.name = 'libis-format'
  spec.version = Libis::Format::VERSION
  spec.authors = ['Kris Dekeyser']
  spec.email = ['kris.dekeyser@libis.be']
  # The summary previously repeated the word "format".
  spec.summary = %q{LIBIS File format services.}
  spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
  spec.homepage = ''
  spec.license = 'MIT'

  # Package every file tracked by git; executables and test files are derived from that list.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_development_dependency 'bundler', '~> 1.6'
  spec.add_development_dependency 'rake', '~> 10.3'
  spec.add_development_dependency 'rspec', '~> 3.1'
  spec.add_development_dependency 'simplecov', '~> 0.9'

  spec.add_runtime_dependency 'libis-tools', '~> 0.9'
  spec.add_runtime_dependency 'os', '= 0.9.6'
end
Binary file
Binary file