shared-mime-info 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/copying.txt +20 -0
  2. data/lib/shared-mime-info.rb +377 -0
  3. data/rakefile +30 -0
  4. metadata +47 -0
data/copying.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2006 Hank Lords <hanklords@gmail.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,377 @@
1
+ # Copyright (c) 2006 Hank Lords <hanklords@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining
4
+ # a copy of this software and associated documentation files (the
5
+ # "Software"), to deal in the Software without restriction, including
6
+ # without limitation the rights to use, copy, modify, merge, publish,
7
+ # distribute, sublicense, and/or sell copies of the Software, and to
8
+ # permit persons to whom the Software is furnished to do so, subject to
9
+ # the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in all
12
+ # copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ require 'enumerator'
23
+ require 'rexml/document'
24
+
25
+ # shared-mime-info is a pure Ruby library for accessing the MIME info
26
+ # database provided by Freedesktop[http://freedesktop.org/] on
27
+ # {Standards/shared-mime-info-spec}[http://wiki.freedesktop.org/wiki/Standards_2fshared_2dmime_2dinfo_2dspec].
28
+ #
29
+ # This provides a way to guess the mime type of a file by doing both
30
+ # filename lookups and _magic_ file checks. This implementation tries to
31
+ # follow the version 0.13 of the
32
+ # specification[http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.13.html].
33
+ module MIME
34
+ VERSION = '0.1'
35
+
36
# Parser and matcher for the binary "magic" section of the shared MIME
# database. Magic.parse turns the raw rule text of one MIME type into a tree
# of entries; the tree's check_file method tests an IO object against it.
module Magic # :nodoc: all
  # Raised when magic data is malformed or its entries are nested
  # inconsistently.
  class BadMagic < StandardError; end

  # Root of a tree of magic entries. The root itself matches nothing; it only
  # holds the top-level entries (indent 0).
  class RootEntry
    def initialize
      @sub_entries = []
      @indent = -1
    end

    # Inserts _entry_ into the tree according to its indent: directly below
    # this node when it is exactly one level deeper, otherwise below the most
    # recently added child. Raises BadMagic when there is no child to descend
    # into. Entries that are not deeper than this node are ignored.
    def add_subentry(entry)
      return unless entry.indent > @indent

      if entry.indent == @indent + 1
        @sub_entries << entry
      elsif @sub_entries.last.respond_to? :add_subentry
        @sub_entries.last.add_subentry entry
      else
        raise BadMagic, "entry with indent #{entry.indent} has no parent entry"
      end
    end

    # True when this node has no children or at least one child matches _f_.
    def check_file(f)
      @sub_entries.empty? || @sub_entries.any? { |e| e.check_file f }
    end
  end

  # A single magic rule: _value_ (optionally filtered through _mask_) must be
  # found in the file at an offset in
  # start_offset ... start_offset + range_length.
  class Entry < RootEntry
    attr_reader :indent

    # _mask_, _word_size_ and _range_length_ may be nil when the rule does not
    # specify them.
    def initialize(indent, start_offset, value_length, value, mask, word_size, range_length)
      super()
      @indent = indent
      @start_offset = start_offset
      @value_length = value_length
      @value = value.freeze
      @mask = mask.freeze
      # NOTE(review): word_size is parsed and stored but no byte swapping is
      # applied when matching; host-endian rules may need it on
      # little-endian machines.
      @word_size = word_size
      @range_length = range_length
      # Pre-mask the expected value once so every comparison is mask vs mask.
      @masked_value = apply_mask(@value).freeze
    end

    # True when this rule matches _f_ and, if it has sub-rules, at least one
    # of them matches too.
    def check_file(f)
      check_entry(f) && super(f)
    end

    private

    # Reads the window the value may start in and compares every candidate
    # position against the (masked) expected value.
    def check_entry(f)
      span = [@range_length.to_i, 1].max
      f.pos = @start_offset
      window = f.read(@value_length + span - 1)
      return false if window.nil?

      (0...span).any? { |shift|
        candidate = window[shift, @value_length]
        !candidate.nil? &&
          candidate.size == @value_length &&
          apply_mask(candidate) == @masked_value
      }
    end

    # ANDs every byte of _bytes_ with the corresponding mask byte. Without a
    # mask the bytes are returned unchanged.
    def apply_mask(bytes)
      return bytes unless @mask

      mask_bytes = @mask.unpack('C*')
      masked = []
      bytes.unpack('C*').each_with_index { |byte, i|
        masked << (byte & (mask_bytes[i] || 0xff))
      }
      masked.pack('C*')
    end
  end

  # Parses the raw rules of one magic section (everything between two
  # "[priority:type]" headers) and returns the RootEntry of the resulting
  # tree. Each rule has the form
  #
  #   [indent] ">" start-offset "=" value ["&" mask] ["~" word-size] ["+" range-length] "\n"
  #
  # where value is a two byte big-endian length followed by that many raw
  # bytes, and mask has the same length as value.
  #
  # Raises BadMagic when a rule cannot be parsed.
  def self.parse(magic)
    parsed = RootEntry.new
    entry = magic.dup
    # Rules contain arbitrary bytes; treat the data as binary so that
    # regexps and comparisons never trip over invalid encodings.
    entry.force_encoding('BINARY') if entry.respond_to?(:force_encoding)

    until entry.empty?
      header = /\A(\d*)>(\d+)=/.match(entry)
      raise BadMagic, 'malformed magic entry' if header.nil?

      indent = header[1].to_i
      start_offset = header[2].to_i
      entry = header.post_match

      raise BadMagic, 'truncated magic value' if entry.size < 2
      value_length = entry.unpack('n').first
      value, entry = entry.unpack("x2a#{value_length}a*")
      raise BadMagic, 'truncated magic value' if value.size != value_length

      # Reset the optional parts for every rule: the loop does not open a new
      # scope, so values of a previous rule would otherwise leak into this one.
      mask = word_size = range_length = nil

      if entry[0, 1] == '&'
        mask, entry = entry.unpack("xa#{value_length}a*")
        raise BadMagic, 'truncated magic mask' if mask.size != value_length
      end

      if (word = /\A~(\d+)/.match(entry))
        word_size = word[1].to_i
        entry = word.post_match
      end

      if (range = /\A\+(\d+)/.match(entry))
        range_length = range[1].to_i
        entry = range.post_match
      end

      # Drop whatever is left of the line, including the terminator.
      entry = entry.sub(/\A[^\n]*\n/, '')

      parsed.add_subentry Entry.new(indent, start_offset, value_length, value, mask, word_size, range_length)
    end

    parsed
  end
end
123
+
124
# Type represents a single mime type such as <b>text/html</b>.
class Type
  attr_reader :magic_priority # :nodoc:

  # Returns the type of a mime type as a String, such as <b>text/html</b>.
  attr_reader :type

  # Returns the media part of the type of a mime type as a string,
  # such as <b>text</b> for a type of <b>text/html</b>.
  def media; @type.split('/', 2).first; end

  # Returns the subtype part of the type of a mime type as a string,
  # such as <b>html</b> for a type of <b>text/html</b>.
  def subtype; @type.split('/', 2).last; end

  # Synonym of type.
  def to_s; @type; end

  # Returns a Hash of the comments associated with a mime type in
  # different languages. The comment without a language is the default
  # value of the Hash. The Hash is empty when no XML description of the
  # type is found in the MIME directories.
  #
  #  MIME['text/html'].comment.default
  #   => "HTML page"
  #
  #  MIME['text/html'].comment['fr']
  #   => "page HTML"
  def comment
    comments = {}
    file = xml_file
    return comments if file.nil?

    File.open(file) { |f|
      doc = REXML::Document.new f
      REXML::XPath.match(doc, '*/comment').each { |c|
        if (lang = c.attributes['xml:lang'])
          comments[lang] = c.text
        else
          comments.default = c.text
        end
      }
    }
    comments
  end

  # Returns all the types this type is a subclass of, as an Array of
  # whatever MIME[] returns for each declared parent. The Array is empty
  # when no XML description of the type is found in the MIME directories.
  def parents
    file = xml_file
    return [] if file.nil?

    File.open(file) { |f|
      doc = REXML::Document.new f
      REXML::XPath.match(doc, '*/sub-class-of').collect { |c|
        MIME[c.attributes['type']]
      }
    }
  end

  # Equality test. A Type is equal to another Type with the same type
  # string, or to a String-like object equal to its type string.
  #
  #  MIME['text/html'] == 'text/html'
  #   => true
  def ==(type)
    if type.is_a? Type
      @type == type.type
    elsif type.respond_to? :to_str
      @type == type
    else
      false
    end
  end

  # Check if _filename_ is of this particular type by comparing it to
  # some common extensions.
  #
  #  MIME['text/html'].match_filename? 'index.html'
  #   => true
  def match_filename?(filename)
    @glob_patterns.any? { |pattern| File.fnmatch pattern, filename }
  end

  # Check if _file_ is of this particular type by looking for precise
  # patterns (_magic_ numbers) in different locations of the file.
  # Returns false when no magic has been loaded for this type.
  #
  # _file_ must be an IO object opened with read permissions.
  def match_file?(file)
    if @magic.nil?
      false
    else
      @magic.check_file file
    end
  end

  def initialize(type) # :nodoc:
    @type = type.freeze
    @glob_patterns = []
  end

  def load_magic(magic, priority) # :nodoc:
    @magic_priority = priority
    @magic = Magic.parse magic
  end

  def add_glob(glob) # :nodoc:
    @glob_patterns << glob.freeze
  end

  private

  # Path of the first "<mime dir>/<media>/<subtype>.xml" file that exists,
  # searching the MIME directories in order, or nil when there is none.
  def xml_file
    MIME.mime_dirs.collect { |dir| "#{dir}/#{@type}.xml" }.find { |path|
      File.file? path
    }
  end
end
236
+
237
class << self
  attr_reader :mime_dirs # :nodoc:

  # Returns the MIME::Type object corresponding to _type_, or _nil_ if
  # the type is not known.
  def [](type)
    @types.fetch type, nil
  end

  # Look for the type of a file by doing successive checks on
  # the filename patterns. A case-sensitive match is tried first, then a
  # match on the lower-cased filename. When several patterns match, the
  # longest one wins.
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_globs(filename)
    patterns = @globs.keys
    found = patterns.select { |pattern| File.fnmatch pattern, filename }

    if found.empty?
      downcase_filename = filename.downcase
      found = patterns.select { |pattern|
        File.fnmatch pattern, downcase_filename
      }
    end

    return nil if found.empty?

    # Longest pattern first; equally long patterns are ordered
    # alphabetically so the result is deterministic.
    @globs[found.sort_by { |pattern| [pattern.length, pattern] }.last]
  end

  # Look for the type of a file by doing successive checks on
  # _magic_ numbers.
  #
  # _file_ is either an object responding to +read+ or the path of a file.
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_magics(file)
    if file.respond_to? :read
      check_magics_with_priority(file, 0)
    else
      # File.open, not Kernel#open: a path starting with "|" must never be
      # run as a command.
      File.open(file, 'rb') { |f| check_magics_with_priority(f, 0) }
    end
  end

  # Look for the type of a file by doing successive checks with
  # the filename patterns or magic numbers: first the magic numbers with a
  # priority of at least 80, then the filename patterns, then all the magic
  # numbers. If none of the matches are successful, returns a type of
  # <b>application/octet-stream</b> if the file contains control characters
  # at its beginning, or <b>text/plain</b> otherwise.
  #
  # Directories, devices, fifos and sockets are reported with their
  # <b>inode/*</b> type without being opened.
  #
  # Returns a MIME::Type object.
  def check(filename)
    check_special(filename) ||
    File.open(filename, 'rb') { |f|
      check_magics_with_priority(f, 80) ||
      check_globs(filename) ||
      check_magics_with_priority(f, 0) ||
      check_default(f)
    }
  end

  private

  # Returns the first type in @magics whose priority is at least
  # _priority_threshold_ and whose magic matches _f_, or nil. Types below
  # the threshold are skipped rather than ending the search, because
  # @magics is filled from several magic files and is not globally sorted.
  def check_magics_with_priority(f, priority_threshold)
    @magics.find { |t|
      t.magic_priority >= priority_threshold && t.match_file?(f)
    }
  end

  # Maps non-regular files to their inode/* type; nil for anything else.
  def check_special(filename)
    case File.ftype(filename)
    when 'directory' then @types['inode/directory']
    when 'characterSpecial' then @types['inode/chardevice']
    when 'blockSpecial' then @types['inode/blockdevice']
    when 'fifo' then @types['inode/fifo']
    when 'socket' then @types['inode/socket']
    else
      nil
    end
  end

  # Fallback guess from the first 32 bytes of _f_: any control character
  # other than tab, line feed or carriage return means binary data.
  def check_default(f)
    f.pos = 0
    firsts = f.read(32) || ''
    bytes = firsts.unpack('C*')
    if bytes.any? { |byte| byte < 32 && ![9, 10, 13].include?(byte) }
      @types['application/octet-stream']
    else
      @types['text/plain']
    end
  end

  # Loads a "globs" file made of "type:pattern" lines. Comment lines and
  # lines without both a type and a pattern are skipped. The first type
  # registered for a pattern keeps it.
  def load_globs(file)
    File.open(file) { |f|
      f.each { |line|
        next if line =~ /^#/
        type, pattern = line.chomp.split ':', 2
        next if type.nil? || type.empty? || pattern.nil? || pattern.empty?

        @types[type].add_glob pattern
        @globs[pattern] = @types[type] unless @globs.has_key? pattern
      }
    }
  end

  # Loads a "magic" file: a "MIME-Magic\0\n" header followed by sections,
  # each introduced by a "[priority:type]" line and holding the raw rules
  # of that type. Raises RuntimeError when the header is wrong.
  def load_magic(file)
    # Binary mode: the rules contain arbitrary bytes.
    File.open(file, 'rb') { |f|
      raise 'Bad magic file' if f.readline != "MIME-Magic\0\n"

      priority = nil
      type = nil
      buf = String.new

      f.each { |line|
        if line =~ /^\[(\d+):(.+)\]/
          next_priority = $1.to_i
          next_type = $2
          register_magic(type, priority, buf) unless type.nil?

          priority = next_priority
          type = next_type
          buf = String.new
        elsif !type.nil?
          buf << line
        end
      }

      # The last section is not followed by a header; register it here.
      register_magic(type, priority, buf) unless type.nil?
    }
  end

  # Attaches the rules of one magic section to its type and records the
  # type as having magic.
  def register_magic(type, priority, buf)
    @types[type].load_magic buf, priority
    @magics << @types[type]
  end
end
358
+
359
# Locate the MIME database directories and load every "globs" and "magic"
# file found in them. An unset or empty XDG_DATA_HOME / XDG_DATA_DIRS falls
# back to its default value.
xdg_data_home = ENV['XDG_DATA_HOME']
if xdg_data_home.nil? || xdg_data_home.empty?
  xdg_data_home = "#{ENV['HOME']}/.local/share"
end

xdg_data_dirs = ENV['XDG_DATA_DIRS']
if xdg_data_dirs.nil? || xdg_data_dirs.empty?
  xdg_data_dirs = "/usr/local/share/:/usr/share"
end

# The user directory comes first so that it is searched before the system
# ones. Empty components (e.g. from "a::b") are dropped instead of turning
# into a bogus "/mime" directory.
@mime_dirs = (xdg_data_home + ':' + xdg_data_dirs).split(':').reject { |dir|
  dir.empty?
}.collect { |dir|
  "#{dir}/mime"
}

# Types are created on first access by name.
@types = Hash.new { |h, k| h[k] = Type.new(k) }
@magics = []
@globs = {}

@mime_dirs.each { |dir|
  glob_file = "#{dir}/globs"
  load_globs glob_file if File.file? glob_file

  magic_file = "#{dir}/magic"
  load_magic magic_file if File.file? magic_file
}
377
+ end
data/rakefile ADDED
@@ -0,0 +1,30 @@
1
# Build script: generates the RDoc documentation and packages the gem.
#
# NOTE(review): rake/rdoctask and rake/gempackagetask are the task libraries
# of the Rake release this was written for; later Rake releases no longer
# ship them — confirm against the Rake version in use before upgrading.
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'

# Load the library relative to this file so that the build does not depend
# on the current directory being in the load path. Only MIME::VERSION is
# needed from it.
require File.expand_path(File.join(File.dirname(__FILE__), 'lib', 'shared-mime-info'))

PKG_FILES = FileList["lib/*.rb", "rakefile", "copying.txt"].to_a

spec = Gem::Specification.new do |s|
  s.summary = "Library to guess the MIME type of a file with both filename lookup and magic file detection"
  s.name = "shared-mime-info"
  s.author = "Hank Lords"
  s.email = "hanklords@gmail.com"
  s.version = MIME::VERSION
  s.has_rdoc = true
  s.require_path = 'lib'
  # The file to load when the gem is activated is the library itself, not rake.
  s.autorequire = 'shared-mime-info'
  s.files = PKG_FILES
  # s.description = ""
end

Rake::RDocTask.new do |rd|
  rd.rdoc_files.include "lib/*.rb"
  rd.options << "--inline-source"
  rd.main = "MIME"
end

Rake::GemPackageTask.new spec do |p|
  p.need_tar_gz = true
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: shared-mime-info
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.1"
7
+ date: 2006-09-24 00:00:00 +02:00
8
+ summary: Library to guess the MIME type of a file with both filename lookup and magic file detection
9
+ require_paths:
10
+ - lib
11
+ email: hanklords@gmail.com
12
+ homepage:
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: rake
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ authors:
29
+ - Hank Lords
30
+ files:
31
+ - lib/shared-mime-info.rb
32
+ - rakefile
33
+ - copying.txt
34
+ test_files: []
35
+
36
+ rdoc_options: []
37
+
38
+ extra_rdoc_files: []
39
+
40
+ executables: []
41
+
42
+ extensions: []
43
+
44
+ requirements: []
45
+
46
+ dependencies: []
47
+