shared-mime-info 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/copying.txt +20 -0
- data/lib/shared-mime-info.rb +377 -0
- data/rakefile +30 -0
- metadata +47 -0
data/copying.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2006 Hank Lords <hanklords@gmail.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be included in all
|
12
|
+
copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@@ -0,0 +1,377 @@
|
|
1
|
+
# Copyright (c) 2006 Hank Lords <hanklords@gmail.com>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
# a copy of this software and associated documentation files (the
|
5
|
+
# "Software"), to deal in the Software without restriction, including
|
6
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
# the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be included in all
|
12
|
+
# copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
17
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
18
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
19
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
20
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
require 'enumerator'
|
23
|
+
require 'rexml/document'
|
24
|
+
|
25
|
+
# shared-mime-info is a pure Ruby library for accessing the MIME info
|
26
|
+
# database provided by Freedesktop[http://freedesktop.org/] on
|
27
|
+
# {Standards/shared-mime-info-spec}[http://wiki.freedesktop.org/wiki/Standards_2fshared_2dmime_2dinfo_2dspec].
|
28
|
+
#
|
29
|
+
# This provides a way to guess the mime type of a file by doing both
|
30
|
+
# filename lookups and _magic_ file checks. This implementation tries to
|
31
|
+
# follow version 0.13 of the
|
32
|
+
# specification[http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.13.html].
|
33
|
+
module MIME
|
34
|
+
VERSION = '0.1'
|
35
|
+
|
36
|
+
module Magic # :nodoc: all
  # Raised when a magic section is malformed or its rules are badly nested.
  class BadMagic < StandardError; end

  # Root of a tree of magic rules. It holds the rules of the next indent
  # level and matches as soon as one of them matches.
  class RootEntry
    def initialize
      @sub_entries = []
      @indent = -1
    end

    # Inserts _entry_ in the tree according to its indent: a direct child is
    # appended here, a deeper rule is handed to the most recently added
    # child. Entries that are not deeper than this node are ignored.
    #
    # Raises BadMagic when a deeper rule has no parent to attach to.
    def add_subentry(entry)
      return unless entry.indent > @indent

      if entry.indent == @indent + 1
        @sub_entries << entry
      elsif @sub_entries.last.respond_to? :add_subentry
        @sub_entries.last.add_subentry entry
      else
        raise BadMagic
      end
    end

    # True when this node has no children or when any child matches _f_.
    def check_file(f)
      @sub_entries.empty? || @sub_entries.any? { |e| e.check_file f }
    end
  end

  # A single magic rule: a byte string expected at (or near) an offset.
  class Entry < RootEntry
    attr_reader :indent

    # _mask_ may be nil (no masking). _word_size_ and _range_length_ accept
    # nil, a String of digits or an Integer; a missing or non-positive range
    # is treated as 1, i.e. the value must sit exactly at _start_offset_.
    def initialize(indent, start_offset, value_length, value, mask, word_size, range_length)
      super()
      @indent = indent
      @start_offset = start_offset
      @value_length = value_length
      @value = value.freeze
      @mask = mask.freeze
      # NOTE(review): word_size (byte swapping of host-order values on
      # little-endian machines) is stored but still not applied.
      @word_size = word_size
      span = range_length.to_i
      @range_length = span < 1 ? 1 : span
      # Byte arrays make the comparison independent of string encodings.
      @value_bytes = @value.unpack('C*')
      @mask_bytes = @mask && @mask.unpack('C*')
    end

    # A rule matches when its own value matches and, if it has sub-rules,
    # at least one of them matches too.
    def check_file(f)
      check_entry(f) && super(f)
    end

    private

    # Looks for the value at every start position in
    # start_offset ... start_offset + range_length.
    def check_entry(f)
      f.pos = @start_offset
      window = f.read(@value_length + @range_length - 1)
      return false if window.nil?

      bytes = window.unpack('C*')
      # Never slide past the data actually read.
      last_shift = [@range_length, bytes.length - @value_length + 1].min - 1
      (0..last_shift).any? { |shift| match_at?(bytes, shift) }
    end

    # Compares the value with the file bytes at _shift_, ANDing each file
    # byte with the mask first when a mask is present.
    def match_at?(bytes, shift)
      @value_bytes.each_with_index.all? do |expected, i|
        actual = bytes[shift + i]
        actual &= @mask_bytes[i] if @mask_bytes
        actual == expected
      end
    end
  end

  # Parses the rules of one section of a magic file (everything between two
  # "[priority:type]" headers) into a tree and returns its RootEntry.
  #
  # Each rule looks like
  #   [indent]>offset=<2-byte big-endian length><value>[&mask][~word][+range]\n
  #
  # Raises BadMagic when a rule header or its value/mask is malformed.
  def self.parse(magic)
    parsed = RootEntry.new
    # Work on a binary copy so that arbitrary bytes never trip the regexps.
    entry = magic.b

    until entry.empty?
      header = /\A(\d*)>(\d+)=/.match(entry)
      raise BadMagic unless header && header.post_match.bytesize >= 2

      indent = header[1].to_i
      start_offset = header[2].to_i
      entry = header.post_match
      value_length = entry.unpack('n').first
      value, entry = entry.unpack("x2a#{value_length}a*")
      raise BadMagic if value.bytesize != value_length

      # Optional fields are reset for every rule so that nothing leaks from
      # the previous one.
      mask = nil
      if entry[0, 1] == '&'
        mask, entry = entry.unpack("xa#{value_length}a*")
        raise BadMagic if mask.bytesize != value_length
      end

      word_size = 1
      if (field = /\A~(\d+)/.match(entry))
        word_size = field[1].to_i
        entry = field.post_match
      end

      range_length = 1
      if (field = /\A\+(\d+)/.match(entry))
        range_length = field[1].to_i
        entry = field.post_match
      end

      # Anything left before the end of the line is an unknown extension and
      # is skipped; a missing final newline simply ends the section.
      newline = entry.index("\n")
      entry = newline ? entry[(newline + 1)..-1] : ''

      parsed.add_subentry Entry.new(indent, start_offset, value_length, value, mask, word_size, range_length)
    end

    parsed
  end
end
|
123
|
+
|
124
|
+
# Type represents a single mime type such as <b>text/html</b>.
class Type
  # Priority of the magic rules loaded for this type, or _nil_ when no
  # magic has been loaded.
  attr_reader :magic_priority # :nodoc:

  # Returns the type of a mime type as a String, such as <b>text/html</b>.
  attr_reader :type

  # Returns the media part of the type of a mime type as a string,
  # such as <b>text</b> for a type of <b>text/html</b>.
  def media; @type.split('/', 2).first; end

  # Returns the subtype part of the type of a mime type as a string,
  # such as <b>html</b> for a type of <b>text/html</b>.
  def subtype; @type.split('/', 2).last; end

  # Synonym of type.
  def to_s; @type; end

  # Returns a Hash of the comments associated with a mime type in
  # different languages. The comment without a language is the default
  # value of the Hash. The Hash is empty when no XML definition of the
  # type is found in the mime directories.
  #
  #  MIME['text/html'].comment.default
  #   => "HTML page"
  #
  #  MIME['text/html'].comment['fr']
  #   => "page HTML"
  def comment
    # Declared outside the block so it is still in scope for the return.
    comments = {}
    with_definition do |doc|
      REXML::XPath.match(doc, '*/comment').each do |c|
        if (lang = c.attributes['xml:lang'])
          comments[lang] = c.text
        else
          comments.default = c.text
        end
      end
    end
    comments
  end

  # Returns all the types this type is a subclass of, or an empty Array
  # when no XML definition of the type is found.
  def parents
    found = with_definition do |doc|
      REXML::XPath.match(doc, '*/sub-class-of').collect { |c| MIME[c.attributes['type']] }
    end
    found || []
  end

  # Equality test. A Type is equal to another Type with the same type
  # string, and to a string-like object holding that type string.
  #
  #  MIME['text/html'] == 'text/html'
  #   => true
  def ==(type)
    if type.is_a? Type
      @type == type.type
    elsif type.respond_to? :to_str
      @type == type
    else
      false
    end
  end

  # Check if _filename_ is of this particular type by comparing it to
  # some common extensions.
  #
  #  MIME['text/html'].match_filename? 'index.html'
  #   => true
  def match_filename?(filename)
    @glob_patterns.any? { |pattern| File.fnmatch pattern, filename }
  end

  # Check if _file_ is of this particular type by looking for precise
  # patterns (_magic_ numbers) in different locations of the file.
  # Returns false when no magic has been loaded for this type.
  #
  # _file_ must be an IO object opened with read permissions.
  def match_file?(file)
    return false if @magic.nil?

    @magic.check_file file
  end

  def initialize(type) # :nodoc:
    @type = type.freeze
    @glob_patterns = []
  end

  # Parses the raw magic section _magic_ and remembers its _priority_.
  def load_magic(magic, priority) # :nodoc:
    @magic_priority = priority
    @magic = Magic.parse magic
  end

  def add_glob(glob) # :nodoc:
    @glob_patterns << glob.freeze
  end

  private

  # Yields the parsed XML definition of this type, taken from the first
  # mime directory that has one, and returns the value of the block.
  # Returns nil without yielding when no directory has a definition.
  def with_definition
    candidates = MIME.mime_dirs.collect { |dir| "#{dir}/#{@type}.xml" }
    path = candidates.find { |candidate| File.file? candidate }
    return nil unless path

    # File.open, not Kernel#open: the path must never be run as a command.
    File.open(path) { |f| yield REXML::Document.new(f) }
  end
end
|
236
|
+
|
237
|
+
class << self
  attr_reader :mime_dirs # :nodoc:

  # Returns the MIME::Type object corresponding to _type_, or _nil_ if the
  # type is not known.
  def [](type)
    # fetch bypasses the Hash default block, so lookups never create types.
    @types.fetch type, nil
  end

  # Look for the type of a file by doing successive checks on
  # the filename patterns: first case-sensitively, then on the downcased
  # name. Only the last component of _filename_ is considered, and when
  # several patterns match the longest one wins.
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_globs(filename)
    name = File.basename(filename)
    patterns = @globs.keys
    found = patterns.select { |pattern| glob_match? pattern, name }

    if found.empty?
      downcase_name = name.downcase
      found = patterns.select { |pattern| glob_match? pattern, downcase_name }
    end

    longest = found.max_by { |pattern| pattern.length }
    longest && @globs[longest]
  end

  # Look for the type of a file by doing successive checks on
  # _magic_ numbers. _file_ is either an object responding to +read+ or
  # the name of a file to open.
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_magics(file)
    if file.respond_to? :read
      check_magics_with_priority(file, 0)
    else
      File.open(file, 'rb') { |f| check_magics_with_priority(f, 0) }
    end
  end

  # Look for the type of a file by doing successive checks with
  # the filename patterns or magic numbers. If none of the matches
  # are successful, returns a type of <b>application/octet-stream</b> if
  # the file contains control characters at its beginning, or <b>text/plain</b> otherwise.
  #
  # Returns a MIME::Type object.
  def check(filename)
    check_special(filename) ||
      File.open(filename, 'rb') { |f|
        # Magic of priority 80 and above beats the file name; weaker magic
        # is only consulted when the name is inconclusive.
        check_magics_with_priority(f, 80) ||
          check_globs(filename) ||
          check_magics_with_priority(f, 0) ||
          check_default(f)
      }
  end

  private

  # Matches a glob against a bare file name; FNM_DOTMATCH lets patterns
  # such as "*.html" apply to names starting with a period.
  def glob_match?(pattern, name)
    File.fnmatch pattern, name, File::FNM_DOTMATCH
  end

  # Returns the first type, in decreasing priority order, whose priority is
  # at least _priority_threshold_ and whose magic matches _f_.
  def check_magics_with_priority(f, priority_threshold)
    @magics.find { |t|
      t.magic_priority >= priority_threshold && t.match_file?(f)
    }
  end

  # Returns the inode/* type of directories, devices, fifos and sockets,
  # or nil for anything else.
  def check_special(filename)
    case File.ftype(filename)
    when 'directory' then @types['inode/directory']
    when 'characterSpecial' then @types['inode/chardevice']
    when 'blockSpecial' then @types['inode/blockdevice']
    when 'fifo' then @types['inode/fifo']
    when 'socket' then @types['inode/socket']
    else
      nil
    end
  end

  # Fallback guess from the first 32 bytes: any byte below 32 other than
  # tab, line feed and carriage return means binary data.
  def check_default(f)
    f.pos = 0
    firsts = f.read(32) || ''
    bytes = firsts.unpack('C*')
    if bytes.any? { |byte| byte < 32 && ![9, 10, 13].include?(byte) }
      @types['application/octet-stream']
    else
      @types['text/plain']
    end
  end

  # Loads a "globs" file made of "type:pattern" lines. Comments, blank
  # lines and lines without both fields are skipped. The first type
  # registered for a pattern keeps it.
  def load_globs(file)
    File.open(file) { |f|
      f.each_line { |line|
        next if line[0, 1] == '#'

        type, pattern = line.chomp.split ':', 2
        next if type.nil? || type.empty? || pattern.nil? || pattern.empty?

        @types[type].add_glob pattern
        @globs[pattern] = @types[type] unless @globs.has_key? pattern
      }
    }
  end

  # Loads a binary "magic" file: a "MIME-Magic\0\n" header followed by
  # "[priority:type]" sections, each holding the raw rules of one type.
  # Afterwards @magics is ordered by decreasing priority.
  #
  # Raises RuntimeError when the header is wrong.
  def load_magic(file)
    File.open(file, 'rb') { |f|
      raise 'Bad magic file' if f.readline != "MIME-Magic\0\n"

      priority = nil
      type = nil
      buf = String.new

      f.each_line { |line|
        if line =~ /\A\[(\d+):(.+)\]/
          next_priority = $1.to_i
          next_type = $2
          register_magic type, priority, buf

          priority = next_priority
          type = next_type
          buf = String.new
        else
          buf << line
        end
      }

      # The last section has no following header to trigger its registration.
      register_magic type, priority, buf
    }

    # Several magic files may have been loaded; keep one global ordering,
    # preserving file order among equal priorities.
    ranked = @magics.each_with_index.sort_by { |t, position| [-t.magic_priority, position] }
    @magics.replace ranked.collect { |t, _| t }
  end

  # Hands one magic section to its type and lists the type in @magics.
  # Sections without a type or without rules are ignored.
  def register_magic(type, priority, rules)
    return if type.nil? || rules.empty?

    mime = @types[type]
    mime.load_magic rules, priority
    @magics << mime unless @magics.include? mime
  end
end
|
358
|
+
|
359
|
+
# Locate the mime directories from the XDG base directory variables. A
# variable that is unset or empty falls back to its default.
xdg_data_home = ENV['XDG_DATA_HOME']
xdg_data_home = "#{ENV['HOME']}/.local/share" if xdg_data_home.nil? || xdg_data_home.empty?
xdg_data_dirs = ENV['XDG_DATA_DIRS']
xdg_data_dirs = "/usr/local/share/:/usr/share" if xdg_data_dirs.nil? || xdg_data_dirs.empty?

# The user directory comes first, so its definitions are found first. Empty
# segments (as in "a::b") are dropped instead of becoming "/mime".
data_dirs = [xdg_data_home] + xdg_data_dirs.split(':')
@mime_dirs = data_dirs.reject { |dir| dir.empty? }.collect { |dir|
  File.join(dir, 'mime')
}

# Types are created on first reference while the databases are loaded.
@types = Hash.new { |h, k| h[k] = Type.new(k) }
@magics = []
@globs = {}

# Load whichever of the "globs" and "magic" databases each directory has.
@mime_dirs.each { |dir|
  glob_file = File.join(dir, 'globs')
  load_globs glob_file if File.file? glob_file

  magic_file = File.join(dir, 'magic')
  load_magic magic_file if File.file? magic_file
}
|
377
|
+
end
|
data/rakefile
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# NOTE(review): rake/rdoctask and rake/gempackagetask only exist in old
# versions of rake; confirm the rake version this build targets.
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'

# Load the library by absolute path: the current directory is not on the
# load path on Ruby 1.9.2 and later, so a bare 'lib/...' require fails.
require File.expand_path('lib/shared-mime-info', File.dirname(__FILE__))

# Files shipped in the gem and in the tarball.
PKG_FILES = FileList["lib/*.rb", "rakefile", "copying.txt"].to_a

spec = Gem::Specification.new do |s|
  s.summary = "Library to guess the MIME type of a file with both filename lookup and magic file detection"
  s.name = "shared-mime-info"
  s.author = "Hank Lords"
  s.email = "hanklords@gmail.com"
  s.version = MIME::VERSION
  s.has_rdoc = true
  s.require_path = 'lib'
  # NOTE(review): 'rake' looks unintended here; the library file itself is
  # presumably what should be auto-required. Confirm before changing.
  s.autorequire = 'rake'
  s.files = PKG_FILES
  # s.description = ""
end

# Generates the API documentation from the library sources.
Rake::RDocTask.new do |rd|
  rd.rdoc_files.include "lib/*.rb"
  rd.options << "--inline-source"
  rd.main = "MIME"
end

# Builds the gem and a .tar.gz archive of the same files.
Rake::GemPackageTask.new spec do |p|
  p.need_tar_gz = true
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.11
|
3
|
+
specification_version: 1
|
4
|
+
name: shared-mime-info
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: "0.1"
|
7
|
+
date: 2006-09-24 00:00:00 +02:00
|
8
|
+
summary: Library to guess the MIME type of a file with both filename lookup and magic file detection
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: hanklords@gmail.com
|
12
|
+
homepage:
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: rake
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
authors:
|
29
|
+
- Hank Lords
|
30
|
+
files:
|
31
|
+
- lib/shared-mime-info.rb
|
32
|
+
- rakefile
|
33
|
+
- copying.txt
|
34
|
+
test_files: []
|
35
|
+
|
36
|
+
rdoc_options: []
|
37
|
+
|
38
|
+
extra_rdoc_files: []
|
39
|
+
|
40
|
+
executables: []
|
41
|
+
|
42
|
+
extensions: []
|
43
|
+
|
44
|
+
requirements: []
|
45
|
+
|
46
|
+
dependencies: []
|
47
|
+
|