shared-mime-info 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/copying.txt +20 -0
  2. data/lib/shared-mime-info.rb +377 -0
  3. data/rakefile +30 -0
  4. metadata +47 -0
data/copying.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2006 Hank Lords <hanklords@gmail.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,377 @@
1
+ # Copyright (c) 2006 Hank Lords <hanklords@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining
4
+ # a copy of this software and associated documentation files (the
5
+ # "Software"), to deal in the Software without restriction, including
6
+ # without limitation the rights to use, copy, modify, merge, publish,
7
+ # distribute, sublicense, and/or sell copies of the Software, and to
8
+ # permit persons to whom the Software is furnished to do so, subject to
9
+ # the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in all
12
+ # copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ require 'enumerator'
23
+ require 'rexml/document'
24
+
25
+ # shared-mime-info is a pure Ruby library for accessing the MIME info
26
+ # database provided by Freedesktop[http://freedesktop.org/] on
27
+ # {Standards/shared-mime-info-spec}[http://wiki.freedesktop.org/wiki/Standards_2fshared_2dmime_2dinfo_2dspec].
28
+ #
29
+ # This provides a way to guess the mime type of a file by doing both
30
+ # filename lookups and _magic_ file checks. This implementation tries to
31
+ # follow version 0.13 of the
32
+ # specification[http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.13.html].
33
+ module MIME
34
+ VERSION = '0.1'
35
+
36
# Parser and matcher for the rules stored in one section of a
# shared-mime-info +magic+ file.
#
# A section is a sequence of rules of the form
#
#   [indent] ">" start-offset "=" value ["&" mask] ["~" word-size] ["+" range-length] "\n"
#
# where +value+ is a two-byte big-endian length followed by that many raw
# bytes and +mask+, when present, has the same length as the value.
module Magic # :nodoc: all
  # Raised when a magic section cannot be parsed.
  class BadMagic < StandardError; end

  # Root of a rule tree. It matches a file when it has no rules at all or
  # when at least one of its top-level rules matches.
  class RootEntry
    def initialize
      @sub_entries = []
      @indent = -1
    end

    # Inserts _entry_ into the tree according to its indent: one level
    # below this node it becomes a direct child, deeper entries are handed
    # to the most recently added child.
    #
    # Entries that are not deeper than this node are ignored. Raises
    # BadMagic when a deeper entry has no possible parent.
    def add_subentry(entry)
      return unless entry.indent > @indent

      if entry.indent == @indent + 1
        @sub_entries << entry
      else
        parent = @sub_entries.last
        unless parent.respond_to? :add_subentry
          raise BadMagic, "rule with indent #{entry.indent} has no parent rule"
        end
        parent.add_subentry entry
      end
    end

    # Returns true when there are no sub-entries or when any of them
    # matches the IO object _f_.
    def check_file(f)
      @sub_entries.empty? || @sub_entries.any? { |e| e.check_file f }
    end
  end

  # A single rule: +value+ must be found in the file at +start_offset+, or
  # at any of the +range_length+ consecutive offsets starting there.
  class Entry < RootEntry
    attr_reader :indent

    # _mask_ may be nil. _word_size_ and _range_length_ may be nil or
    # anything responding to +to_i+; a missing range means a single offset.
    # _word_size_ is only recorded: no byte swapping is performed.
    def initialize(indent, start_offset, value_length, value, mask, word_size, range_length)
      super()
      @indent = indent
      @start_offset = start_offset
      @value_length = value_length
      # Comparisons are done on byte arrays so string encodings never matter.
      @value_bytes = value.unpack('C*').freeze
      @mask_bytes = mask && mask.unpack('C*').freeze
      @word_size = word_size
      @range_length = range_length
    end

    # A rule matches when its own value is found and its sub-rules (if
    # any) are satisfied as well.
    def check_file(f)
      check_entry(f) && super(f)
    end

    private

    # Reads just enough bytes to try every offset allowed by the range and
    # reports whether the value occurs at one of them.
    def check_entry(f)
      f.pos = @start_offset
      window = f.read(@value_length + search_span - 1)
      return false if window.nil?

      haystack = window.unpack('C*')
      last_shift = haystack.length - @value_length
      return false if last_shift < 0

      (0..last_shift).any? { |shift| match_at?(haystack, shift) }
    end

    # Number of consecutive offsets to try, never less than one.
    def search_span
      span = @range_length.to_i
      span < 1 ? 1 : span
    end

    # Compares the bytes at _shift_ with the value, ANDing the file bytes
    # with the mask first when a mask was given.
    def match_at?(haystack, shift)
      candidate = haystack[shift, @value_length]
      if @mask_bytes
        candidate = candidate.zip(@mask_bytes).map { |byte, bits| byte & (bits || 0xff) }
      end
      candidate == @value_bytes
    end
  end

  # Parses the raw body of one magic section and returns the RootEntry of
  # the resulting rule tree.
  #
  # Raises BadMagic when a rule header is malformed or when a value or
  # mask is shorter than its declared length.
  def self.parse(magic)
    parsed = RootEntry.new
    entry = magic.dup
    # Values are arbitrary bytes: make sure no text encoding gets in the
    # way of the regexps below (no-op on Rubies without encodings).
    entry.force_encoding('BINARY') if entry.respond_to? :force_encoding

    until entry.empty?
      header = /\A(\d*)>(\d+)=/.match(entry)
      raise BadMagic, 'malformed rule header' unless header
      indent = header[1].to_i
      start_offset = header[2].to_i
      entry = header.post_match

      raise BadMagic, 'missing value length' if entry.length < 2
      value_length = entry.unpack('n').first
      value, entry = entry.unpack("x2a#{value_length}a*")
      raise BadMagic, 'truncated value' unless value.length == value_length

      # Optional fields are reset for every rule so that nothing leaks
      # from the previous one.
      mask = nil
      if entry[0, 1] == '&'
        mask, entry = entry.unpack("xa#{value_length}a*")
        raise BadMagic, 'truncated mask' unless mask.length == value_length
      end

      word_size = 1
      if (field = /\A~(\d+)/.match(entry))
        word_size = field[1].to_i
        entry = field.post_match
      end

      range_length = 1
      if (field = /\A\+(\d+)/.match(entry))
        range_length = field[1].to_i
        entry = field.post_match
      end

      # Drop whatever is left of the line, including its newline.
      line_end = entry.index("\n")
      entry = line_end ? entry[(line_end + 1)..-1] : ''

      parsed.add_subentry Entry.new(indent, start_offset, value_length, value, mask, word_size, range_length)
    end

    parsed
  end
end
123
+
124
# Type represents a single mime type such as <b>text/html</b>.
class Type
  attr_reader :magic_priority # :nodoc:

  # Returns the type of a mime type as a String, such as <b>text/html</b>.
  attr_reader :type

  # Returns the media part of the type of a mime type as a string,
  # such as <b>text</b> for a type of <b>text/html</b>.
  def media; @type.split('/', 2).first; end

  # Returns the subtype part of the type of a mime type as a string,
  # such as <b>html</b> for a type of <b>text/html</b>.
  def subtype; @type.split('/', 2).last; end

  # Synonym of type.
  def to_s; @type; end

  # Returns a Hash of the comments associated with a mime type in
  # different languages, keyed by the value of their <tt>xml:lang</tt>
  # attribute. The comment without a language is the default value of
  # the Hash.
  #
  #  MIME['text/html'].comment.default
  #   => "HTML page"
  #
  #  MIME['text/html'].comment['fr']
  #   => "page HTML"
  #
  # Raises Errno::ENOENT when no XML description of the type is found in
  # MIME.mime_dirs.
  def comment
    # Declared outside the block so it is still visible after it.
    comments = {}
    File.open(definition_file) { |f|
      doc = REXML::Document.new f
      REXML::XPath.match(doc, '*/comment').each { |c|
        if (lang = c.attributes['xml:lang'])
          comments[lang] = c.text
        else
          comments.default = c.text
        end
      }
    }
    comments
  end

  # Returns all the types this type is a subclass of, each looked up
  # with MIME[].
  #
  # Raises Errno::ENOENT when no XML description of the type is found in
  # MIME.mime_dirs.
  def parents
    File.open(definition_file) { |f|
      doc = REXML::Document.new f
      REXML::XPath.match(doc, '*/sub-class-of').collect { |c|
        MIME[c.attributes['type']]
      }
    }
  end

  # Equality test.
  #
  #  MIME['text/html'] == 'text/html'
  #   => true
  def ==(type)
    if type.is_a? Type
      @type == type.type
    elsif type.respond_to? :to_str
      @type == type
    else
      false
    end
  end

  # Check if _filename_ is of this particular type by comparing it to
  # some common extensions.
  #
  #  MIME['text/html'].match_filename? 'index.html'
  #   => true
  def match_filename?(filename)
    @glob_patterns.any? { |pattern| File.fnmatch pattern, filename }
  end

  # Check if _file_ is of this particular type by looking for precise
  # patterns (_magic_ numbers) in different locations of the file.
  #
  # _file_ must be an IO object opened with read permissions.
  # Returns false when no magic has been loaded for this type.
  def match_file?(file)
    return false if @magic.nil?

    @magic.check_file file
  end

  def initialize(type) # :nodoc:
    @type = type.freeze
    @glob_patterns = []
  end

  def load_magic(magic, priority) # :nodoc:
    @magic_priority = priority
    @magic = Magic.parse magic
  end

  def add_glob(glob) # :nodoc:
    @glob_patterns << glob.freeze
  end

  private

  # Path of the first <tt><dir>/<type>.xml</tt> file that exists in
  # MIME.mime_dirs.
  def definition_file
    MIME.mime_dirs.each { |dir|
      candidate = "#{dir}/#{@type}.xml"
      return candidate if File.file? candidate
    }
    raise Errno::ENOENT, "#{@type}.xml"
  end
end
236
+
237
class << self
  attr_reader :mime_dirs # :nodoc:

  # Returns the MIME::Type object corresponding to _type_, or _nil_ when
  # the type is unknown.
  def [](type)
    # fetch bypasses the default block of @types, so looking up an
    # unknown type does not create it.
    @types.fetch type, nil
  end

  # Look for the type of a file by doing successive checks on
  # the filename patterns: first with the filename as given, then with
  # the lowercased filename. When several patterns match, the longest
  # one wins.
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_globs(filename)
    patterns = @globs.keys
    found = patterns.select { |pattern| File.fnmatch pattern, filename }

    if found.empty?
      downcase_filename = filename.downcase
      found = patterns.select { |pattern|
        File.fnmatch pattern, downcase_filename
      }
    end

    # Longest pattern first; equal lengths fall back to string order so
    # the result is deterministic.
    @globs[found.sort_by { |pattern| [pattern.length, pattern] }.last]
  end

  # Look for the type of a file by doing successive checks on
  # _magic_ numbers.
  #
  # _file_ is either an object responding to +read+ or the path of a
  # file to open.
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_magics(file)
    if file.respond_to? :read
      check_magics_with_priority(file, 0)
    else
      File.open(file, 'rb') { |f| check_magics_with_priority(f, 0) }
    end
  end

  # Look for the type of a file by doing successive checks with
  # the filename patterns or magic numbers. If none of the matches
  # are successful, returns a type of <b>application/octet-stream</b> if
  # the file contains control characters at its beginning, or <b>text/plain</b> otherwise.
  #
  # Returns a MIME::Type object.
  def check(filename)
    check_special(filename) ||
      File.open(filename, 'rb') { |f|
        check_magics_with_priority(f, 80) ||
          check_globs(filename) ||
          check_magics_with_priority(f, 0) ||
          check_default(f)
      }
  end

  private

  # First type with a magic priority of at least _priority_threshold_
  # whose magic matches _f_. Types below the threshold are skipped
  # rather than ending the search, so @magics does not have to be
  # globally sorted when several magic files have been loaded.
  def check_magics_with_priority(f, priority_threshold)
    @magics.find { |t|
      t.magic_priority >= priority_threshold && t.match_file?(f)
    }
  end

  # Maps the non-regular file kinds reported by File.ftype to their
  # <b>inode/*</b> type. Returns _nil_ for anything else.
  def check_special(filename)
    case File.ftype(filename)
    when 'directory' then @types['inode/directory']
    when 'characterSpecial' then @types['inode/chardevice']
    when 'blockSpecial' then @types['inode/blockdevice']
    when 'fifo' then @types['inode/fifo']
    when 'socket' then @types['inode/socket']
    end
  end

  # Fallback: looks at the first 32 bytes and answers
  # <b>application/octet-stream</b> when one of them is a control
  # character other than tab, line feed or carriage return, and
  # <b>text/plain</b> otherwise.
  def check_default(f)
    f.pos = 0
    firsts = f.read(32) || ''
    bytes = firsts.unpack('C*')
    if bytes.any? { |byte| byte < 32 && ![9, 10, 13].include?(byte) }
      @types['application/octet-stream']
    else
      @types['text/plain']
    end
  end

  # Loads a +globs+ file made of <tt>type:pattern</tt> lines. Comment
  # lines, blank lines and lines without a pattern are skipped. The
  # first type registered for a pattern keeps it.
  def load_globs(file)
    File.open(file) { |f|
      f.each { |line|
        next if line =~ /^#/
        type, pattern = line.chomp.split ':', 2
        next if type.nil? || type.empty? || pattern.nil? || pattern.empty?
        @types[type].add_glob pattern
        @globs[pattern] = @types[type] unless @globs.has_key? pattern
      }
    }
  end

  # Loads a +magic+ file: a <tt>"MIME-Magic\0\n"</tt> header followed by
  # sections, each introduced by a <tt>[priority:type]</tt> line.
  # Every section, including the last one, is handed to its type.
  #
  # Raises a RuntimeError when the header is wrong.
  def load_magic(file)
    # Binary mode: the rules contain arbitrary bytes.
    File.open(file, 'rb') { |f|
      raise 'Bad magic file' if f.readline != "MIME-Magic\0\n"

      type = nil
      priority = nil
      buf = String.new

      f.each { |line|
        if line =~ /\A\[(\d+):(.+)\]/
          # Capture the groups before calling anything else.
          next_priority = $1.to_i
          next_type = $2
          store_magic(type, priority, buf) if type

          priority = next_priority
          type = next_type
          buf = String.new
        elsif type
          buf << line
        end
      }

      # The last section has no following header to flush it.
      store_magic(type, priority, buf) if type
    }
  end

  # Gives the rules of one section to its type and registers the type
  # for magic checks.
  def store_magic(type, priority, rules)
    @types[type].load_magic rules, priority
    @magics << @types[type]
  end
end
358
+
359
# Build the list of MIME database directories from the XDG environment
# variables. A variable that is unset or empty falls back to its default.
xdg_data_home = ENV['XDG_DATA_HOME'].to_s
xdg_data_home = "#{ENV['HOME']}/.local/share" if xdg_data_home.empty?

xdg_data_dirs = ENV['XDG_DATA_DIRS'].to_s
xdg_data_dirs = "/usr/local/share/:/usr/share" if xdg_data_dirs.empty?

# The user directory comes first. Empty path elements are dropped and
# File.join keeps a trailing slash from producing "//mime".
@mime_dirs = ([xdg_data_home] + xdg_data_dirs.split(':')).reject { |dir|
  dir.empty?
}.collect { |dir|
  File.join(dir, 'mime')
}

# Types are created on first reference while the database is loaded.
@types = Hash.new { |h, k| h[k] = Type.new(k) }
@magics = []
@globs = {}

# Load whichever of the globs and magic files each directory provides.
@mime_dirs.each { |dir|
  glob_file = "#{dir}/globs"
  load_globs glob_file if File.file? glob_file

  magic_file = "#{dir}/magic"
  load_magic magic_file if File.file? magic_file
}
377
+ end
data/rakefile ADDED
@@ -0,0 +1,30 @@
1
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'

# Load the library relative to this file (for MIME::VERSION) so the tasks
# do not depend on the current directory being on the load path.
require File.expand_path('lib/shared-mime-info', File.dirname(__FILE__))

# Files shipped in the package.
PKG_FILES = FileList["lib/*.rb", "rakefile", "copying.txt"].to_a

spec = Gem::Specification.new do |s|
  s.summary = "Library to guess the MIME type of a file with both filename lookup and magic file detection"
  s.name = "shared-mime-info"
  s.author = "Hank Lords"
  s.email = "hanklords@gmail.com"
  s.version = MIME::VERSION
  s.has_rdoc = true
  s.require_path = 'lib'
  # The file to auto-require is the library itself, not rake.
  s.autorequire = 'shared-mime-info'
  s.files = PKG_FILES
  # s.description = ""
end

# API documentation, with the MIME module as the main page.
Rake::RDocTask.new do |rd|
  rd.rdoc_files.include "lib/*.rb"
  rd.options << "--inline-source"
  rd.main = "MIME"
end

# Gem package plus a .tar.gz archive.
Rake::GemPackageTask.new spec do |p|
  p.need_tar_gz = true
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: shared-mime-info
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.1"
7
+ date: 2006-09-24 00:00:00 +02:00
8
+ summary: Library to guess the MIME type of a file with both filename lookup and magic file detection
9
+ require_paths:
10
+ - lib
11
+ email: hanklords@gmail.com
12
+ homepage:
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: rake
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ authors:
29
+ - Hank Lords
30
+ files:
31
+ - lib/shared-mime-info.rb
32
+ - rakefile
33
+ - copying.txt
34
+ test_files: []
35
+
36
+ rdoc_options: []
37
+
38
+ extra_rdoc_files: []
39
+
40
+ executables: []
41
+
42
+ extensions: []
43
+
44
+ requirements: []
45
+
46
+ dependencies: []
47
+