hanklords-shared-mime-info 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/LICENSE +13 -0
  2. data/README.rdoc +19 -0
  3. data/Rakefile +35 -0
  4. data/lib/shared-mime-info.rb +315 -0
  5. metadata +56 -0
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2006 Mael Clerambault <mael@clerambault.fr>
2
+
3
+ Permission to use, copy, modify, and distribute this software for any
4
+ purpose with or without fee is hereby granted, provided that the above
5
+ copyright notice and this permission notice appear in all copies.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = shared-mime-info
2
+
3
+ shared-mime-info is a pure Ruby library for accessing the MIME info database provided by Freedesktop[http://freedesktop.org/], as described in {Standards/shared-mime-info-spec}[http://freedesktop.org/wiki/Specifications/shared-mime-info-spec].
4
+
5
+ = Project
6
+
7
+ The rubyforge project: http://rubyforge.org/projects/shared-mime
8
+
9
+ The github repository: http://github.com/hanklords/shared-mime-info/tree/master
10
+
11
+
12
+ = Usage
13
+
14
+ require 'shared-mime-info'
15
+
16
+ MIME.check 'sample.jpg' # => #<MIME::Type ..., @type="image/jpeg">
17
+
18
+ MIME['image/jpeg'].match_filename? 'sample.jpg' # => true
19
+
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'

# The magics parser is generated from magics.rl by the file task below, so it
# may not exist yet when this Rakefile is loaded. It is listed by name (not
# only through the lib/*.rb glob) so that the packaged gem always ships it:
# lib/shared-mime-info.rb does `require 'magics'`.
GENERATED_PARSER = "lib/magics.rb"

PKG_FILES = FileList["lib/*.rb", GENERATED_PARSER, "Rakefile", "LICENSE", "README.rdoc"]

spec = Gem::Specification.new do |s|
  s.summary = "Library to guess the MIME type of a file with both filename lookup and magic file detection"
  s.name = "shared-mime-info"
  s.author = "Mael Clerambault"
  s.email = "mael@clerambault.fr"
  s.version = '0.1'
  s.has_rdoc = true
  s.require_path = 'lib'
  # uniq: the generated parser is matched by the glob too once it exists.
  s.files = PKG_FILES.to_a.uniq
end

Rake::RDocTask.new do |rd|
  rd.rdoc_files.include "README.rdoc", "lib/*.rb"
  rd.options << "--inline-source"
end

Rake::GemPackageTask.new spec do |p|
  p.need_tar_gz = true
end

desc 'Generate the magics parser'
file GENERATED_PARSER => "magics.rl" do |t|
  sh "ragel -R -o #{t.name} #{t.prerequisites.join(' ')}"
end

# Never build a package without (re)generating the parser first.
[:gem, :package].each { |name| task name => GENERATED_PARSER }

desc 'Generate the gemspec'
task :spec do
  File.open("#{spec.name}.gemspec", "w") { |g| g.puts spec.to_ruby }
end
@@ -0,0 +1,315 @@
1
+ # Copyright (c) 2006 Mael Clerambault <mael@clerambault.fr>
2
+ #
3
+ # Permission to use, copy, modify, and distribute this software for any
4
+ # purpose with or without fee is hereby granted, provided that the above
5
+ # copyright notice and this permission notice appear in all copies.
6
+ #
7
+ # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
+
15
+ $: << File.dirname(__FILE__)
16
+
17
+ require 'rexml/document'
18
+ require 'magics'
19
+
20
+ # This provides a way to guess the mime type of a file by doing both
21
+ # filename lookups and _magic_ file checks. This implementation tries to
22
+ # follow the version 0.13 of the
23
+ # specification[http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.13.html].
24
+ module MIME
25
+ VERSION = '0.1'
26
+
27
module Magic # :nodoc: all
  # Raised when a magic entry cannot be attached to the entry tree because
  # its indentation level does not fit.
  class BadMagic < StandardError; end

  # A single byte-pattern test against a file, plus the nested tests that
  # must also hold once this one has matched.
  class Entry
    attr_reader :indent

    # Stores the raw description of the test. +value+ and +mask+ are frozen;
    # +mask+ may be nil, in which case every bit of +value+ is compared.
    # +word_size+ is only stored here; nothing in this class reads it.
    def initialize(indent, start_offset, value_length, value, mask, word_size, range_length)
      @indent       = indent
      @start_offset = start_offset
      @value_length = value_length
      @value        = value.freeze
      @mask         = mask.freeze
      @word_size    = word_size
      @range_length = range_length
      @sub_entries  = []
    end

    # Attaches +entry+ below this one. An entry exactly one level deeper
    # becomes a direct child; a deeper one is handed to the most recently
    # added child. Anything else raises BadMagic.
    def add_subentry(entry)
      child_level = @indent + 1
      return @sub_entries << entry if entry.indent == child_level

      raise BadMagic unless entry.indent > child_level
      raise BadMagic if @sub_entries.empty?

      @sub_entries.last.add_subentry entry
    end

    # True when this entry matches +f+ and either it has no sub-entries or
    # at least one of them matches as well.
    def =~(f)
      matched = check_file(f)
      matched && (@sub_entries.empty? || @sub_entries.any? { |sub| sub =~ f })
    end

    private

    # Seeks +f+ to the start offset, reads value_length + range_length - 1
    # bytes and slides the masked value over every offset of what was read.
    def check_file(f)
      f.pos = @start_offset
      mask_bytes  = (@mask || [0xff].pack('c') * @value_length).unpack('c*')
      value_bytes = @value.unpack('c*')
      window      = (f.read(@value_length + @range_length - 1) || '').unpack('c*')

      (0...window.size).any? do |offset|
        candidate = window[offset, @value_length]
        # A candidate shorter than the value yields nil bytes, which never
        # compare equal, so truncated windows simply fail to match.
        value_bytes.zip(mask_bytes, candidate).all? do |vb, mb, rb|
          (rb & mb) == (vb & mb)
        end
      end
    end
  end

  # Head of the entry tree for one mime type. It carries the type name and
  # the priority, and sits at indent -1 so that indent-0 entries become its
  # direct children.
  class RootEntry < Entry
    attr_reader :priority, :type

    def initialize(type, priority)
      @indent      = -1
      @type        = type
      @priority    = priority
      @sub_entries = []
    end

    private

    # The root has no pattern of its own: it always matches, so the result
    # of =~ is decided by its sub-entries.
    def check_file(*_args)
      true
    end
  end
end
90
+
91
# Type represents a single mime type such as <b>text/html</b>.
class Type
  # Returns the type of a mime type as a String, such as <b>text/html</b>.
  attr_reader :type

  # The magic entries and the filename glob patterns attached to this type.
  attr_reader :magics, :glob_patterns

  def initialize(type) # :nodoc:
    @type = type
    @glob_patterns = []
    @magics = []
  end

  # Returns the media part of the type of a mime type as a string,
  # such as <b>text</b> for a type of <b>text/html</b>.
  def media; @type.split('/', 2).first; end

  # Returns the subtype part of the type of a mime type as a string,
  # such as <b>html</b> for a type of <b>text/html</b>.
  def subtype; @type.split('/', 2).last; end

  # Synonym of type.
  def to_s; @type; end

  # Returns a Hash of the comments associated with a mime type in
  # different languages. The comment without an <tt>xml:lang</tt>
  # attribute is installed as the default value of the Hash. The Hash is
  # empty when no XML description of the type is found in MIME.mime_dirs.
  #
  #  MIME['text/html'].comment.default
  #   => "HTML page"
  #
  #  MIME['text/html'].comment['fr']
  #   => "page HTML"
  def comment
    comments = {}
    with_description do |doc|
      REXML::XPath.match(doc, '*/comment').each do |c|
        if (lang = c.attributes['xml:lang'])
          comments[lang] = c.text
        else
          comments.default = c.text
        end
      end
    end
    comments
  end

  # Returns all the types this type is a subclass of, as looked up through
  # MIME[] (so an entry is nil when the parent type is not known there).
  # Returns an empty Array when no XML description of the type is found.
  def parents
    found = with_description do |doc|
      REXML::XPath.match(doc, '*/sub-class-of').collect { |c|
        MIME[c.attributes['type']]
      }
    end
    found || []
  end

  # Equality test. A Type is equal to another Type with the same name and
  # to a String-like object holding that name.
  #
  #  MIME['text/html'] == 'text/html'
  #   => true
  def ==(type)
    if type.is_a? Type
      @type == type.type
    elsif type.respond_to? :to_str
      @type == type
    else
      false
    end
  end

  # Check if _filename_ is of this particular type by comparing it to
  # some common extensions.
  #
  #  MIME['text/html'].match_filename? 'index.html'
  #   => true
  def match_filename?(filename)
    basename = File.basename(filename)
    @glob_patterns.any? { |pattern| File.fnmatch pattern, basename }
  end

  # Check if _file_ is of this particular type by looking for precise
  # patterns (_magic_ numbers) in different locations of the file.
  #
  # _file_ must be an IO object opened with read permissions.
  def match_file?(f)
    @magics.any? { |m| m =~ f }
  end

  private

  # Path of the first existing "<dir>/<type>.xml" among MIME.mime_dirs,
  # or nil when no directory provides one.
  def description_file
    MIME.mime_dirs.collect { |dir| "#{dir}/#{@type}.xml" }.find { |path| File.file? path }
  end

  # Yields the parsed XML description of this type and returns the block's
  # value; returns nil without yielding when there is no description.
  # File.open is used (not Kernel#open) so the path is only ever treated
  # as a plain file name.
  def with_description
    path = description_file
    return nil unless path

    File.open(path) { |f| yield REXML::Document.new(f) }
  end
end
192
+
193
+ class << self
194
+ attr_reader :mime_dirs # :nodoc:
195
+
196
+ # Returns the MIME::Type object corresponding to _type_.
197
+ def [](type)
198
+ @types.fetch type, nil
199
+ end
200
+
201
+ # Look for the type of a file by doing successive checks on
202
+ # the filename patterns.
203
+ #
204
+ # Returns a MIME::Type object or _nil_ if nothing matches.
205
+ def check_globs(filename)
206
+ basename = File.basename(filename)
207
+ enum = Enumerable::Enumerator.new(@globs, :each_key)
208
+ found = enum.select { |pattern| File.fnmatch pattern, basename }
209
+
210
+ if found.empty?
211
+ downcase_basename = basename.downcase
212
+ found = enum.select { |pattern|
213
+ File.fnmatch pattern, downcase_basename
214
+ }
215
+ end
216
+
217
+ @globs[found.max]
218
+ end
219
+
220
+ # Look for the type of a file by doing successive checks on
221
+ # _magic_ numbers.
222
+ #
223
+ # Returns a MIME::Type object or _nil_ if nothing matches.
224
+ def check_magics(file)
225
+ if file.respond_to? :read
226
+ check_magics_type(file, @magics)
227
+ else
228
+ open(file) {|f| check_magics_type(f, @magics) }
229
+ end
230
+ end
231
+
232
+ # Look for the type of a file by doing successive checks with
233
+ # the filename patterns or magic numbers. If none of the matches
234
+ # are successful, returns a type of <b>application/octet-stream</b> if
235
+ # the file contains control characters at its beginning, or <b>text/plain</b> otherwise.
236
+ #
237
+ # Returns a MIME::Type object.
238
+ def check(filename)
239
+ check_special(filename) ||
240
+ open(filename) { |f|
241
+ check_magics_gt80(f) ||
242
+ check_globs(filename) ||
243
+ check_magics_lt80(f) ||
244
+ check_default(f)
245
+ }
246
+ end
247
+
248
+ private
249
+ def check_magics_type(f, set); c = set.find {|m| m =~ f} and MIME[c.type] end
250
+ def check_magics_gt80(f); check_magics_type(f, @magics_gt80) end
251
+ def check_magics_lt80(f); check_magics_type(f, @magics_lt80) end
252
+
253
+ def check_special(filename)
254
+ case File.ftype(filename)
255
+ when 'directory' then @types['inode/directory']
256
+ when 'characterSpecial' then @types['inode/chardevice']
257
+ when 'blockSpecial' then @types['inode/blockdevice']
258
+ when 'fifo' then @types['inode/fifo']
259
+ when 'socket' then @types['inode/socket']
260
+ else
261
+ nil
262
+ end
263
+ end
264
+
265
+ def check_default(f)
266
+ f.pos = 0
267
+ firsts = f.read(32) || ''
268
+ bytes = firsts.unpack('C*')
269
+ if bytes.any? {|byte| byte < 32 && ![9, 10, 13].include?(byte) }
270
+ @types['application/octet-stream']
271
+ else
272
+ @types['text/plain']
273
+ end
274
+ end
275
+
276
+ def load_globs(file)
277
+ open(file) { |f|
278
+ f.each { |line|
279
+ next if line =~ /^#/
280
+ cline = line.chomp
281
+ type, pattern = cline.split ':', 2
282
+ @types[type].glob_patterns << pattern
283
+ @globs[pattern] = @types[type] unless @globs.has_key? pattern
284
+ }
285
+ }
286
+ end
287
+
288
+ def load_magic(file)
289
+ @magics.concat Magic.parse_magic(File.read(file))
290
+ end
291
+ end
292
+
293
# Locate the data directories. An environment variable that is set but
# empty is treated like an unset one, so the defaults still apply.
xdg_data_home = ENV['XDG_DATA_HOME'].to_s
xdg_data_home = "#{ENV['HOME']}/.local/share" if xdg_data_home.empty?
xdg_data_dirs = ENV['XDG_DATA_DIRS'].to_s
xdg_data_dirs = "/usr/local/share/:/usr/share" if xdg_data_dirs.empty?

# The user directory comes first. Empty path segments ("a::b") are dropped
# so they do not turn into a bogus "/mime" directory.
@mime_dirs = (xdg_data_home + ':' + xdg_data_dirs).split(':').
  reject { |dir| dir.empty? }.
  collect { |dir| "#{dir}/mime" }

# Types are created on first reference while the database files are loaded.
@types = Hash.new { |h, k| h[k] = Type.new(k) }
@magics = []
@globs = {}

@mime_dirs.each { |dir|
  glob_file = "#{dir}/globs"
  load_globs glob_file if File.file? glob_file

  magic_file = "#{dir}/magic"
  load_magic magic_file if File.file? magic_file
}

# Highest priority first, then hand every magic to its type and split the
# list at priority 80 for the two magic passes.
@magics.sort! { |a, b| b.priority <=> a.priority }
@magics.each { |m| @types[m.type].magics << m }
@magics_gt80, @magics_lt80 = @magics.partition { |m| m.priority >= 80 }
315
+ end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hanklords-shared-mime-info
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.1"
5
+ platform: ruby
6
+ authors:
7
+ - Mael Clerambault
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-02-15 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: mael@clerambault.fr
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/shared-mime-info.rb
26
+ - Rakefile
27
+ - LICENSE
28
+ - README.rdoc
29
+ has_rdoc: true
30
+ homepage:
31
+ post_install_message:
32
+ rdoc_options: []
33
+
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: "0"
41
+ version:
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ requirements: []
49
+
50
+ rubyforge_project:
51
+ rubygems_version: 1.2.0
52
+ signing_key:
53
+ specification_version: 2
54
+ summary: Library to guess the MIME type of a file with both filename lookup and magic file detection
55
+ test_files: []
56
+