hanklords-shared-mime-info 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. data/LICENSE +13 -0
  2. data/README.rdoc +19 -0
  3. data/Rakefile +35 -0
  4. data/lib/shared-mime-info.rb +315 -0
  5. metadata +56 -0
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2006 Mael Clerambault <mael@clerambault.fr>
2
+
3
+ Permission to use, copy, modify, and distribute this software for any
4
+ purpose with or without fee is hereby granted, provided that the above
5
+ copyright notice and this permission notice appear in all copies.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = shared-mime-info
2
+
3
+ shared-mime-info is a pure Ruby library for accessing the MIME info database provided by Freedesktop[http://freedesktop.org/] and described in {Standards/shared-mime-info-spec}[http://freedesktop.org/wiki/Specifications/shared-mime-info-spec].
4
+
5
+ = Project
6
+
7
+ The RubyForge project: http://rubyforge.org/projects/shared-mime
8
+
9
+ The GitHub repository: http://github.com/hanklords/shared-mime-info/tree/master
10
+
11
+
12
+ = Usage
13
+
14
+ require 'shared-mime-info'
15
+
16
+ MIME.check 'sample.jpg' # => #<MIME::Type ..., @type="image/jpeg">
17
+
18
+ MIME['image/jpeg'].match_filename? 'sample.jpg' # => true
19
+
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
require 'rake/rdoctask'
require 'rake/packagetask'
require 'rake/gempackagetask'

# Files that are shipped in the packaged gem.
PKG_FILES = FileList["lib/*.rb", "Rakefile", "LICENSE", "README.rdoc"]

# Gem metadata shared by the packaging task and the :spec task below.
spec = Gem::Specification.new do |s|
  s.name = "shared-mime-info"
  s.version = '0.1'
  s.summary = "Library to guess the MIME type of a file with both filename lookup and magic file detection"
  s.author = "Mael Clerambault"
  s.email = "mael@clerambault.fr"
  s.has_rdoc = true
  s.require_path = 'lib'
  s.files = PKG_FILES.to_a
end

# API documentation built from the README and the library sources.
Rake::RDocTask.new do |rd|
  rd.rdoc_files.include("README.rdoc", "lib/*.rb")
  rd.options << "--inline-source"
end

# Gem packaging; a .tar.gz archive is requested in addition to the gem.
Rake::GemPackageTask.new(spec) { |pkg| pkg.need_tar_gz = true }

desc 'Generate the magics parser'
file "lib/magics.rb" => "magics.rl" do |t|
  # Runs ragel over every prerequisite and writes the result to the task's target.
  sources = t.prerequisites.join(' ')
  sh "ragel -R -o #{t.name} #{sources}"
end

desc 'Generate the gemspec'
task :spec do
  # Dumps the specification above as Ruby source into <name>.gemspec.
  File.open("#{spec.name}.gemspec", "w") { |gemspec| gemspec.puts(spec.to_ruby) }
end
data/lib/shared-mime-info.rb ADDED
@@ -0,0 +1,315 @@
1
+ # Copyright (c) 2006 Mael Clerambault <mael@clerambault.fr>
2
+ #
3
+ # Permission to use, copy, modify, and distribute this software for any
4
+ # purpose with or without fee is hereby granted, provided that the above
5
+ # copyright notice and this permission notice appear in all copies.
6
+ #
7
+ # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
+
15
+ $: << File.dirname(__FILE__)
16
+
17
+ require 'rexml/document'
18
+ require 'magics'
19
+
20
+ # This provides a way to guess the mime type of a file by doing both
21
+ # filename lookups and _magic_ file checks. This implementation tries to
22
+ # follow version 0.13 of the
23
+ # specification[http://standards.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.13.html].
24
+ module MIME
25
+ VERSION = '0.1'
26
+
27
module Magic # :nodoc: all
  # Raised by Entry#add_subentry when an entry cannot be attached at its
  # declared indentation level.
  class BadMagic < StandardError; end

  # A single byte-pattern rule. An entry matches a file when its own
  # pattern is found and, if it has sub-entries, at least one of them
  # matches as well.
  class Entry
    attr_reader :indent

    # indent::       nesting level of this rule
    # start_offset:: position in the file where the search window begins
    # value_length:: number of bytes in _value_
    # value::        bytes to look for (frozen here)
    # mask::         optional bit mask applied to both sides of the
    #                comparison; nil means "compare all bits" (frozen here)
    # word_size::    stored only; nothing in this class reads it
    #                (NOTE(review): confirm whether the parser handles it)
    # range_length:: number of consecutive start positions to try
    def initialize(indent, start_offset, value_length, value, mask, word_size, range_length)
      @sub_entries = []
      @indent = indent
      @start_offset = start_offset
      @value_length = value_length
      @range_length = range_length
      @word_size = word_size
      @value = value.freeze
      @mask = mask.freeze
    end

    # Attaches _entry_ below this one. Direct children (indent + 1) are
    # stored here; deeper entries are handed to the most recently added
    # child. Raises BadMagic when the entry is not deeper than this one or
    # when there is no child to hand it to.
    def add_subentry(entry)
      depth = entry.indent - @indent
      raise BadMagic if depth < 1

      if depth == 1
        @sub_entries << entry
      else
        raise BadMagic if @sub_entries.empty?
        @sub_entries.last.add_subentry(entry)
      end
    end

    # Tests the IO object _f_ against this rule and its sub-entries.
    def =~(f)
      matched = check_file(f)
      matched && (@sub_entries.empty? || @sub_entries.any? { |sub| sub =~ f })
    end

    private

    # Reads the search window from _f_ and compares the masked value at
    # every offset inside it. Returns true on the first hit, false otherwise.
    def check_file(f)
      f.pos = @start_offset
      mask_bytes = (@mask || [0xff].pack('c') * @value_length).unpack('c*')
      value_bytes = @value.unpack('c*')
      window = (f.read(@value_length + @range_length - 1) || '').unpack('c*')

      (0...window.size).any? do |offset|
        candidate = window[offset, @value_length]
        # A candidate shorter than the value pads with nil, which never
        # compares equal, so truncated reads simply fail to match.
        value_bytes.zip(mask_bytes, candidate).all? { |vb, mb, rb| (rb & mb) == (vb & mb) }
      end
    end
  end

  # Top-level rule of a MIME type. It carries the type name and priority
  # and has no byte pattern of its own: only its sub-entries are tested.
  class RootEntry < Entry
    attr_reader :priority, :type

    def initialize(type, priority)
      @type = type
      @priority = priority
      @indent = -1 # so that entries with indent 0 become direct children
      @sub_entries = []
    end

    private

    # A root entry always passes its own check.
    def check_file(*_args)
      true
    end
  end
end
90
+
91
# Type represents a single mime type such as <b>text/html</b>.
class Type
  # Returns the type of a mime type as a String, such as <b>text/html</b>.
  attr_reader :type

  # The magic rules and the filename glob patterns registered for this type.
  attr_reader :magics, :glob_patterns

  def initialize(type) # :nodoc:
    @type = type
    @glob_patterns = []
    @magics = []
  end

  # Returns the media part of the type of a mime type as a string,
  # such as <b>text</b> for a type of <b>text/html</b>.
  def media
    @type.split('/', 2).first
  end

  # Returns the subtype part of the type of a mime type as a string,
  # such as <b>html</b> for a type of <b>text/html</b>.
  def subtype
    @type.split('/', 2).last
  end

  # Synonym of type.
  def to_s
    @type
  end

  # Returns a Hash of the comments associated with a mime type in
  # different languages. The comment without a language is the Hash default.
  # Returns an empty Hash when none of the MIME directories holds a
  # definition file for this type.
  #
  #  MIME['text/html'].comment.default
  #   => "HTML page"
  #
  #  MIME['text/html'].comment['fr']
  #   => "page HTML"
  def comment
    definition_elements('*/comment').each_with_object({}) do |c, comments|
      lang = c.attributes['xml:lang']
      if lang
        comments[lang] = c.text
      else
        comments.default = c.text
      end
    end
  end

  # Returns all the types this type is a subclass of, looked up through
  # MIME[]. Returns an empty Array when none of the MIME directories holds
  # a definition file for this type.
  def parents
    definition_elements('*/sub-class-of').map { |c| MIME[c.attributes['type']] }
  end

  # Equality test. A Type equals another Type with the same name, or a
  # string-like object holding that name.
  #
  #  MIME['text/html'] == 'text/html'
  #   => true
  def ==(type)
    if type.is_a? Type
      @type == type.type
    elsif type.respond_to? :to_str
      @type == type
    else
      false
    end
  end

  # Check if _filename_ is of this particular type by comparing it to
  # some common extensions. Only the base name of _filename_ is tested.
  #
  #  MIME['text/html'].match_filename? 'index.html'
  #   => true
  def match_filename?(filename)
    basename = File.basename(filename)
    @glob_patterns.any? { |pattern| File.fnmatch pattern, basename }
  end

  # Check if _file_ is of this particular type by looking for precise
  # patterns (_magic_ numbers) in different locations of the file.
  #
  # _file_ must be an IO object opened with read permissions.
  def match_file?(f)
    @magics.any? { |m| m =~ f }
  end

  private

  # Path of the first existing "<dir>/<type>.xml" among MIME.mime_dirs,
  # or nil when no directory has one.
  def definition_file
    MIME.mime_dirs.map { |dir| "#{dir}/#{@type}.xml" }.find { |path| File.file? path }
  end

  # Elements of this type's XML definition selected by _xpath_, or an
  # empty Array when there is no definition file. File.open is used so the
  # path is always treated as a plain file name.
  def definition_elements(xpath)
    path = definition_file
    return [] unless path

    File.open(path) { |f| REXML::XPath.match(REXML::Document.new(f), xpath) }
  end
end
192
+
193
class << self
  attr_reader :mime_dirs # :nodoc:

  # Returns the MIME::Type object corresponding to _type_, or _nil_ when
  # no such type has been registered.
  def [](type)
    @types.fetch type, nil
  end

  # Look for the type of a file by doing successive checks on
  # the filename patterns. The base name is tried as given first and,
  # when nothing matches, again in lower case. When several patterns
  # match, the longest one wins (ties are broken by string order).
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_globs(filename)
    basename = File.basename(filename)
    patterns = @globs.keys
    found = patterns.select { |pattern| File.fnmatch pattern, basename }

    if found.empty?
      downcase_basename = basename.downcase
      found = patterns.select { |pattern| File.fnmatch pattern, downcase_basename }
    end

    best = found.max_by { |pattern| [pattern.length, pattern] }
    best && @globs[best]
  end

  # Look for the type of a file by doing successive checks on
  # _magic_ numbers. _file_ is either an object responding to +read+ or
  # the name of a file, which is then opened in binary mode.
  #
  # Returns a MIME::Type object or _nil_ if nothing matches.
  def check_magics(file)
    return check_magics_type(file, @magics) if file.respond_to? :read

    # File.open (not Kernel#open) so that a name starting with "|" is
    # treated as a file name and never run as a command.
    File.open(file, 'rb') { |f| check_magics_type(f, @magics) }
  end

  # Look for the type of a file by doing successive checks with
  # the filename patterns or magic numbers. If none of the matches
  # are successful, returns a type of <b>application/octet-stream</b> if
  # the file contains control characters at its beginning, or <b>text/plain</b> otherwise.
  #
  # Checks run in this order: special file kinds, magics with priority
  # >= 80, filename patterns, remaining magics, content-based default.
  #
  # Returns a MIME::Type object.
  def check(filename)
    check_special(filename) ||
      File.open(filename, 'rb') { |f|
        check_magics_gt80(f) ||
          check_globs(filename) ||
          check_magics_lt80(f) ||
          check_default(f)
      }
  end

  private

  # First magic of _set_ matching _f_, resolved to its type through MIME[];
  # nil when no magic matches.
  def check_magics_type(f, set)
    hit = set.find { |m| m =~ f }
    hit && MIME[hit.type]
  end

  def check_magics_gt80(f); check_magics_type(f, @magics_gt80) end
  def check_magics_lt80(f); check_magics_type(f, @magics_lt80) end

  # Maps non-regular file kinds reported by File.ftype to their inode/*
  # type; returns nil for anything else.
  def check_special(filename)
    case File.ftype(filename)
    when 'directory' then @types['inode/directory']
    when 'characterSpecial' then @types['inode/chardevice']
    when 'blockSpecial' then @types['inode/blockdevice']
    when 'fifo' then @types['inode/fifo']
    when 'socket' then @types['inode/socket']
    else
      nil
    end
  end

  # Fallback guess from the first 32 bytes: any byte below 32 other than
  # tab, line feed or carriage return means binary data.
  def check_default(f)
    f.pos = 0
    firsts = f.read(32) || ''
    bytes = firsts.unpack('C*')
    if bytes.any? { |byte| byte < 32 && ![9, 10, 13].include?(byte) }
      @types['application/octet-stream']
    else
      @types['text/plain']
    end
  end

  # Reads a "type:pattern" globs file. Comment lines and lines without
  # both a type and a pattern are skipped. Every pattern is added to its
  # type; in the global pattern table the first definition of a pattern
  # is kept.
  def load_globs(file)
    File.foreach(file) { |line|
      next if line =~ /^#/
      type, pattern = line.chomp.split ':', 2
      next if type.nil? || type.empty? || pattern.nil? || pattern.empty?

      @types[type].glob_patterns << pattern
      @globs[pattern] = @types[type] unless @globs.has_key? pattern
    }
  end

  # Parses a magic file and appends its entries to the known magics.
  def load_magic(file)
    @magics.concat Magic.parse_magic(File.read(file))
  end
end
292
+
293
# Locate the MIME directories from the XDG environment. An empty variable
# is treated like an unset one and falls back to the default value.
xdg_data_home = ENV['XDG_DATA_HOME'].to_s
xdg_data_home = "#{ENV['HOME']}/.local/share" if xdg_data_home.empty?
xdg_data_dirs = ENV['XDG_DATA_DIRS'].to_s
xdg_data_dirs = "/usr/local/share/:/usr/share" if xdg_data_dirs.empty?

# The user directory comes first, then the system directories in the order
# given. Empty path segments (e.g. from "a::b") are dropped so they do not
# turn into a bogus "/mime" directory.
@mime_dirs = (xdg_data_home + ':' + xdg_data_dirs).split(':').reject { |dir|
  dir.empty?
}.collect { |dir|
  "#{dir}/mime"
}

# Types are created on first access by name.
@types = Hash.new { |h, k| h[k] = Type.new(k) }
@magics = []
@globs = {}

# Load whatever "globs" and "magic" files each directory provides.
@mime_dirs.each { |dir|
  glob_file = "#{dir}/globs"
  load_globs glob_file if File.file? glob_file

  magic_file = "#{dir}/magic"
  load_magic magic_file if File.file? magic_file
}

# Highest priority first; every magic is also attached to its type, and the
# list is split at priority 80 into the two sets used by MIME.check.
@magics.sort! { |a, b| b.priority <=> a.priority }
@magics.each { |m| @types[m.type].magics << m }
@magics_gt80, @magics_lt80 = @magics.partition { |m| m.priority >= 80 }
315
+ end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hanklords-shared-mime-info
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.1"
5
+ platform: ruby
6
+ authors:
7
+ - Mael Clerambault
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-02-15 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: mael@clerambault.fr
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/shared-mime-info.rb
26
+ - Rakefile
27
+ - LICENSE
28
+ - README.rdoc
29
+ has_rdoc: true
30
+ homepage:
31
+ post_install_message:
32
+ rdoc_options: []
33
+
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: "0"
41
+ version:
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ requirements: []
49
+
50
+ rubyforge_project:
51
+ rubygems_version: 1.2.0
52
+ signing_key:
53
+ specification_version: 2
54
+ summary: Library to guess the MIME type of a file with both filename lookup and magic file detection
55
+ test_files: []
56
+