RubyGems - content_type - Versions diffs - 1.1.0 → 2.0.0 - Mend

content_type 1.1.0 → 2.0.0

Files changed (4) hide show

data/README ADDED Viewed

@@ -0,0 +1,42 @@
+|
+|  ____,____, _,  _,____,____,_,  _,____,
+| (-/  (-/  \(-|\ |(-|  (-|_,(-|\ |(-|
+|  _\__,_\__/,_| \|,_|,  _|__,_| \|,_|,
+| (    (     (     (    (    (     (
+|               ____,_  _,____,____,
+|              (-|  (-\_/(-|__|-|_,
+|               _|,   _|, _|   _|__,
+|              (     (   (    (
+|
+|       libmagic bindings by dsturnbull
++------------------------------------------
+SYNOPSIS
+     $ gem install content_type
+     irb> require 'content_type'
+     irb> File.content_type('file.pdf')             #=> "application/pdf"
+     irb> File.open('file.doc').content_type        #=> "application/msword"
+     irb> ContentType.new('file.jpg').content_type  #=> "image/jpeg"
+DESCRIPTION
+     ContentType is a simple C extension that binds to libmagic in order to
+     efficiently detect mime types of files.
+     Using the should_be_faster rspec extension,
+       ext.should be_at_least(5).times.faster_than(shell)
+     In addition to being fast, it is far more accurate than the current
+     practice of casing a regexp on file extensions.
+DIAGNOSTICS
+     ContentType raises "ArgumentError: invalid file" for all errors relating to
+     the file it's working on.
+     ContentType raises "RuntimeError: open", "RuntimeError: load" and
+     "RuntimeError: file" for libmagic errors.
+     Any file that cannot be identified is simply said to be "data".
+AVAILABILITY
+     http://github.com/dsturnbull/content_type

data/ext/content_type.c CHANGED Viewed

@@ -2,8 +2,10 @@
 #include <magic.h>
 #include <stdio.h>
 #include <sys/stat.h>
+#include <stdbool.h>
 #define MAGIC_OPTIONS MAGIC_SYMLINK | MAGIC_MIME_TYPE | MAGIC_PRESERVE_ATIME
+#define MAX_EXT_LEN 16
 VALUE content_type = Qnil;
 VALUE content_type_initialize(VALUE self, VALUE path);
@@ -13,6 +15,27 @@ VALUE file_content_type_wrap(VALUE self, VALUE path);
 VALUE file_content_type(VALUE self);
 VALUE file_singleton_content_type(VALUE self, VALUE path);
+/*
+ * File-extension overrides: each row is { extension without the dot, MIME type }.
+ * content_type_content_type compares the file's extension against column 0
+ * (only the first strlen(column 0) bytes are compared, i.e. a prefix match)
+ * and, on a match, stores column 1 as the content type.
+ * Source of the list: http://www.webdeveloper.com/forum/showthread.php?t=162526
+ */
+const char *content_type_ext_overrides[][2] = {
+	{ "docm", "application/vnd.ms-word.document.macroEnabled.12" },
+	{ "docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document" },
+	{ "dotm", "application/vnd.ms-word.template.macroEnabled.12" },
+	{ "dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.template" },
+	{ "potm", "application/vnd.ms-powerpoint.template.macroEnabled.12" },
+	{ "potx", "application/vnd.openxmlformats-officedocument.presentationml.template" },
+	{ "ppam", "application/vnd.ms-powerpoint.addin.macroEnabled.12" },
+	{ "ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12" },
+	{ "ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow" },
+	{ "pptm", "application/vnd.ms-powerpoint.presentation.macroEnabled.12" },
+	{ "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation" },
+	{ "xlam", "application/vnd.ms-excel.addin.macroEnabled.12" },
+	{ "xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12" },
+	{ "xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12" },
+	{ "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" },
+	{ "xltm", "application/vnd.ms-excel.template.macroEnabled.12" },
+	{ "xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.template" },
+};
 void magic_fail(const char *error);
 void
@@ -54,12 +77,48 @@ content_type_initialize(VALUE self, VALUE path)
     return self;
 }
+/*
+ * Copy the extension of @filepath into ext as a NUL-terminated string.
+ *
+ * The extension is the text after the last '.' of the final path component,
+ * without the dot itself. ext must point to at least MAX_EXT_LEN bytes.
+ *
+ * Returns true when an extension of 1 to MAX_EXT_LEN - 1 characters was
+ * copied. Returns false (ext left untouched) when the final component has no
+ * '.', is a dot file such as ".bashrc", ends in a bare '.', or when the
+ * extension does not fit in the buffer.
+ */
+bool
+content_type_file_ext(VALUE self, char *ext)
+{
+	VALUE		path;
+	const char	*filepath;
+	long		len, dot, n, k;
+
+	/* Read the ivar once so pointer and length describe the same String. */
+	path = rb_iv_get(self, "@filepath");
+	filepath = RSTRING_PTR(path);
+	len = RSTRING_LEN(path);
+
+	/* Walk backwards to the last '.', never leaving the final component:
+	 * a '.' inside a directory name is not this file's extension. */
+	for (dot = len - 1; dot > 0; dot--) {
+		if (filepath[dot] == '/')
+			return false;
+		if (filepath[dot] == '.')
+			break;
+	}
+
+	/* No '.' found, or the '.' is the first character of the basename
+	 * (a dot file), so there is no extension to report. */
+	if (dot <= 0 || filepath[dot - 1] == '/')
+		return false;
+
+	/* Reject an empty extension and one that would leave no room for the
+	 * terminating NUL. */
+	n = len - dot - 1;
+	if (n <= 0 || n >= MAX_EXT_LEN)
+		return false;
+
+	for (k = 0; k < n; k++)
+		ext[k] = filepath[dot + 1 + k];
+	/* Terminate so callers never compare against uninitialised bytes. */
+	ext[n] = '\0';
+	return true;
+}
 VALUE
 content_type_content_type(VALUE self)
 {
-    VALUE            ct;
-    struct magic_set *mh;
-    const char       *mime;
+    VALUE				ct;
+    struct magic_set	*mh;
+    const char			*mime;
+	char				ext[MAX_EXT_LEN];	// TODO dynamicly sized
+	int					i;
+	if (content_type_file_ext(self, ext))
+		for (i = sizeof(content_type_ext_overrides) / sizeof(char *) / 2 - 1; i >= 0; i--)
+			if ((memcmp(ext, content_type_ext_overrides[i][0], strlen(content_type_ext_overrides[i][0]))) == 0) {
+				rb_iv_set(self, "@content_type", rb_str_new2(content_type_ext_overrides[i][1]));
+				rb_iv_set(self, "@processed", Qtrue);
+				return rb_iv_get(self, "@content_type");
+			}
     if (rb_iv_get(self, "@processed"))
         return rb_iv_get(self, "@content_type");

data/spec/content_type_spec.rb CHANGED Viewed

@@ -8,6 +8,9 @@ describe ContentType do
     @img = 'spec/fixtures/grindewald.jpg'
     @pdf = 'spec/fixtures/pdftest.pdf'
     @lzm = 'spec/fixtures/compressed.jpg.lz'
+    @dcx = 'spec/fixtures/wordtest.docx'
+    @dot = 'spec/fixtures/.supercabanafuntimekgozzzzzzzzzzzzzzzzzzzz'
+    @pdf_with_charset = 'spec/fixtures/bash.pdf'
   end
   context 'initialising' do
@@ -31,6 +34,18 @@ describe ContentType do
     end
   end
+  context 'file ext overrides' do
+    it 'should detect docx files' do
+      ct = ContentType.new(@dcx)
+      ct.content_type.should == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+    end
+    it 'should handle dot files' do
+      ct = ContentType.new(@dot)
+      ct.content_type.should == 'text/plain'
+    end
+  end
   context 'detecting mime type' do
     it 'should detect images' do
       ct = ContentType.new(@img)
@@ -42,6 +57,11 @@ describe ContentType do
       ct.content_type.should == 'application/pdf'
     end
+    it 'should detect mime types with charsets by ignoring the charset' do
+      ct = ContentType.new(@pdf_with_charset)
+      ct.content_type.should == 'application/pdf'
+    end
     it 'should detect lzma files' do
       ct = ContentType.new(@lzm)
       ct.content_type.should == 'application/x-lzip'

metadata CHANGED Viewed

@@ -3,10 +3,10 @@ name: content_type
 version: !ruby/object:Gem::Version
   prerelease: false
   segments:
-  - 1
-  - 1
+  - 2
   - 0
-  version: 1.1.0
+  - 0
+  version: 2.0.0
 platform: ruby
 authors:
 - David Turnbull
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-03-16 00:00:00 +11:00
+date: 2010-04-08 00:00:00 +10:00
 default_executable:
 dependencies: []
@@ -24,12 +24,13 @@ executables: []
 extensions:
 - ext/extconf.rb
-extra_rdoc_files: []
+extra_rdoc_files:
+- README
 files:
 - Rakefile
 - ext/content_type.c
 - ext/extconf.rb
+- README
 has_rdoc: true
 homepage: http://github.com/dsturnbull/content_type
 licenses: []