content_type 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,42 @@
1
+ |
2
+ | ____,____, _, _,____,____,_, _,____,
3
+ | (-/ (-/ \(-|\ |(-| (-|_,(-|\ |(-|
4
+ | _\__,_\__/,_| \|,_|, _|__,_| \|,_|,
5
+ | ( ( ( ( ( ( (
6
+ | ____,_ _,____,____,
7
+ | (-| (-\_/(-|__|-|_,
8
+ | _|, _|, _| _|__,
9
+ | ( ( ( (
10
+ |
11
+ | libmagic bindings by dsturnbull
12
+ +------------------------------------------
13
+
14
+ SYNOPSIS
15
+ $ gem install content_type
16
+ irb> require 'content_type'
17
+ irb> File.content_type('file.pdf') #=> "application/pdf"
18
+ irb> File.open('file.doc').content_type #=> "application/msword"
19
+ irb> ContentType.new('file.jpg').content_type #=> "image/jpeg"
20
+
21
+ DESCRIPTION
22
+ ContentType is a simple C extension that binds to libmagic in order to
23
+ efficiently detect the MIME types of files.
24
+
25
+ Using the should_be_faster rspec extension,
26
+ ext.should be_at_least(5).times.faster_than(shell)
27
+
28
+ In addition to being fast, it is far more accurate than the current
29
+ practice of matching file extensions against regexps in a case statement.
30
+
31
+ DIAGNOSTICS
32
+ ContentType raises "ArgumentError: invalid file" for all errors relating to
33
+ the file it's working on.
34
+
35
+ ContentType raises "RuntimeError: open", "RuntimeError: load" and
36
+ "RuntimeError: file" for libmagic errors.
37
+
38
+ Any file that cannot be identified is simply said to be "data".
39
+
40
+ AVAILABILITY
41
+ http://github.com/dsturnbull/content_type
42
+
data/ext/content_type.c CHANGED
@@ -2,8 +2,10 @@
2
2
  #include <magic.h>
3
3
  #include <stdio.h>
4
4
  #include <sys/stat.h>
5
+ #include <stdbool.h>
5
6
 
6
7
  #define MAGIC_OPTIONS MAGIC_SYMLINK | MAGIC_MIME_TYPE | MAGIC_PRESERVE_ATIME
8
+ #define MAX_EXT_LEN 16
7
9
 
8
10
  VALUE content_type = Qnil;
9
11
  VALUE content_type_initialize(VALUE self, VALUE path);
@@ -13,6 +15,27 @@ VALUE file_content_type_wrap(VALUE self, VALUE path);
13
15
  VALUE file_content_type(VALUE self);
14
16
  VALUE file_singleton_content_type(VALUE self, VALUE path);
15
17
 
18
+ // http://www.webdeveloper.com/forum/showthread.php?t=162526
19
+ const char *content_type_ext_overrides[][2] = {
20
+ { "docm", "application/vnd.ms-word.document.macroEnabled.12" },
21
+ { "docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document" },
22
+ { "dotm", "application/vnd.ms-word.template.macroEnabled.12" },
23
+ { "dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.template" },
24
+ { "potm", "application/vnd.ms-powerpoint.template.macroEnabled.12" },
25
+ { "potx", "application/vnd.openxmlformats-officedocument.presentationml.template" },
26
+ { "ppam", "application/vnd.ms-powerpoint.addin.macroEnabled.12" },
27
+ { "ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12" },
28
+ { "ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow" },
29
+ { "pptm", "application/vnd.ms-powerpoint.presentation.macroEnabled.12" },
30
+ { "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation" },
31
+ { "xlam", "application/vnd.ms-excel.addin.macroEnabled.12" },
32
+ { "xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12" },
33
+ { "xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12" },
34
+ { "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" },
35
+ { "xltm", "application/vnd.ms-excel.template.macroEnabled.12" },
36
+ { "xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.template" },
37
+ };
38
+
16
39
  void magic_fail(const char *error);
17
40
 
18
41
  void
@@ -54,12 +77,48 @@ content_type_initialize(VALUE self, VALUE path)
54
77
  return self;
55
78
  }
56
79
 
80
+ bool
81
+ content_type_file_ext(VALUE self, char *ext)
82
+ {
83
+ char *filepath;
84
+ char t;
85
+ int i, j, k;
86
+
87
+ filepath = RSTRING_PTR(rb_iv_get(self, "@filepath"));
88
+
89
+ j = 0;
90
+ for (i = RSTRING_LEN(rb_iv_get(self, "@filepath")) - 1; i > 0 && j < MAX_EXT_LEN; i--) {
91
+ if (filepath[i] == '.') {
92
+ for (k = 0; k < j/2 ; k++) {
93
+ t = ext[j - 1 - k];
94
+ ext[j - 1 - k] = ext[k];
95
+ ext[k] = t;
96
+ }
97
+ return ext;
98
+ }
99
+ ext[j] = filepath[i];
100
+ j++;
101
+ }
102
+
103
+ return NULL;
104
+ }
105
+
57
106
  VALUE
58
107
  content_type_content_type(VALUE self)
59
108
  {
60
- VALUE ct;
61
- struct magic_set *mh;
62
- const char *mime;
109
+ VALUE ct;
110
+ struct magic_set *mh;
111
+ const char *mime;
112
+ char ext[MAX_EXT_LEN]; // TODO dynamically sized
113
+ int i;
114
+
115
+ if (content_type_file_ext(self, ext))
116
+ for (i = sizeof(content_type_ext_overrides) / sizeof(char *) / 2 - 1; i >= 0; i--)
117
+ if ((memcmp(ext, content_type_ext_overrides[i][0], strlen(content_type_ext_overrides[i][0]))) == 0) {
118
+ rb_iv_set(self, "@content_type", rb_str_new2(content_type_ext_overrides[i][1]));
119
+ rb_iv_set(self, "@processed", Qtrue);
120
+ return rb_iv_get(self, "@content_type");
121
+ }
63
122
 
64
123
  if (rb_iv_get(self, "@processed"))
65
124
  return rb_iv_get(self, "@content_type");
@@ -8,6 +8,9 @@ describe ContentType do
8
8
  @img = 'spec/fixtures/grindewald.jpg'
9
9
  @pdf = 'spec/fixtures/pdftest.pdf'
10
10
  @lzm = 'spec/fixtures/compressed.jpg.lz'
11
+ @dcx = 'spec/fixtures/wordtest.docx'
12
+ @dot = 'spec/fixtures/.supercabanafuntimekgozzzzzzzzzzzzzzzzzzzz'
13
+ @pdf_with_charset = 'spec/fixtures/bash.pdf'
11
14
  end
12
15
 
13
16
  context 'initialising' do
@@ -31,6 +34,18 @@ describe ContentType do
31
34
  end
32
35
  end
33
36
 
37
+ context 'file ext overrides' do
38
+ it 'should detect docx files' do
39
+ ct = ContentType.new(@dcx)
40
+ ct.content_type.should == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
41
+ end
42
+
43
+ it 'should handle dot files' do
44
+ ct = ContentType.new(@dot)
45
+ ct.content_type.should == 'text/plain'
46
+ end
47
+ end
48
+
34
49
  context 'detecting mime type' do
35
50
  it 'should detect images' do
36
51
  ct = ContentType.new(@img)
@@ -42,6 +57,11 @@ describe ContentType do
42
57
  ct.content_type.should == 'application/pdf'
43
58
  end
44
59
 
60
+ it 'should detect mime types with charsets by ignoring the charset' do
61
+ ct = ContentType.new(@pdf_with_charset)
62
+ ct.content_type.should == 'application/pdf'
63
+ end
64
+
45
65
  it 'should detect lzma files' do
46
66
  ct = ContentType.new(@lzm)
47
67
  ct.content_type.should == 'application/x-lzip'
metadata CHANGED
@@ -3,10 +3,10 @@ name: content_type
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
- - 1
7
- - 1
6
+ - 2
8
7
  - 0
9
- version: 1.1.0
8
+ - 0
9
+ version: 2.0.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - David Turnbull
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-03-16 00:00:00 +11:00
17
+ date: 2010-04-08 00:00:00 +10:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -24,12 +24,13 @@ executables: []
24
24
 
25
25
  extensions:
26
26
  - ext/extconf.rb
27
- extra_rdoc_files: []
28
-
27
+ extra_rdoc_files:
28
+ - README
29
29
  files:
30
30
  - Rakefile
31
31
  - ext/content_type.c
32
32
  - ext/extconf.rb
33
+ - README
33
34
  has_rdoc: true
34
35
  homepage: http://github.com/dsturnbull/content_type
35
36
  licenses: []