content_type 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +42 -0
- data/ext/content_type.c +62 -3
- data/spec/content_type_spec.rb +20 -0
- metadata +7 -6
data/README
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
|
|
2
|
+
| ____,____, _, _,____,____,_, _,____,
|
3
|
+
| (-/ (-/ \(-|\ |(-| (-|_,(-|\ |(-|
|
4
|
+
| _\__,_\__/,_| \|,_|, _|__,_| \|,_|,
|
5
|
+
| ( ( ( ( ( ( (
|
6
|
+
| ____,_ _,____,____,
|
7
|
+
| (-| (-\_/(-|__|-|_,
|
8
|
+
| _|, _|, _| _|__,
|
9
|
+
| ( ( ( (
|
10
|
+
|
|
11
|
+
| libmagic bindings by dsturnbull
|
12
|
+
+------------------------------------------
|
13
|
+
|
14
|
+
SYNOPSIS
|
15
|
+
$ gem install content_type
|
16
|
+
irb> require 'content_type'
|
17
|
+
irb> File.content_type('file.pdf') #=> "application/pdf"
|
18
|
+
irb> File.open('file.doc').content_type #=> "application/msword"
|
19
|
+
irb> ContentType.new('file.jpg').content_type #=> "image/jpeg"
|
20
|
+
|
21
|
+
DESCRIPTION
|
22
|
+
ContentType is a simple C extension that binds to libmagic in order to
|
23
|
+
efficiently detect mime types of files.
|
24
|
+
|
25
|
+
Using the should_be_faster rspec extension,
|
26
|
+
ext.should be_at_least(5).times.faster_than(shell)
|
27
|
+
|
28
|
+
In addition to being fast, it is far more accurate than the current
|
29
|
+
practice of matching file extensions against regexps in a case statement.
|
30
|
+
|
31
|
+
DIAGNOSTICS
|
32
|
+
ContentType raises "ArgumentError: invalid file" for all errors relating to
|
33
|
+
the file it's working on.
|
34
|
+
|
35
|
+
ContentType raises "RuntimeError: open", "RuntimeError: load" and
|
36
|
+
"RuntimeError: file" for libmagic errors.
|
37
|
+
|
38
|
+
Any file that cannot be identified is simply said to be "data".
|
39
|
+
|
40
|
+
AVAILABILITY
|
41
|
+
http://github.com/dsturnbull/content_type
|
42
|
+
|
data/ext/content_type.c
CHANGED
@@ -2,8 +2,10 @@
|
|
2
2
|
#include <magic.h>
|
3
3
|
#include <stdio.h>
|
4
4
|
#include <sys/stat.h>
|
5
|
+
#include <stdbool.h>
|
5
6
|
|
6
7
|
#define MAGIC_OPTIONS MAGIC_SYMLINK | MAGIC_MIME_TYPE | MAGIC_PRESERVE_ATIME
|
8
|
+
#define MAX_EXT_LEN 16
|
7
9
|
|
8
10
|
VALUE content_type = Qnil;
|
9
11
|
VALUE content_type_initialize(VALUE self, VALUE path);
|
@@ -13,6 +15,27 @@ VALUE file_content_type_wrap(VALUE self, VALUE path);
|
|
13
15
|
VALUE file_content_type(VALUE self);
|
14
16
|
VALUE file_singleton_content_type(VALUE self, VALUE path);
|
15
17
|
|
18
|
+
// http://www.webdeveloper.com/forum/showthread.php?t=162526
|
19
|
+
/*
 * Extension -> MIME type overrides.
 *
 * Each row is { extension (no leading dot), MIME type }. When a file's
 * extension matches column 0, content_type_content_type() stores column 1
 * in @content_type and returns it.
 * NOTE(review): presumably these exist because libmagic cannot tell the
 * Office Open XML formats apart by content -- confirm.
 *
 * The lookup derives the row count from sizeof, so no sentinel row is
 * needed; rows may be appended freely. Both the strings and the pointers
 * are const: the table is read-only data and must never be modified at
 * run time.
 */
const char *const content_type_ext_overrides[][2] = {
	/* Word */
	{ "docm", "application/vnd.ms-word.document.macroEnabled.12" },
	{ "docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document" },
	{ "dotm", "application/vnd.ms-word.template.macroEnabled.12" },
	{ "dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.template" },

	/* PowerPoint */
	{ "potm", "application/vnd.ms-powerpoint.template.macroEnabled.12" },
	{ "potx", "application/vnd.openxmlformats-officedocument.presentationml.template" },
	{ "ppam", "application/vnd.ms-powerpoint.addin.macroEnabled.12" },
	{ "ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12" },
	{ "ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow" },
	{ "pptm", "application/vnd.ms-powerpoint.presentation.macroEnabled.12" },
	{ "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation" },

	/* Excel */
	{ "xlam", "application/vnd.ms-excel.addin.macroEnabled.12" },
	{ "xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12" },
	{ "xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12" },
	{ "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" },
	{ "xltm", "application/vnd.ms-excel.template.macroEnabled.12" },
	{ "xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.template" },
};
|
38
|
+
|
16
39
|
void magic_fail(const char *error);
|
17
40
|
|
18
41
|
void
|
@@ -54,12 +77,48 @@ content_type_initialize(VALUE self, VALUE path)
|
|
54
77
|
return self;
|
55
78
|
}
|
56
79
|
|
80
|
+
bool
|
81
|
+
content_type_file_ext(VALUE self, char *ext)
|
82
|
+
{
|
83
|
+
char *filepath;
|
84
|
+
char t;
|
85
|
+
int i, j, k;
|
86
|
+
|
87
|
+
filepath = RSTRING_PTR(rb_iv_get(self, "@filepath"));
|
88
|
+
|
89
|
+
j = 0;
|
90
|
+
for (i = RSTRING_LEN(rb_iv_get(self, "@filepath")) - 1; i > 0 && j < MAX_EXT_LEN; i--) {
|
91
|
+
if (filepath[i] == '.') {
|
92
|
+
for (k = 0; k < j/2 ; k++) {
|
93
|
+
t = ext[j - 1 - k];
|
94
|
+
ext[j - 1 - k] = ext[k];
|
95
|
+
ext[k] = t;
|
96
|
+
}
|
97
|
+
return ext;
|
98
|
+
}
|
99
|
+
ext[j] = filepath[i];
|
100
|
+
j++;
|
101
|
+
}
|
102
|
+
|
103
|
+
return NULL;
|
104
|
+
}
|
105
|
+
|
57
106
|
VALUE
|
58
107
|
content_type_content_type(VALUE self)
|
59
108
|
{
|
60
|
-
VALUE
|
61
|
-
struct magic_set
|
62
|
-
const char
|
109
|
+
VALUE ct;
|
110
|
+
struct magic_set *mh;
|
111
|
+
const char *mime;
|
112
|
+
char ext[MAX_EXT_LEN]; // TODO dynamicly sized
|
113
|
+
int i;
|
114
|
+
|
115
|
+
if (content_type_file_ext(self, ext))
|
116
|
+
for (i = sizeof(content_type_ext_overrides) / sizeof(char *) / 2 - 1; i >= 0; i--)
|
117
|
+
if ((memcmp(ext, content_type_ext_overrides[i][0], strlen(content_type_ext_overrides[i][0]))) == 0) {
|
118
|
+
rb_iv_set(self, "@content_type", rb_str_new2(content_type_ext_overrides[i][1]));
|
119
|
+
rb_iv_set(self, "@processed", Qtrue);
|
120
|
+
return rb_iv_get(self, "@content_type");
|
121
|
+
}
|
63
122
|
|
64
123
|
if (rb_iv_get(self, "@processed"))
|
65
124
|
return rb_iv_get(self, "@content_type");
|
data/spec/content_type_spec.rb
CHANGED
@@ -8,6 +8,9 @@ describe ContentType do
|
|
8
8
|
@img = 'spec/fixtures/grindewald.jpg'
|
9
9
|
@pdf = 'spec/fixtures/pdftest.pdf'
|
10
10
|
@lzm = 'spec/fixtures/compressed.jpg.lz'
|
11
|
+
@dcx = 'spec/fixtures/wordtest.docx'
|
12
|
+
@dot = 'spec/fixtures/.supercabanafuntimekgozzzzzzzzzzzzzzzzzzzz'
|
13
|
+
@pdf_with_charset = 'spec/fixtures/bash.pdf'
|
11
14
|
end
|
12
15
|
|
13
16
|
context 'initialising' do
|
@@ -31,6 +34,18 @@ describe ContentType do
|
|
31
34
|
end
|
32
35
|
end
|
33
36
|
|
37
|
+
context 'file ext overrides' do
|
38
|
+
it 'should detect docx files' do
|
39
|
+
ct = ContentType.new(@dcx)
|
40
|
+
ct.content_type.should == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should handle dot files' do
|
44
|
+
ct = ContentType.new(@dot)
|
45
|
+
ct.content_type.should == 'text/plain'
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
34
49
|
context 'detecting mime type' do
|
35
50
|
it 'should detect images' do
|
36
51
|
ct = ContentType.new(@img)
|
@@ -42,6 +57,11 @@ describe ContentType do
|
|
42
57
|
ct.content_type.should == 'application/pdf'
|
43
58
|
end
|
44
59
|
|
60
|
+
it 'should detect mime types with charsets by ignoring the charset' do
|
61
|
+
ct = ContentType.new(@pdf_with_charset)
|
62
|
+
ct.content_type.should == 'application/pdf'
|
63
|
+
end
|
64
|
+
|
45
65
|
it 'should detect lzma files' do
|
46
66
|
ct = ContentType.new(@lzm)
|
47
67
|
ct.content_type.should == 'application/x-lzip'
|
metadata
CHANGED
@@ -3,10 +3,10 @@ name: content_type
|
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
|
-
-
|
7
|
-
- 1
|
6
|
+
- 2
|
8
7
|
- 0
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 2.0.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David Turnbull
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-04-08 00:00:00 +10:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -24,12 +24,13 @@ executables: []
|
|
24
24
|
|
25
25
|
extensions:
|
26
26
|
- ext/extconf.rb
|
27
|
-
extra_rdoc_files:
|
28
|
-
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README
|
29
29
|
files:
|
30
30
|
- Rakefile
|
31
31
|
- ext/content_type.c
|
32
32
|
- ext/extconf.rb
|
33
|
+
- README
|
33
34
|
has_rdoc: true
|
34
35
|
homepage: http://github.com/dsturnbull/content_type
|
35
36
|
licenses: []
|