content_type 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +42 -0
- data/ext/content_type.c +62 -3
- data/spec/content_type_spec.rb +20 -0
- metadata +7 -6
data/README
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
|
|
2
|
+
| ____,____, _, _,____,____,_, _,____,
|
3
|
+
| (-/ (-/ \(-|\ |(-| (-|_,(-|\ |(-|
|
4
|
+
| _\__,_\__/,_| \|,_|, _|__,_| \|,_|,
|
5
|
+
| ( ( ( ( ( ( (
|
6
|
+
| ____,_ _,____,____,
|
7
|
+
| (-| (-\_/(-|__|-|_,
|
8
|
+
| _|, _|, _| _|__,
|
9
|
+
| ( ( ( (
|
10
|
+
|
|
11
|
+
| libmagic bindings by dsturnbull
|
12
|
+
+------------------------------------------
|
13
|
+
|
14
|
+
SYNOPSIS
|
15
|
+
$ gem install content_type
|
16
|
+
irb> require 'content_type'
|
17
|
+
irb> File.content_type('file.pdf') #=> "application/pdf"
|
18
|
+
irb> File.open('file.doc').content_type #=> "application/msword"
|
19
|
+
irb> ContentType.new('file.jpg').content_type #=> "image/jpeg"
|
20
|
+
|
21
|
+
DESCRIPTION
|
22
|
+
ContentType is a simple C extension that binds to libmagic in order to
|
23
|
+
efficiently detect mime types of files.
|
24
|
+
|
25
|
+
Using the should_be_faster rspec extension,
|
26
|
+
ext.should be_at_least(5).times.faster_than(shell)
|
27
|
+
|
28
|
+
In addition to being fast, it is far more accurate than the current
|
29
|
+
practice of casing a regexp on file extensions.
|
30
|
+
|
31
|
+
DIAGNOSTICS
|
32
|
+
ContentType raises "ArgumentError: invalid file" for all errors relating to
|
33
|
+
the file it's working on.
|
34
|
+
|
35
|
+
ContentType raises "RuntimeError: open", "RuntimeError: load" and
|
36
|
+
"RuntimeError: file" for libmagic errors.
|
37
|
+
|
38
|
+
Any file that cannot be identified is simply said to be "data".
|
39
|
+
|
40
|
+
AVAILABILITY
|
41
|
+
http://github.com/dsturnbull/content_type
|
42
|
+
|
data/ext/content_type.c
CHANGED
@@ -2,8 +2,10 @@
|
|
2
2
|
#include <magic.h>
|
3
3
|
#include <stdio.h>
|
4
4
|
#include <sys/stat.h>
|
5
|
+
#include <stdbool.h>
|
5
6
|
|
6
7
|
#define MAGIC_OPTIONS MAGIC_SYMLINK | MAGIC_MIME_TYPE | MAGIC_PRESERVE_ATIME
|
8
|
+
#define MAX_EXT_LEN 16
|
7
9
|
|
8
10
|
VALUE content_type = Qnil;
|
9
11
|
VALUE content_type_initialize(VALUE self, VALUE path);
|
@@ -13,6 +15,27 @@ VALUE file_content_type_wrap(VALUE self, VALUE path);
|
|
13
15
|
VALUE file_content_type(VALUE self);
|
14
16
|
VALUE file_singleton_content_type(VALUE self, VALUE path);
|
15
17
|
|
18
|
+
// http://www.webdeveloper.com/forum/showthread.php?t=162526
/*
 * Extension -> MIME type overrides for Microsoft Office Open XML and
 * macro-enabled document formats.
 *
 * Each row is { extension, mime type }: column 0 holds the file extension
 * without the leading dot, column 1 holds the MIME type reported for it.
 * The table is read-only, so both the strings and the pointers to them are
 * const-qualified.
 *
 * NOTE(review): presumably these exist because content sniffing reports the
 * zip-based Office formats too generically -- confirm against libmagic.
 */
const char *const content_type_ext_overrides[][2] = {
    { "docm", "application/vnd.ms-word.document.macroEnabled.12" },
    { "docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document" },
    { "dotm", "application/vnd.ms-word.template.macroEnabled.12" },
    { "dotx", "application/vnd.openxmlformats-officedocument.wordprocessingml.template" },
    { "potm", "application/vnd.ms-powerpoint.template.macroEnabled.12" },
    { "potx", "application/vnd.openxmlformats-officedocument.presentationml.template" },
    { "ppam", "application/vnd.ms-powerpoint.addin.macroEnabled.12" },
    { "ppsm", "application/vnd.ms-powerpoint.slideshow.macroEnabled.12" },
    { "ppsx", "application/vnd.openxmlformats-officedocument.presentationml.slideshow" },
    { "pptm", "application/vnd.ms-powerpoint.presentation.macroEnabled.12" },
    { "pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation" },
    { "xlam", "application/vnd.ms-excel.addin.macroEnabled.12" },
    { "xlsb", "application/vnd.ms-excel.sheet.binary.macroEnabled.12" },
    { "xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12" },
    { "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" },
    { "xltm", "application/vnd.ms-excel.template.macroEnabled.12" },
    { "xltx", "application/vnd.openxmlformats-officedocument.spreadsheetml.template" },
};
|
38
|
+
|
16
39
|
void magic_fail(const char *error);
|
17
40
|
|
18
41
|
void
|
@@ -54,12 +77,48 @@ content_type_initialize(VALUE self, VALUE path)
|
|
54
77
|
return self;
|
55
78
|
}
|
56
79
|
|
80
|
+
bool
|
81
|
+
content_type_file_ext(VALUE self, char *ext)
|
82
|
+
{
|
83
|
+
char *filepath;
|
84
|
+
char t;
|
85
|
+
int i, j, k;
|
86
|
+
|
87
|
+
filepath = RSTRING_PTR(rb_iv_get(self, "@filepath"));
|
88
|
+
|
89
|
+
j = 0;
|
90
|
+
for (i = RSTRING_LEN(rb_iv_get(self, "@filepath")) - 1; i > 0 && j < MAX_EXT_LEN; i--) {
|
91
|
+
if (filepath[i] == '.') {
|
92
|
+
for (k = 0; k < j/2 ; k++) {
|
93
|
+
t = ext[j - 1 - k];
|
94
|
+
ext[j - 1 - k] = ext[k];
|
95
|
+
ext[k] = t;
|
96
|
+
}
|
97
|
+
return ext;
|
98
|
+
}
|
99
|
+
ext[j] = filepath[i];
|
100
|
+
j++;
|
101
|
+
}
|
102
|
+
|
103
|
+
return NULL;
|
104
|
+
}
|
105
|
+
|
57
106
|
VALUE
|
58
107
|
content_type_content_type(VALUE self)
|
59
108
|
{
|
60
|
-
VALUE
|
61
|
-
struct magic_set
|
62
|
-
const char
|
109
|
+
VALUE ct;
|
110
|
+
struct magic_set *mh;
|
111
|
+
const char *mime;
|
112
|
+
char ext[MAX_EXT_LEN]; // TODO dynamicly sized
|
113
|
+
int i;
|
114
|
+
|
115
|
+
if (content_type_file_ext(self, ext))
|
116
|
+
for (i = sizeof(content_type_ext_overrides) / sizeof(char *) / 2 - 1; i >= 0; i--)
|
117
|
+
if ((memcmp(ext, content_type_ext_overrides[i][0], strlen(content_type_ext_overrides[i][0]))) == 0) {
|
118
|
+
rb_iv_set(self, "@content_type", rb_str_new2(content_type_ext_overrides[i][1]));
|
119
|
+
rb_iv_set(self, "@processed", Qtrue);
|
120
|
+
return rb_iv_get(self, "@content_type");
|
121
|
+
}
|
63
122
|
|
64
123
|
if (rb_iv_get(self, "@processed"))
|
65
124
|
return rb_iv_get(self, "@content_type");
|
data/spec/content_type_spec.rb
CHANGED
@@ -8,6 +8,9 @@ describe ContentType do
|
|
8
8
|
@img = 'spec/fixtures/grindewald.jpg'
|
9
9
|
@pdf = 'spec/fixtures/pdftest.pdf'
|
10
10
|
@lzm = 'spec/fixtures/compressed.jpg.lz'
|
11
|
+
@dcx = 'spec/fixtures/wordtest.docx'
|
12
|
+
@dot = 'spec/fixtures/.supercabanafuntimekgozzzzzzzzzzzzzzzzzzzz'
|
13
|
+
@pdf_with_charset = 'spec/fixtures/bash.pdf'
|
11
14
|
end
|
12
15
|
|
13
16
|
context 'initialising' do
|
@@ -31,6 +34,18 @@ describe ContentType do
|
|
31
34
|
end
|
32
35
|
end
|
33
36
|
|
37
|
+
# Extension-based overrides: a known Office extension decides the reported
# type, while a dot file with an over-long "extension" is still detected
# from its content.
context 'file ext overrides' do
  it 'should detect docx files' do
    detected = ContentType.new(@dcx).content_type
    detected.should == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
  end

  it 'should handle dot files' do
    detected = ContentType.new(@dot).content_type
    detected.should == 'text/plain'
  end
end
|
48
|
+
|
34
49
|
context 'detecting mime type' do
|
35
50
|
it 'should detect images' do
|
36
51
|
ct = ContentType.new(@img)
|
@@ -42,6 +57,11 @@ describe ContentType do
|
|
42
57
|
ct.content_type.should == 'application/pdf'
|
43
58
|
end
|
44
59
|
|
60
|
+
# The reported type must be the bare MIME type, with no charset attached.
it 'should detect mime types with charsets by ignoring the charset' do
  detected = ContentType.new(@pdf_with_charset).content_type
  detected.should == 'application/pdf'
end
|
64
|
+
|
45
65
|
it 'should detect lzma files' do
|
46
66
|
ct = ContentType.new(@lzm)
|
47
67
|
ct.content_type.should == 'application/x-lzip'
|
metadata
CHANGED
@@ -3,10 +3,10 @@ name: content_type
|
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
|
-
-
|
7
|
-
- 1
|
6
|
+
- 2
|
8
7
|
- 0
|
9
|
-
|
8
|
+
- 0
|
9
|
+
version: 2.0.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David Turnbull
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-04-08 00:00:00 +10:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -24,12 +24,13 @@ executables: []
|
|
24
24
|
|
25
25
|
extensions:
|
26
26
|
- ext/extconf.rb
|
27
|
-
extra_rdoc_files:
|
28
|
-
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README
|
29
29
|
files:
|
30
30
|
- Rakefile
|
31
31
|
- ext/content_type.c
|
32
32
|
- ext/extconf.rb
|
33
|
+
- README
|
33
34
|
has_rdoc: true
|
34
35
|
homepage: http://github.com/dsturnbull/content_type
|
35
36
|
licenses: []
|