content_type 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,42 @@
1
+ |
2
+ | ____,____, _, _,____,____,_, _,____,
3
+ | (-/ (-/ \(-|\ |(-| (-|_,(-|\ |(-|
4
+ | _\__,_\__/,_| \|,_|, _|__,_| \|,_|,
5
+ | ( ( ( ( ( ( (
6
+ | ____,_ _,____,____,
7
+ | (-| (-\_/(-|__|-|_,
8
+ | _|, _|, _| _|__,
9
+ | ( ( ( (
10
+ |
11
+ | libmagic bindings by dsturnbull
12
+ +------------------------------------------
13
+
14
+ SYNOPSIS
15
+ $ gem install content_type
16
+ irb> require 'content_type'
17
+ irb> File.content_type('file.pdf') #=> "application/pdf"
18
+ irb> File.open('file.doc').content_type #=> "application/msword"
19
+ irb> ContentType.new('file.jpg').content_type #=> "image/jpeg"
20
+
21
+ DESCRIPTION
22
+ ContentType is a simple C extension that binds to libmagic in order to
23
+ efficiently detect the MIME types of files.
24
+
25
+ Using the should_be_faster rspec extension,
26
+ ext.should be_at_least(5).times.faster_than(shell)
27
+
28
+ In addition to being fast, it is far more accurate than the current
29
+ practice of matching file extensions against a regexp in a case statement.
30
+
31
+ DIAGNOSTICS
32
+ ContentType raises "ArgumentError: invalid file" for all errors relating to
33
+ the file it's working on.
34
+
35
+ ContentType raises "RuntimeError: open", "RuntimeError: load" and
36
+ "RuntimeError: file" for libmagic errors.
37
+
38
+ Any file that cannot be identified is simply said to be "data".
39
+
40
+ AVAILABILITY
41
+ http://github.com/dsturnbull/content_type
42
+
data/ext/content_type.c CHANGED
@@ -2,8 +2,10 @@
2
2
  #include <magic.h>
3
3
  #include <stdio.h>
4
4
  #include <sys/stat.h>
5
+ #include <stdbool.h>
5
6
 
6
7
  #define MAGIC_OPTIONS MAGIC_SYMLINK | MAGIC_MIME_TYPE | MAGIC_PRESERVE_ATIME
8
+ #define MAX_EXT_LEN 16
7
9
 
8
10
  VALUE content_type = Qnil;
9
11
  VALUE content_type_initialize(VALUE self, VALUE path);
@@ -13,6 +15,27 @@ VALUE file_content_type_wrap(VALUE self, VALUE path);
13
15
  VALUE file_content_type(VALUE self);
14
16
  VALUE file_singleton_content_type(VALUE self, VALUE path);
15
17
 
/*
 * Extension-based MIME type overrides for Microsoft Office 2007+ files
 * (Office Open XML and the macro-enabled variants).
 *
 * Each row is { file extension without the leading dot, MIME type }.
 * content_type_content_type() compares a file's extension against the first
 * column and, on a match, stores the second column as @content_type.
 *
 * Source: http://www.webdeveloper.com/forum/showthread.php?t=162526
 */
const char *content_type_ext_overrides[][2] = {
    /* Word */
    { "docm",
      "application/vnd.ms-word.document.macroEnabled.12" },
    { "docx",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document" },
    { "dotm",
      "application/vnd.ms-word.template.macroEnabled.12" },
    { "dotx",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.template" },

    /* PowerPoint */
    { "potm",
      "application/vnd.ms-powerpoint.template.macroEnabled.12" },
    { "potx",
      "application/vnd.openxmlformats-officedocument.presentationml.template" },
    { "ppam",
      "application/vnd.ms-powerpoint.addin.macroEnabled.12" },
    { "ppsm",
      "application/vnd.ms-powerpoint.slideshow.macroEnabled.12" },
    { "ppsx",
      "application/vnd.openxmlformats-officedocument.presentationml.slideshow" },
    { "pptm",
      "application/vnd.ms-powerpoint.presentation.macroEnabled.12" },
    { "pptx",
      "application/vnd.openxmlformats-officedocument.presentationml.presentation" },

    /* Excel */
    { "xlam",
      "application/vnd.ms-excel.addin.macroEnabled.12" },
    { "xlsb",
      "application/vnd.ms-excel.sheet.binary.macroEnabled.12" },
    { "xlsm",
      "application/vnd.ms-excel.sheet.macroEnabled.12" },
    { "xlsx",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" },
    { "xltm",
      "application/vnd.ms-excel.template.macroEnabled.12" },
    { "xltx",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.template" },
};
38
+
16
39
  void magic_fail(const char *error);
17
40
 
18
41
  void
@@ -54,12 +77,48 @@ content_type_initialize(VALUE self, VALUE path)
54
77
  return self;
55
78
  }
56
79
 
80
+ bool
81
+ content_type_file_ext(VALUE self, char *ext)
82
+ {
83
+ char *filepath;
84
+ char t;
85
+ int i, j, k;
86
+
87
+ filepath = RSTRING_PTR(rb_iv_get(self, "@filepath"));
88
+
89
+ j = 0;
90
+ for (i = RSTRING_LEN(rb_iv_get(self, "@filepath")) - 1; i > 0 && j < MAX_EXT_LEN; i--) {
91
+ if (filepath[i] == '.') {
92
+ for (k = 0; k < j/2 ; k++) {
93
+ t = ext[j - 1 - k];
94
+ ext[j - 1 - k] = ext[k];
95
+ ext[k] = t;
96
+ }
97
+ return ext;
98
+ }
99
+ ext[j] = filepath[i];
100
+ j++;
101
+ }
102
+
103
+ return NULL;
104
+ }
105
+
57
106
  VALUE
58
107
  content_type_content_type(VALUE self)
59
108
  {
60
- VALUE ct;
61
- struct magic_set *mh;
62
- const char *mime;
109
+ VALUE ct;
110
+ struct magic_set *mh;
111
+ const char *mime;
112
+ char ext[MAX_EXT_LEN]; // TODO dynamically sized
113
+ int i;
114
+
115
+ if (content_type_file_ext(self, ext))
116
+ for (i = sizeof(content_type_ext_overrides) / sizeof(char *) / 2 - 1; i >= 0; i--)
117
+ if ((memcmp(ext, content_type_ext_overrides[i][0], strlen(content_type_ext_overrides[i][0]))) == 0) {
118
+ rb_iv_set(self, "@content_type", rb_str_new2(content_type_ext_overrides[i][1]));
119
+ rb_iv_set(self, "@processed", Qtrue);
120
+ return rb_iv_get(self, "@content_type");
121
+ }
63
122
 
64
123
  if (rb_iv_get(self, "@processed"))
65
124
  return rb_iv_get(self, "@content_type");
@@ -8,6 +8,9 @@ describe ContentType do
8
8
  @img = 'spec/fixtures/grindewald.jpg'
9
9
  @pdf = 'spec/fixtures/pdftest.pdf'
10
10
  @lzm = 'spec/fixtures/compressed.jpg.lz'
11
+ @dcx = 'spec/fixtures/wordtest.docx'
12
+ @dot = 'spec/fixtures/.supercabanafuntimekgozzzzzzzzzzzzzzzzzzzz'
13
+ @pdf_with_charset = 'spec/fixtures/bash.pdf'
11
14
  end
12
15
 
13
16
  context 'initialising' do
@@ -31,6 +34,18 @@ describe ContentType do
31
34
  end
32
35
  end
33
36
 
# Extension-based overrides defined in ext/content_type.c.
context 'file ext overrides' do
  it 'should detect docx files' do
    expected = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    ContentType.new(@dcx).content_type.should == expected
  end

  # @dot is a dot-file fixture whose name has no usable extension.
  it 'should handle dot files' do
    ContentType.new(@dot).content_type.should == 'text/plain'
  end
end
48
+
34
49
  context 'detecting mime type' do
35
50
  it 'should detect images' do
36
51
  ct = ContentType.new(@img)
@@ -42,6 +57,11 @@ describe ContentType do
42
57
  ct.content_type.should == 'application/pdf'
43
58
  end
44
59
 
# The reported type must be the bare MIME type, without any charset suffix.
it 'should detect mime types with charsets by ignoring the charset' do
  detected = ContentType.new(@pdf_with_charset).content_type
  detected.should == 'application/pdf'
end
64
+
45
65
  it 'should detect lzma files' do
46
66
  ct = ContentType.new(@lzm)
47
67
  ct.content_type.should == 'application/x-lzip'
metadata CHANGED
@@ -3,10 +3,10 @@ name: content_type
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
- - 1
7
- - 1
6
+ - 2
8
7
  - 0
9
- version: 1.1.0
8
+ - 0
9
+ version: 2.0.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - David Turnbull
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-03-16 00:00:00 +11:00
17
+ date: 2010-04-08 00:00:00 +10:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -24,12 +24,13 @@ executables: []
24
24
 
25
25
  extensions:
26
26
  - ext/extconf.rb
27
- extra_rdoc_files: []
28
-
27
+ extra_rdoc_files:
28
+ - README
29
29
  files:
30
30
  - Rakefile
31
31
  - ext/content_type.c
32
32
  - ext/extconf.rb
33
+ - README
33
34
  has_rdoc: true
34
35
  homepage: http://github.com/dsturnbull/content_type
35
36
  licenses: []