mimetyper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +206 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +163 -0
- data/Rakefile +10 -0
- data/lib/mimetyper/mime_database.rb +422 -0
- data/lib/mimetyper.rb +143 -0
- data/mimetyper.gemspec +25 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 602ca434a098ef893ef34c2329c90dfe9919c8b585224065484bc3e2cb422c8d
|
4
|
+
data.tar.gz: ed6c204d3a4b91ed432a723be994690107a0e71288f9eca6bf46c9c390f46e39
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d2502c55d94e8b1656a1b955f7ac2f5f53aaa35dbdc095cdb768fc311912cf11ba7cf0c6b83ebf41193043691ffde2c2a4b585a66226157fe997e2bae309fb18
|
7
|
+
data.tar.gz: a75262f588369afe7882dd09ddb21f9eccd50c954b61198acf72a94ad580174632ef78a2448f07a12805ce582d06379cd9457a941973961176f64d127044612e
|
data/.gitignore
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
Gemfile.lock
|
13
|
+
|
14
|
+
## Documentation cache and generated files:
|
15
|
+
/.yardoc/
|
16
|
+
/_yardoc/
|
17
|
+
/doc/
|
18
|
+
/rdoc/
|
19
|
+
|
20
|
+
## Environment normalization:
|
21
|
+
/.bundle/
|
22
|
+
/vendor/bundle
|
23
|
+
/lib/bundler/man/
|
24
|
+
|
25
|
+
## RubyMine/IntelliJ
|
26
|
+
.idea/
|
27
|
+
*.iml
|
28
|
+
*.ipr
|
29
|
+
*.iws
|
30
|
+
|
31
|
+
## VSCode
|
32
|
+
.vscode/
|
33
|
+
*.code-workspace
|
34
|
+
|
35
|
+
## macOS
|
36
|
+
.DS_Store
|
37
|
+
.AppleDouble
|
38
|
+
.LSOverride
|
39
|
+
Icon
|
40
|
+
._*
|
41
|
+
.DocumentRevisions-V100
|
42
|
+
.fseventsd
|
43
|
+
.Spotlight-V100
|
44
|
+
.TemporaryItems
|
45
|
+
.Trashes
|
46
|
+
.VolumeIcon.icns
|
47
|
+
.com.apple.timemachine.donotpresent
|
48
|
+
.AppleDB
|
49
|
+
.AppleDesktop
|
50
|
+
Network Trash Folder
|
51
|
+
Temporary Items
|
52
|
+
.apdisk
|
53
|
+
|
54
|
+
## Windows
|
55
|
+
Thumbs.db
|
56
|
+
Thumbs.db:encryptable
|
57
|
+
ehthumbs.db
|
58
|
+
ehthumbs_vista.db
|
59
|
+
*.stackdump
|
60
|
+
[Dd]esktop.ini
|
61
|
+
$RECYCLE.BIN/
|
62
|
+
*.cab
|
63
|
+
*.msi
|
64
|
+
*.msix
|
65
|
+
*.msm
|
66
|
+
*.msp
|
67
|
+
*.lnk
|
68
|
+
|
69
|
+
## Linux
|
70
|
+
*~
|
71
|
+
.fuse_hidden*
|
72
|
+
.directory
|
73
|
+
.Trash-*
|
74
|
+
.nfs*
|
75
|
+
|
76
|
+
## Vim
|
77
|
+
[._]*.s[a-v][a-z]
|
78
|
+
[._]*.sw[a-p]
|
79
|
+
[._]s[a-rt-v][a-z]
|
80
|
+
[._]ss[a-gi-z]
|
81
|
+
[._]sw[a-p]
|
82
|
+
Session.vim
|
83
|
+
Sessionx.vim
|
84
|
+
.netrwhist
|
85
|
+
tags
|
86
|
+
[._]*.un~
|
87
|
+
|
88
|
+
## Emacs
|
89
|
+
\#*\#
|
90
|
+
/.emacs.desktop
|
91
|
+
/.emacs.desktop.lock
|
92
|
+
*.elc
|
93
|
+
auto-save-list
|
94
|
+
tramp
|
95
|
+
.\#*
|
96
|
+
.org-id-locations
|
97
|
+
*_archive
|
98
|
+
*_flymake.*
|
99
|
+
/eshell/history
|
100
|
+
/eshell/lastdir
|
101
|
+
/elpa/
|
102
|
+
*.rel
|
103
|
+
/auto/
|
104
|
+
.cask/
|
105
|
+
dist/
|
106
|
+
flycheck_*.el
|
107
|
+
.projectile
|
108
|
+
.dir-locals.el
|
109
|
+
/network-security.data
|
110
|
+
|
111
|
+
## Ruby specific
|
112
|
+
*.rbc
|
113
|
+
capybara-*.html
|
114
|
+
.rspec
|
115
|
+
/db/*.sqlite3
|
116
|
+
/db/*.sqlite3-journal
|
117
|
+
/db/*.sqlite3-[0-9]*
|
118
|
+
/public/system
|
119
|
+
/coverage/
|
120
|
+
/spec/tmp
|
121
|
+
*.orig
|
122
|
+
rerun.txt
|
123
|
+
pickle-email-*.html
|
124
|
+
.byebug_history
|
125
|
+
|
126
|
+
## Bundler
|
127
|
+
/.bundle/
|
128
|
+
/vendor/bundle
|
129
|
+
/lib/bundler/man/
|
130
|
+
.ruby-version
|
131
|
+
.ruby-gemset
|
132
|
+
.rvmrc
|
133
|
+
|
134
|
+
## Testing
|
135
|
+
/test/reports/
|
136
|
+
/test/tmp/
|
137
|
+
/test/version_tmp/
|
138
|
+
/spec/reports/
|
139
|
+
|
140
|
+
## Environment variables
|
141
|
+
.env
|
142
|
+
.env.local
|
143
|
+
.env.*.local
|
144
|
+
|
145
|
+
## Logs
|
146
|
+
*.log
|
147
|
+
/log/*
|
148
|
+
!/log/.keep
|
149
|
+
|
150
|
+
## Temporary files
|
151
|
+
/tmp/*
|
152
|
+
!/tmp/.keep
|
153
|
+
*.tmp
|
154
|
+
*.bak
|
155
|
+
*.swp
|
156
|
+
*~.nib
|
157
|
+
|
158
|
+
## Build artifacts
|
159
|
+
/build/
|
160
|
+
/dist/
|
161
|
+
/out/
|
162
|
+
|
163
|
+
## Gem build artifacts
|
164
|
+
*.gem
|
165
|
+
*.rbc
|
166
|
+
.bundle
|
167
|
+
.config
|
168
|
+
InstalledFiles
|
169
|
+
lib/bundler/man
|
170
|
+
pkg
|
171
|
+
rdoc
|
172
|
+
spec/reports
|
173
|
+
test/tmp
|
174
|
+
test/version_tmp
|
175
|
+
|
176
|
+
## Documentation
|
177
|
+
.yardoc
|
178
|
+
_yardoc
|
179
|
+
doc/
|
180
|
+
rdoc/
|
181
|
+
|
182
|
+
## Code coverage
|
183
|
+
coverage
|
184
|
+
.nyc_output
|
185
|
+
|
186
|
+
## Debugger
|
187
|
+
.byebug_history
|
188
|
+
.pry_history
|
189
|
+
|
190
|
+
## Node (in case of any JS tooling)
|
191
|
+
node_modules/
|
192
|
+
npm-debug.log*
|
193
|
+
yarn-debug.log*
|
194
|
+
yarn-error.log*
|
195
|
+
|
196
|
+
## CTags
|
197
|
+
tags
|
198
|
+
TAGS
|
199
|
+
|
200
|
+
## Other
|
201
|
+
.rake_tasks~
|
202
|
+
.sass-cache
|
203
|
+
.jekyll-cache/
|
204
|
+
.jekyll-metadata
|
205
|
+
|
206
|
+
DEPLOY.md
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
# MimeTyper
|
2
|
+
|
3
|
+
A pure Ruby MIME type detection library focused on accuracy and reliability. MimeTyper uses magic byte detection and file extensions to accurately identify file types without any external dependencies.
|
4
|
+
|
5
|
+
## Features
|
6
|
+
|
7
|
+
- **Pure Ruby** - No external dependencies or C extensions
|
8
|
+
- **Comprehensive** - Supports 150+ file types including images, documents, audio, video, archives, and more
|
9
|
+
- **Accurate** - Uses magic byte detection for reliable type identification
|
10
|
+
- **Fast** - Optimized for performance with minimal file reading
|
11
|
+
- **Simple API** - Easy to use with just three methods
|
12
|
+
- **Reliable** - Extensive test coverage ensures accuracy
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
Add this line to your application's Gemfile:
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
gem 'mimetyper'
|
20
|
+
```
|
21
|
+
|
22
|
+
And then execute:
|
23
|
+
|
24
|
+
```bash
|
25
|
+
$ bundle install
|
26
|
+
```
|
27
|
+
|
28
|
+
Or install it yourself as:
|
29
|
+
|
30
|
+
```bash
|
31
|
+
$ gem install mimetyper
|
32
|
+
```
|
33
|
+
|
34
|
+
## Usage
|
35
|
+
|
36
|
+
MimeTyper provides three simple methods for MIME type detection:
|
37
|
+
|
38
|
+
### Detect from file path
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
require 'mimetyper'
|
42
|
+
|
43
|
+
# Detects MIME type using magic bytes first, falls back to extension
|
44
|
+
MimeTyper.from_file('document.pdf')
|
45
|
+
# => "application/pdf"
|
46
|
+
|
47
|
+
MimeTyper.from_file('photo.jpg')
|
48
|
+
# => "image/jpeg"
|
49
|
+
|
50
|
+
MimeTyper.from_file('unknown.bin')
|
51
|
+
# => "application/octet-stream"
|
52
|
+
```
|
53
|
+
|
54
|
+
### Detect from data
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
# Detect MIME type from binary data
|
58
|
+
data = File.read('image.png', mode: 'rb')
|
59
|
+
MimeTyper.from_data(data)
|
60
|
+
# => "image/png"
|
61
|
+
|
62
|
+
# Optionally provide a filename for fallback detection
|
63
|
+
unknown_data = "\x00\x01\x02"
|
64
|
+
MimeTyper.from_data(unknown_data, filename: 'file.txt')
|
65
|
+
# => "text/plain"
|
66
|
+
```
|
67
|
+
|
68
|
+
### Detect from extension only
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
# Direct extension lookup (less accurate than magic byte detection)
|
72
|
+
MimeTyper.from_extension('mp4')
|
73
|
+
# => "video/mp4"
|
74
|
+
|
75
|
+
MimeTyper.from_extension('.docx')
|
76
|
+
# => "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
77
|
+
```
|
78
|
+
|
79
|
+
## Supported Types
|
80
|
+
|
81
|
+
MimeTyper supports a comprehensive range of file types:
|
82
|
+
|
83
|
+
### Images
|
84
|
+
- JPEG, PNG, GIF, WebP, BMP, ICO, TIFF
|
85
|
+
- SVG, HEIC, AVIF, JP2
|
86
|
+
- PSD (Photoshop), XCF (GIMP)
|
87
|
+
|
88
|
+
### Documents
|
89
|
+
- PDF, RTF
|
90
|
+
- Microsoft Office: DOC, DOCX, XLS, XLSX, PPT, PPTX
|
91
|
+
- OpenDocument: ODT, ODS, ODP
|
92
|
+
|
93
|
+
### Audio
|
94
|
+
- MP3, WAV, FLAC, OGG, M4A
|
95
|
+
- MIDI, AIFF, WMA, AAC, OPUS
|
96
|
+
|
97
|
+
### Video
|
98
|
+
- MP4, AVI, MOV, WebM, MKV
|
99
|
+
- FLV, MPEG, 3GP, WMV
|
100
|
+
|
101
|
+
### Archives
|
102
|
+
- ZIP, RAR, 7Z, TAR
|
103
|
+
- GZIP, BZIP2, XZ
|
104
|
+
|
105
|
+
### Programming
|
106
|
+
- Source code files (Ruby, Python, JavaScript, Java, Go, etc.)
|
107
|
+
- JSON, XML, YAML, TOML
|
108
|
+
- HTML, CSS
|
109
|
+
|
110
|
+
### Fonts
|
111
|
+
- TTF, OTF, WOFF, WOFF2, EOT
|
112
|
+
|
113
|
+
### Executables
|
114
|
+
- EXE, DLL, ELF, Mach-O
|
115
|
+
- JAR, APK, DEX
|
116
|
+
|
117
|
+
### Databases
|
118
|
+
- SQLite
|
119
|
+
|
120
|
+
## How It Works
|
121
|
+
|
122
|
+
MimeTyper uses a two-tier detection approach:
|
123
|
+
|
124
|
+
1. **Magic Byte Detection**: Reads the first few bytes of a file to identify unique file signatures (magic bytes). This is the most reliable method.
|
125
|
+
|
126
|
+
2. **Extension Fallback**: If magic byte detection fails or is inconclusive, falls back to file extension mapping.
|
127
|
+
|
128
|
+
The library maintains a comprehensive database of:
|
129
|
+
- Magic byte signatures with specific offsets
|
130
|
+
- File extension mappings
|
131
|
+
- Special detection logic for complex formats (Office documents, media containers, etc.)
|
132
|
+
|
133
|
+
## Performance
|
134
|
+
|
135
|
+
MimeTyper is designed for performance:
|
136
|
+
- Reads only the first 4KB of files for detection
|
137
|
+
- Magic bytes are checked in order of popularity
|
138
|
+
- Efficient binary string matching
|
139
|
+
|
140
|
+
## Accuracy and Reliability
|
141
|
+
|
142
|
+
- Extensive test coverage with real-world file samples
|
143
|
+
- Handles edge cases like:
|
144
|
+
- Office Open XML format detection (DOCX vs ZIP)
|
145
|
+
- Different video container formats
|
146
|
+
- Text encoding detection
|
147
|
+
- Malformed or truncated files
|
148
|
+
|
149
|
+
## Default Behavior
|
150
|
+
|
151
|
+
When MimeTyper cannot determine a specific MIME type, it returns `"application/octet-stream"` as a safe default.
|
152
|
+
|
153
|
+
## Thread Safety
|
154
|
+
|
155
|
+
MimeTyper is thread-safe. All methods are stateless and can be called concurrently from multiple threads.
|
156
|
+
|
157
|
+
## Contributing
|
158
|
+
|
159
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/vancuren/mimetyper.
|
160
|
+
|
161
|
+
## License
|
162
|
+
|
163
|
+
The gem is available as open source under the terms of the MIT License.
|
data/Rakefile
ADDED
data/lib/mimetyper/mime_database.rb
ADDED
@@ -0,0 +1,422 @@
|
|
1
|
+
module MimeTyper
|
2
|
+
module MimeDatabase
|
3
|
+
MAGIC_BYTES = [
|
4
|
+
# Images
|
5
|
+
{ magic: "\xFF\xD8\xFF".b, offset: 0, mime: "image/jpeg", extensions: %w[jpg jpeg jpe jif jfif jfi] },
|
6
|
+
{ magic: "\x89PNG\r\n\x1A\n".b, offset: 0, mime: "image/png", extensions: %w[png] },
|
7
|
+
{ magic: "GIF87a".b, offset: 0, mime: "image/gif", extensions: %w[gif] },
|
8
|
+
{ magic: "GIF89a".b, offset: 0, mime: "image/gif", extensions: %w[gif] },
|
9
|
+
{ magic: "BM".b, offset: 0, mime: "image/bmp", extensions: %w[bmp dib] },
|
10
|
+
{ magic: "\x00\x00\x01\x00".b, offset: 0, mime: "image/vnd.microsoft.icon", extensions: %w[ico] },
|
11
|
+
{ magic: "\x00\x00\x02\x00".b, offset: 0, mime: "image/vnd.microsoft.icon", extensions: %w[ico cur] },
|
12
|
+
{ magic: "II\x2A\x00".b, offset: 0, mime: "image/tiff", extensions: %w[tif tiff] },
|
13
|
+
{ magic: "MM\x00\x2A".b, offset: 0, mime: "image/tiff", extensions: %w[tif tiff] },
|
14
|
+
{ magic: "RIFF".b, offset: 0, check: ->(data) { data[8,4] == "WEBP".b }, mime: "image/webp", extensions: %w[webp] },
|
15
|
+
{ magic: "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A".b, offset: 0, mime: "image/jp2", extensions: %w[jp2 j2k jpf jpm jpg2 j2c jpc jpx mj2] },
|
16
|
+
{ magic: "\x00\x00\x00\x20\x66\x74\x79\x70\x68\x65\x69\x63".b, offset: 0, mime: "image/heic", extensions: %w[heic] },
|
17
|
+
{ magic: "\x00\x00\x00\x20\x66\x74\x79\x70\x61\x76\x69\x66".b, offset: 0, mime: "image/avif", extensions: %w[avif] },
|
18
|
+
|
19
|
+
# Documents
|
20
|
+
{ magic: "%PDF".b, offset: 0, mime: "application/pdf", extensions: %w[pdf] },
|
21
|
+
{ magic: "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b, offset: 0, mime: "application/vnd.ms-office", extensions: %w[doc xls ppt msi msg] },
|
22
|
+
{ magic: "PK\x03\x04".b, offset: 0, mime: "application/zip", extensions: %w[zip], check: ->(data) { !docx_check(data) && !xlsx_check(data) && !pptx_check(data) && !odt_check(data) } },
|
23
|
+
{ magic: "PK\x03\x04".b, offset: 0, mime: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", extensions: %w[docx], check: ->(data) { docx_check(data) } },
|
24
|
+
{ magic: "PK\x03\x04".b, offset: 0, mime: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", extensions: %w[xlsx], check: ->(data) { xlsx_check(data) } },
|
25
|
+
{ magic: "PK\x03\x04".b, offset: 0, mime: "application/vnd.openxmlformats-officedocument.presentationml.presentation", extensions: %w[pptx], check: ->(data) { pptx_check(data) } },
|
26
|
+
{ magic: "PK\x03\x04".b, offset: 0, mime: "application/vnd.oasis.opendocument.text", extensions: %w[odt], check: ->(data) { odt_check(data) } },
|
27
|
+
{ magic: "PK\x05\x06".b, offset: 0, mime: "application/zip", extensions: %w[zip] },
|
28
|
+
{ magic: "PK\x07\x08".b, offset: 0, mime: "application/zip", extensions: %w[zip] },
|
29
|
+
{ magic: "{\\rtf".b, offset: 0, mime: "application/rtf", extensions: %w[rtf] },
|
30
|
+
{ magic: "\x7B\x5C\x72\x74\x66\x31".b, offset: 0, mime: "application/rtf", extensions: %w[rtf] },
|
31
|
+
|
32
|
+
# Archives
|
33
|
+
{ magic: "Rar!\x1A\x07\x00".b, offset: 0, mime: "application/x-rar-compressed", extensions: %w[rar] },
|
34
|
+
{ magic: "Rar!\x1A\x07\x01\x00".b, offset: 0, mime: "application/x-rar-compressed", extensions: %w[rar] },
|
35
|
+
{ magic: "\x1F\x8B\x08".b, offset: 0, mime: "application/gzip", extensions: %w[gz gzip] },
|
36
|
+
{ magic: "BZh".b, offset: 0, mime: "application/x-bzip2", extensions: %w[bz2 boz] },
|
37
|
+
{ magic: "\x37\x7A\xBC\xAF\x27\x1C".b, offset: 0, mime: "application/x-7z-compressed", extensions: %w[7z] },
|
38
|
+
{ magic: "\xFD\x37\x7A\x58\x5A\x00".b, offset: 0, mime: "application/x-xz", extensions: %w[xz] },
|
39
|
+
{ magic: "ustar".b, offset: 257, mime: "application/x-tar", extensions: %w[tar] },
|
40
|
+
{ magic: "MSCF".b, offset: 0, mime: "application/vnd.ms-cab-compressed", extensions: %w[cab] },
|
41
|
+
|
42
|
+
# Audio
|
43
|
+
{ magic: "ID3".b, offset: 0, mime: "audio/mpeg", extensions: %w[mp3] },
|
44
|
+
{ magic: "\xFF\xFB".b, offset: 0, mime: "audio/mpeg", extensions: %w[mp3] },
|
45
|
+
{ magic: "\xFF\xF3".b, offset: 0, mime: "audio/mpeg", extensions: %w[mp3] },
|
46
|
+
{ magic: "\xFF\xF2".b, offset: 0, mime: "audio/mpeg", extensions: %w[mp3] },
|
47
|
+
{ magic: "OggS".b, offset: 0, mime: "audio/ogg", extensions: %w[ogg oga ogv] },
|
48
|
+
{ magic: "RIFF".b, offset: 0, check: ->(data) { data[8,4] == "WAVE".b }, mime: "audio/wav", extensions: %w[wav] },
|
49
|
+
{ magic: "fLaC".b, offset: 0, mime: "audio/flac", extensions: %w[flac] },
|
50
|
+
{ magic: "\x00\x00\x00\x20\x66\x74\x79\x70\x4D\x34\x41".b, offset: 0, mime: "audio/mp4", extensions: %w[m4a] },
|
51
|
+
{ magic: "MThd".b, offset: 0, mime: "audio/midi", extensions: %w[mid midi] },
|
52
|
+
|
53
|
+
# Video
|
54
|
+
{ magic: "\x00\x00\x00\x14\x66\x74\x79\x70\x69\x73\x6F\x6D".b, offset: 0, mime: "video/mp4", extensions: %w[mp4 m4v] },
|
55
|
+
{ magic: "\x00\x00\x00\x18\x66\x74\x79\x70\x6D\x70\x34\x32".b, offset: 0, mime: "video/mp4", extensions: %w[mp4 m4v] },
|
56
|
+
{ magic: "\x00\x00\x00\x1C\x66\x74\x79\x70\x4D\x53\x4E\x56".b, offset: 0, mime: "video/mp4", extensions: %w[mp4] },
|
57
|
+
{ magic: "\x00\x00\x00\x20\x66\x74\x79\x70\x69\x73\x6F\x6D".b, offset: 0, mime: "video/mp4", extensions: %w[mp4] },
|
58
|
+
{ magic: "\x00\x00\x00\x18\x66\x74\x79\x70\x33\x67\x70\x35".b, offset: 0, mime: "video/3gpp", extensions: %w[3gp 3gpp] },
|
59
|
+
{ magic: "\x00\x00\x00\x14\x66\x74\x79\x70\x71\x74\x20\x20".b, offset: 0, mime: "video/quicktime", extensions: %w[mov qt] },
|
60
|
+
{ magic: "\x00\x00\x00\x20\x66\x74\x79\x70\x71\x74\x20\x20".b, offset: 0, mime: "video/quicktime", extensions: %w[mov qt] },
|
61
|
+
{ magic: "RIFF".b, offset: 0, check: ->(data) { data[8,4] == "AVI ".b }, mime: "video/x-msvideo", extensions: %w[avi] },
|
62
|
+
{ magic: "\x1A\x45\xDF\xA3".b, offset: 0, mime: "video/webm", extensions: %w[webm mkv], check: ->(data) { webm_check(data) } },
|
63
|
+
{ magic: "FLV".b, offset: 0, mime: "video/x-flv", extensions: %w[flv] },
|
64
|
+
{ magic: "\x00\x00\x01\xBA".b, offset: 0, mime: "video/mpeg", extensions: %w[mpg mpeg] },
|
65
|
+
{ magic: "\x00\x00\x01\xB3".b, offset: 0, mime: "video/mpeg", extensions: %w[mpg mpeg] },
|
66
|
+
|
67
|
+
# Fonts
|
68
|
+
{ magic: "\x00\x01\x00\x00".b, offset: 0, mime: "font/ttf", extensions: %w[ttf] },
|
69
|
+
{ magic: "OTTO".b, offset: 0, mime: "font/otf", extensions: %w[otf] },
|
70
|
+
{ magic: "wOFF".b, offset: 0, mime: "font/woff", extensions: %w[woff] },
|
71
|
+
{ magic: "wOF2".b, offset: 0, mime: "font/woff2", extensions: %w[woff2] },
|
72
|
+
{ magic: "\x00\x00\x01\x00".b, offset: 0, check: ->(data) { data[4,2] == "\x00\x00".b }, mime: "application/vnd.ms-fontobject", extensions: %w[eot] },
|
73
|
+
|
74
|
+
# Executables
|
75
|
+
{ magic: "MZ".b, offset: 0, mime: "application/x-msdownload", extensions: %w[exe dll com bat msi] },
|
76
|
+
{ magic: "\x7FELF".b, offset: 0, mime: "application/x-executable", extensions: %w[elf axf bin o so] },
|
77
|
+
{ magic: "\xCA\xFE\xBA\xBE".b, offset: 0, mime: "application/java-vm", extensions: %w[class] },
|
78
|
+
{ magic: "\xCE\xFA\xED\xFE".b, offset: 0, mime: "application/x-mach-binary", extensions: %w[dylib] },
|
79
|
+
{ magic: "\xCF\xFA\xED\xFE".b, offset: 0, mime: "application/x-mach-binary", extensions: %w[dylib] },
|
80
|
+
{ magic: "\xFE\xED\xFA\xCE".b, offset: 0, mime: "application/x-mach-binary", extensions: %w[dylib] },
|
81
|
+
{ magic: "\xFE\xED\xFA\xCF".b, offset: 0, mime: "application/x-mach-binary", extensions: %w[dylib] },
|
82
|
+
{ magic: "PK\x03\x04".b, offset: 0, mime: "application/java-archive", extensions: %w[jar], check: ->(data) { jar_check(data) } },
|
83
|
+
{ magic: "PK\x03\x04".b, offset: 0, mime: "application/vnd.android.package-archive", extensions: %w[apk], check: ->(data) { apk_check(data) } },
|
84
|
+
{ magic: "dex\n035\x00".b, offset: 0, mime: "application/x-android-dex", extensions: %w[dex] },
|
85
|
+
|
86
|
+
# Database
|
87
|
+
{ magic: "SQLite format 3\x00".b, offset: 0, mime: "application/x-sqlite3", extensions: %w[sqlite sqlite3 db db3 s3db sl3] },
|
88
|
+
|
89
|
+
# Text formats
|
90
|
+
{ magic: "<!DOCTYPE".b, offset: 0, mime: "text/html", extensions: %w[html htm], check: ->(data) { html_check(data) } },
|
91
|
+
{ magic: "<html".b, offset: 0, mime: "text/html", extensions: %w[html htm] },
|
92
|
+
{ magic: "<HTML".b, offset: 0, mime: "text/html", extensions: %w[html htm] },
|
93
|
+
{ magic: "<?xml".b, offset: 0, mime: "text/xml", extensions: %w[xml xsl xslt], check: ->(data) { !svg_check(data) } },
|
94
|
+
{ magic: "<svg".b, offset: 0, mime: "image/svg+xml", extensions: %w[svg svgz] },
|
95
|
+
{ magic: "<?xml".b, offset: 0, mime: "image/svg+xml", extensions: %w[svg svgz], check: ->(data) { svg_check(data) } },
|
96
|
+
{ magic: "{".b, offset: 0, mime: "application/json", extensions: %w[json], check: ->(data) { json_check(data) } },
|
97
|
+
{ magic: "[".b, offset: 0, mime: "application/json", extensions: %w[json], check: ->(data) { json_check(data) } },
|
98
|
+
{ magic: "%!PS".b, offset: 0, mime: "application/postscript", extensions: %w[ps eps ai] },
|
99
|
+
{ magic: "%!PS-Adobe-".b, offset: 0, check: ->(data) { data[11,3] == "EPS".b }, mime: "application/eps", extensions: %w[eps epsf epsi] },
|
100
|
+
|
101
|
+
# Other
|
102
|
+
{ magic: "LZIP".b, offset: 0, mime: "application/x-lzip", extensions: %w[lz] },
|
103
|
+
{ magic: "\x1F\x9D".b, offset: 0, mime: "application/x-compress", extensions: %w[Z] },
|
104
|
+
{ magic: "\x1F\xA0".b, offset: 0, mime: "application/x-compress", extensions: %w[Z] },
|
105
|
+
{ magic: "-----BEGIN".b, offset: 0, mime: "application/x-pem-file", extensions: %w[pem crt cer key] },
|
106
|
+
{ magic: "\x89HDF\r\n\x1A\n".b, offset: 0, mime: "application/x-hdf", extensions: %w[hdf hdf4 h4] },
|
107
|
+
{ magic: "CDF\x01".b, offset: 0, mime: "application/x-netcdf", extensions: %w[nc cdf] },
|
108
|
+
{ magic: "8BPS".b, offset: 0, mime: "image/vnd.adobe.photoshop", extensions: %w[psd psb] },
|
109
|
+
{ magic: "gimp xcf ".b, offset: 0, mime: "image/x-xcf", extensions: %w[xcf] },
|
110
|
+
{ magic: "II\xBC\x00".b, offset: 0, mime: "image/vnd.djvu", extensions: %w[djvu djv] },
|
111
|
+
{ magic: "AT&TFORM".b, offset: 0, mime: "image/vnd.djvu", extensions: %w[djvu djv] },
|
112
|
+
{ magic: "BLENDER".b, offset: 0, mime: "application/x-blender", extensions: %w[blend] },
|
113
|
+
{ magic: "!<arch>\n".b, offset: 0, mime: "application/x-archive", extensions: %w[a ar lib] },
|
114
|
+
{ magic: "OggS".b, offset: 0, check: ->(data) { data[28,8] == "\x01vorbis".b }, mime: "audio/ogg", extensions: %w[ogg oga] },
|
115
|
+
{ magic: "OggS".b, offset: 0, check: ->(data) { data[28,7] == "\x80theora".b }, mime: "video/ogg", extensions: %w[ogv ogg] },
|
116
|
+
{ magic: "ftyp".b, offset: 4, mime: "video/mp4", extensions: %w[mp4 m4v f4v] },
|
117
|
+
{ magic: "moov".b, offset: 4, mime: "video/quicktime", extensions: %w[mov qt] },
|
118
|
+
{ magic: "free".b, offset: 4, mime: "video/quicktime", extensions: %w[mov qt] },
|
119
|
+
{ magic: "mdat".b, offset: 4, mime: "video/quicktime", extensions: %w[mov qt] },
|
120
|
+
{ magic: "wide".b, offset: 4, mime: "video/quicktime", extensions: %w[mov qt] },
|
121
|
+
{ magic: "pnot".b, offset: 4, mime: "video/quicktime", extensions: %w[mov qt] },
|
122
|
+
{ magic: "skip".b, offset: 4, mime: "video/quicktime", extensions: %w[mov qt] },
|
123
|
+
].freeze
|
124
|
+
|
125
|
+
# Extension-only fallback mappings for when magic bytes don't match
|
126
|
+
EXTENSION_MAP = {
|
127
|
+
# Text
|
128
|
+
"txt" => "text/plain",
|
129
|
+
"text" => "text/plain",
|
130
|
+
"log" => "text/plain",
|
131
|
+
"csv" => "text/csv",
|
132
|
+
"tsv" => "text/tab-separated-values",
|
133
|
+
"html" => "text/html",
|
134
|
+
"htm" => "text/html",
|
135
|
+
"xhtml" => "application/xhtml+xml",
|
136
|
+
"xml" => "text/xml",
|
137
|
+
"css" => "text/css",
|
138
|
+
"js" => "application/javascript",
|
139
|
+
"mjs" => "application/javascript",
|
140
|
+
"json" => "application/json",
|
141
|
+
"jsonld" => "application/ld+json",
|
142
|
+
"yaml" => "text/yaml",
|
143
|
+
"yml" => "text/yaml",
|
144
|
+
"toml" => "application/toml",
|
145
|
+
"ini" => "text/plain",
|
146
|
+
"cfg" => "text/plain",
|
147
|
+
"conf" => "text/plain",
|
148
|
+
"properties" => "text/plain",
|
149
|
+
|
150
|
+
# Programming languages
|
151
|
+
"c" => "text/x-c",
|
152
|
+
"cc" => "text/x-c++",
|
153
|
+
"cpp" => "text/x-c++",
|
154
|
+
"cxx" => "text/x-c++",
|
155
|
+
"h" => "text/x-c",
|
156
|
+
"hpp" => "text/x-c++",
|
157
|
+
"cs" => "text/x-csharp",
|
158
|
+
"java" => "text/x-java",
|
159
|
+
"py" => "text/x-python",
|
160
|
+
"rb" => "text/x-ruby",
|
161
|
+
"go" => "text/x-go",
|
162
|
+
"rs" => "text/x-rust",
|
163
|
+
"swift" => "text/x-swift",
|
164
|
+
"kt" => "text/x-kotlin",
|
165
|
+
"scala" => "text/x-scala",
|
166
|
+
"php" => "text/x-php",
|
167
|
+
"pl" => "text/x-perl",
|
168
|
+
"lua" => "text/x-lua",
|
169
|
+
"r" => "text/x-r",
|
170
|
+
"m" => "text/x-objc",
|
171
|
+
"mm" => "text/x-objc++",
|
172
|
+
"sql" => "application/sql",
|
173
|
+
"sh" => "application/x-sh",
|
174
|
+
"bash" => "application/x-sh",
|
175
|
+
"zsh" => "application/x-sh",
|
176
|
+
"fish" => "application/x-sh",
|
177
|
+
"ps1" => "application/x-powershell",
|
178
|
+
"bat" => "application/x-bat",
|
179
|
+
"cmd" => "application/x-bat",
|
180
|
+
|
181
|
+
# Markup
|
182
|
+
"md" => "text/markdown",
|
183
|
+
"markdown" => "text/markdown",
|
184
|
+
"rst" => "text/x-rst",
|
185
|
+
"adoc" => "text/asciidoc",
|
186
|
+
"latex" => "text/x-latex",
|
187
|
+
|
188
|
+
# Data
|
189
|
+
"ics" => "text/calendar",
|
190
|
+
"vcf" => "text/vcard",
|
191
|
+
|
192
|
+
# Images (fallback)
|
193
|
+
"svg" => "image/svg+xml",
|
194
|
+
"svgz" => "image/svg+xml",
|
195
|
+
"ico" => "image/vnd.microsoft.icon",
|
196
|
+
"webp" => "image/webp",
|
197
|
+
"bmp" => "image/bmp",
|
198
|
+
"jpg" => "image/jpeg",
|
199
|
+
"jpeg" => "image/jpeg",
|
200
|
+
"jpe" => "image/jpeg",
|
201
|
+
"png" => "image/png",
|
202
|
+
"gif" => "image/gif",
|
203
|
+
"tif" => "image/tiff",
|
204
|
+
"tiff" => "image/tiff",
|
205
|
+
"psd" => "image/vnd.adobe.photoshop",
|
206
|
+
"xcf" => "image/x-xcf",
|
207
|
+
"heic" => "image/heic",
|
208
|
+
"heif" => "image/heif",
|
209
|
+
"avif" => "image/avif",
|
210
|
+
"jxl" => "image/jxl",
|
211
|
+
|
212
|
+
# Audio (fallback)
|
213
|
+
"mp3" => "audio/mpeg",
|
214
|
+
"m4a" => "audio/mp4",
|
215
|
+
"wav" => "audio/wav",
|
216
|
+
"flac" => "audio/flac",
|
217
|
+
"ogg" => "audio/ogg",
|
218
|
+
"oga" => "audio/ogg",
|
219
|
+
"opus" => "audio/opus",
|
220
|
+
"wma" => "audio/x-ms-wma",
|
221
|
+
"aac" => "audio/aac",
|
222
|
+
"mid" => "audio/midi",
|
223
|
+
"midi" => "audio/midi",
|
224
|
+
"aiff" => "audio/aiff",
|
225
|
+
"aif" => "audio/aiff",
|
226
|
+
"aifc" => "audio/aiff",
|
227
|
+
"ape" => "audio/x-ape",
|
228
|
+
"wv" => "audio/wavpack",
|
229
|
+
"amr" => "audio/amr",
|
230
|
+
|
231
|
+
# Video (fallback)
|
232
|
+
"mp4" => "video/mp4",
|
233
|
+
"m4v" => "video/mp4",
|
234
|
+
"mkv" => "video/x-matroska",
|
235
|
+
"webm" => "video/webm",
|
236
|
+
"mov" => "video/quicktime",
|
237
|
+
"qt" => "video/quicktime",
|
238
|
+
"avi" => "video/x-msvideo",
|
239
|
+
"wmv" => "video/x-ms-wmv",
|
240
|
+
"flv" => "video/x-flv",
|
241
|
+
"mpg" => "video/mpeg",
|
242
|
+
"mpeg" => "video/mpeg",
|
243
|
+
"m2v" => "video/mpeg",
|
244
|
+
"3gp" => "video/3gpp",
|
245
|
+
"3g2" => "video/3gpp2",
|
246
|
+
"f4v" => "video/mp4",
|
247
|
+
"asf" => "video/x-ms-asf",
|
248
|
+
"rm" => "application/vnd.rn-realmedia",
|
249
|
+
"rmvb" => "application/vnd.rn-realmedia-vbr",
|
250
|
+
"vob" => "video/dvd",
|
251
|
+
|
252
|
+
# Documents (fallback)
|
253
|
+
"pdf" => "application/pdf",
|
254
|
+
"doc" => "application/vnd.ms-office",
|
255
|
+
"docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
256
|
+
"xls" => "application/vnd.ms-office",
|
257
|
+
"xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
258
|
+
"ppt" => "application/vnd.ms-powerpoint",
|
259
|
+
"pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
260
|
+
"odt" => "application/vnd.oasis.opendocument.text",
|
261
|
+
"ods" => "application/vnd.oasis.opendocument.spreadsheet",
|
262
|
+
"odp" => "application/vnd.oasis.opendocument.presentation",
|
263
|
+
"rtf" => "application/rtf",
|
264
|
+
"tex" => "application/x-tex",
|
265
|
+
|
266
|
+
# Archives (fallback)
|
267
|
+
"zip" => "application/zip",
|
268
|
+
"rar" => "application/x-rar-compressed",
|
269
|
+
"7z" => "application/x-7z-compressed",
|
270
|
+
"tar" => "application/x-tar",
|
271
|
+
"gz" => "application/gzip",
|
272
|
+
"bz2" => "application/x-bzip2",
|
273
|
+
"xz" => "application/x-xz",
|
274
|
+
"z" => "application/x-compress",
|
275
|
+
"lz" => "application/x-lzip",
|
276
|
+
"lzma" => "application/x-lzma",
|
277
|
+
"lzo" => "application/x-lzop",
|
278
|
+
"rz" => "application/x-rzip",
|
279
|
+
"sz" => "application/x-snappy",
|
280
|
+
"deb" => "application/vnd.debian.binary-package",
|
281
|
+
"rpm" => "application/x-rpm",
|
282
|
+
"jar" => "application/java-archive",
|
283
|
+
"war" => "application/java-archive",
|
284
|
+
"ear" => "application/java-archive",
|
285
|
+
"apk" => "application/vnd.android.package-archive",
|
286
|
+
"dmg" => "application/x-apple-diskimage",
|
287
|
+
"iso" => "application/x-iso9660-image",
|
288
|
+
"img" => "application/octet-stream",
|
289
|
+
|
290
|
+
# Fonts (fallback)
|
291
|
+
"ttf" => "font/ttf",
|
292
|
+
"otf" => "font/otf",
|
293
|
+
"woff" => "font/woff",
|
294
|
+
"woff2" => "font/woff2",
|
295
|
+
"eot" => "application/vnd.ms-fontobject",
|
296
|
+
"ttc" => "font/collection",
|
297
|
+
"pfb" => "font/type1",
|
298
|
+
"pfm" => "font/type1",
|
299
|
+
|
300
|
+
# Executables (fallback)
|
301
|
+
"exe" => "application/x-msdownload",
|
302
|
+
"dll" => "application/x-msdownload",
|
303
|
+
"so" => "application/x-sharedlib",
|
304
|
+
"dylib" => "application/x-mach-binary",
|
305
|
+
"class" => "application/java-vm",
|
306
|
+
"dex" => "application/x-android-dex",
|
307
|
+
|
308
|
+
# Other
|
309
|
+
"swf" => "application/x-shockwave-flash",
|
310
|
+
"xap" => "application/x-silverlight-app",
|
311
|
+
"ipa" => "application/x-ios-app",
|
312
|
+
"crx" => "application/x-chrome-extension",
|
313
|
+
"xpi" => "application/x-xpinstall",
|
314
|
+
"epub" => "application/epub+zip",
|
315
|
+
"mobi" => "application/x-mobipocket-ebook",
|
316
|
+
"azw" => "application/vnd.amazon.ebook",
|
317
|
+
"azw3" => "application/vnd.amazon.ebook",
|
318
|
+
}.freeze
|
319
|
+
|
320
|
+
module_function
|
321
|
+
|
322
|
+
# Heuristic test for a Word document inside ZIP-style data.
#
# data - String with the bytes to inspect.
#
# Returns true when data holds at least 30 characters and contains the
# substring "word/" or "Word.Document" anywhere; false otherwise.
def docx_check(data)
  return false unless data.size >= 30

  # A "[Content_Types].xml" + "word/document.xml" pairing always contains
  # "word/", so the bare directory marker already covers that case.
  %w[word/ Word.Document].any? { |marker| data.include?(marker.b) }
end
|
328
|
+
|
329
|
+
# Heuristic test for an Excel workbook inside ZIP-style data.
#
# data - String with the bytes to inspect.
#
# Returns true when data holds at least 30 characters and contains the
# substring "xl/" or "Excel" anywhere; false otherwise.
def xlsx_check(data)
  return false unless data.size >= 30

  # A "[Content_Types].xml" + "xl/workbook.xml" pairing always contains
  # "xl/", so the bare directory marker already covers that case.
  has_xl_dir = data.include?("xl/".b)
  has_xl_dir || data.include?("Excel".b)
end
|
335
|
+
|
336
|
+
# Heuristic test for a PowerPoint presentation inside ZIP-style data.
#
# data - String with the bytes to inspect.
#
# Returns true when data holds at least 30 characters and contains the
# substring "ppt/" or "PowerPoint" anywhere; false otherwise.
def pptx_check(data)
  if data.size < 30
    false
  else
    # A "[Content_Types].xml" + "ppt/presentation.xml" pairing always
    # contains "ppt/", so the bare directory marker already covers it.
    ["ppt/", "PowerPoint"].any? { |marker| data.include?(marker.b) }
  end
end
|
342
|
+
|
343
|
+
# Heuristic secondary check for ODT content: both "manifest.rdf" and
# "content.xml" must appear somewhere in the sampled bytes.
#
# data - String holding the leading bytes of the file.
#
# Returns true when both names are present, false otherwise (always false
# for samples shorter than 100).
def odt_check(data)
  return false if data.size < 100

  required_entries = ["manifest.rdf".b, "content.xml".b]
  required_entries.all? { |entry| data.include?(entry) }
end
|
347
|
+
|
348
|
+
# Heuristic secondary check for JAR content: the sampled bytes must mention
# the "META-INF/MANIFEST.MF" entry.
#
# data - String holding the leading bytes of the file.
#
# Returns true when the entry name is present, false otherwise (always
# false for samples shorter than 100).
def jar_check(data)
  long_enough = data.size >= 100
  long_enough && data.include?("META-INF/MANIFEST.MF".b)
end
|
352
|
+
|
353
|
+
# Heuristic secondary check for APK content: the sampled bytes must mention
# either "AndroidManifest.xml" or "classes.dex".
#
# data - String holding the leading bytes of the file.
#
# Returns true when either name is present, false otherwise (always false
# for samples shorter than 100).
def apk_check(data)
  return false if data.size < 100

  android_entries = ["AndroidManifest.xml".b, "classes.dex".b]
  android_entries.any? { |entry| data.include?(entry) }
end
|
357
|
+
|
358
|
+
# Heuristic secondary check for WebM/Matroska content: searches the start
# of the sample for the doctype strings "webm" or "matroska".
#
# data - String holding the leading bytes of the file.
#
# Returns true when either doctype string is found, false otherwise
# (always false for samples shorter than 20).
def webm_check(data)
  return false if data.size < 20

  # Only indices 0 through 100 (101 elements) are searched.
  header = data[0, 101]
  doctypes = ["webm".b, "matroska".b]
  doctypes.any? { |doctype| header.include?(doctype) }
end
|
368
|
+
|
369
|
+
# Heuristic secondary check for SVG content: case-insensitively searches
# the start of the sample (indices 0 through 500) for an "<svg" tag or the
# SVG namespace declaration.
#
# data - String holding the leading bytes of the file.
#
# Returns true when a marker is found, false otherwise (always false for
# samples shorter than 20). Never raises on invalid or truncated UTF-8.
def svg_check(data)
  return false if data.size < 20

  # Fold case on a binary copy. String#downcase raises ArgumentError on a
  # UTF-8 string with invalid byte sequences (for example a multibyte
  # character cut off at the end of the sample). On a binary string only the
  # ASCII letters A-Z are folded, which is all the ASCII markers below need.
  head = data[0..[500, data.size].min].b.downcase
  head.include?("<svg") || head.include?("xmlns=\"http://www.w3.org/2000/svg\"")
end
|
376
|
+
|
377
|
+
# Heuristic secondary check for HTML content: case-insensitively searches
# the start of the sample (indices 0 through 500) for an "<html" tag or an
# HTML doctype declaration.
#
# data - String holding the leading bytes of the file.
#
# Returns true when a marker is found, false otherwise (always false for
# samples shorter than 10). Never raises on invalid or truncated UTF-8.
def html_check(data)
  return false if data.size < 10

  # Fold case on a binary copy. String#downcase raises ArgumentError on a
  # UTF-8 string with invalid byte sequences (for example a multibyte
  # character cut off at the end of the sample). On a binary string only the
  # ASCII letters A-Z are folded, which is all the ASCII markers below need.
  head = data[0..[500, data.size].min].b.downcase
  head.include?("<html") || head.include?("<!doctype html")
end
|
384
|
+
|
385
|
+
# Lightweight structural check for JSON-looking content. This is not a full
# parser: it only verifies that the trimmed text is wrapped in {...} or
# [...], that every bracket outside of string literals is closed by the
# matching kind of bracket, and that no string literal is left open.
#
# data - String holding the content to inspect.
#
# Returns true when the structure is consistent, false otherwise. Any
# StandardError raised while scanning is treated as "not JSON".
def json_check(data)
  return false if data.empty?

  content = data.strip
  wrapped_as_object = content[0] == '{' && content[-1] == '}'
  wrapped_as_array = content[0] == '[' && content[-1] == ']'
  return false unless wrapped_as_object || wrapped_as_array

  closer_for = { '{' => '}', '[' => ']' }
  pending = [] # closing brackets still owed, innermost last
  in_string = false
  escaped = false

  content.each_char do |char|
    if in_string
      if escaped
        # The character after a backslash never terminates the string.
        escaped = false
      elsif char == '\\'
        escaped = true
      elsif char == '"'
        in_string = false
      end
    elsif char == '"'
      in_string = true
    elsif closer_for.key?(char)
      pending.push(closer_for[char])
    elsif char == '}' || char == ']'
      # Reject a stray closer and a closer of the wrong kind, e.g. "[}".
      return false unless pending.pop == char
    end
  end

  pending.empty? && !in_string
rescue StandardError
  false
end
|
421
|
+
end
|
422
|
+
end
|
data/lib/mimetyper.rb
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
require_relative "mimetyper/mime_database"
|
2
|
+
|
3
|
+
# Entry points for MIME type detection by content ("magic" bytes), by file
# name extension, or both. The lookup tables are read from
# MimeDatabase::MAGIC_BYTES and MimeDatabase::EXTENSION_MAP.
module MimeTyper
  class << self
    # Detects the MIME type of the file at +path+.
    #
    # The first 4096 bytes are inspected for magic bytes. When that yields
    # nothing, or only the generic "text/plain", the file extension is
    # consulted and wins if it maps to a known type.
    #
    # path - String path, or any object responding to #to_path.
    #
    # Returns the MIME type String ("application/octet-stream" if unknown).
    # Raises ArgumentError if path is nil or the file does not exist.
    def from_file(path)
      raise ArgumentError, "File path cannot be nil" if path.nil?
      raise ArgumentError, "File does not exist: #{path}" unless File.exist?(path)

      # IO#read(length) returns nil at EOF, i.e. for an empty file.
      data = File.open(path, "rb") { |f| f.read(4096) } || ""

      mime_type = detect_by_magic(data)

      if mime_type.nil? || mime_type == "text/plain"
        mime_type = detect_by_extension(extension_of(path)) || mime_type
      end

      mime_type || "application/octet-stream"
    end

    # Detects the MIME type of in-memory +data+.
    #
    # The filename extension is used only when content detection finds
    # nothing. Unlike from_file, a "text/plain" result is not refined by
    # the extension.
    #
    # data     - content to inspect; a String is converted to binary encoding.
    # filename - optional name (String or object responding to #to_path)
    #            whose extension serves as a fallback.
    #
    # Returns the MIME type String ("application/octet-stream" if unknown).
    # Raises ArgumentError if data is nil.
    def from_data(data, filename: nil)
      raise ArgumentError, "Data cannot be nil" if data.nil?

      data = data.b if data.is_a?(String) # Ensure binary encoding

      mime_type = detect_by_magic(data)
      mime_type ||= detect_by_extension(extension_of(filename)) if filename

      mime_type || "application/octet-stream"
    end

    # Looks up the MIME type for a bare extension.
    #
    # extension - String such as "png" or ".PNG" (case-insensitive, the
    #             leading dot is optional).
    #
    # Returns the MIME type String ("application/octet-stream" if unknown).
    # Raises ArgumentError if extension is nil.
    def from_extension(extension)
      raise ArgumentError, "Extension cannot be nil" if extension.nil?

      extension = extension.downcase
      extension = extension[1..-1] if extension.start_with?('.')

      detect_by_extension(extension) || "application/octet-stream"
    end

    private

    # Returns the lower-cased extension of +name+ without the leading dot,
    # or nil when there is none. File.extname already copes with names that
    # contain no dot, so no separate check for '.' is needed.
    def extension_of(name)
      name = name.to_path if name.respond_to?(:to_path)
      ext = File.extname(name).downcase[1..-1]
      ext unless ext.nil? || ext.empty?
    end

    # Walks MimeDatabase::MAGIC_BYTES in order and returns the MIME type of
    # the first entry whose magic string matches at its offset and whose
    # optional :check callable accepts the data. Falls back to "text/plain"
    # when the data looks like text. Returns nil when nothing matches.
    def detect_by_magic(data)
      return nil if data.nil? || data.empty?

      MimeDatabase::MAGIC_BYTES.each do |entry|
        offset = entry[:offset]
        next if offset >= data.size

        magic = entry[:magic]
        next unless data[offset, magic.size] == magic

        check = entry[:check]
        next if check && !check.call(data)

        return entry[:mime]
      end

      return "text/plain" if is_text?(data)

      nil
    end

    # Resolves an extension (no dot, lower-case) to a MIME type. Extensions
    # listed on MAGIC_BYTES entries take precedence over EXTENSION_MAP.
    # Returns nil for a nil, empty or unknown extension.
    def detect_by_extension(extension)
      return nil if extension.nil? || extension.empty?

      MimeDatabase::MAGIC_BYTES.each do |entry|
        known = entry[:extensions]
        return entry[:mime] if known && known.include?(extension)
      end

      MimeDatabase::EXTENSION_MAP[extension]
    end

    # Heuristically decides whether +data+ is text by sampling its first
    # 512 bytes: any NUL byte or more than 30% control characters means
    # binary, more than 80% printable ASCII means text, otherwise the
    # sample must be valid UTF-8.
    def is_text?(data)
      return false if data.nil? || data.empty?

      # Don't detect single character as text
      return false if data.size == 1

      sample = data[0, [512, data.size].min]

      control_chars = 0
      printable_chars = 0
      null_bytes = 0

      sample.each_byte do |byte|
        if byte == 0
          null_bytes += 1
        elsif byte == 9 || byte == 10 || byte == 13 || (byte >= 32 && byte < 127)
          printable_chars += 1
        elsif byte < 32 || byte == 127
          control_chars += 1
        end
      end

      return false if null_bytes > 0
      return false if control_chars > sample.size * 0.3
      return true if printable_chars > sample.size * 0.8

      return true if sample.dup.force_encoding(Encoding::UTF_8).valid_encoding?

      # The sample may have been cut in the middle of a multibyte character.
      # A UTF-8 character is at most 4 bytes long, so dropping up to 3
      # trailing bytes is enough to remove an incomplete one.
      return false unless data.bytesize > sample.bytesize

      (1..3).any? do |dropped|
        kept = sample.bytesize - dropped
        kept > 0 && sample.byteslice(0, kept).force_encoding(Encoding::UTF_8).valid_encoding?
      end
    end
  end
end
|
data/mimetyper.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Gem specification for mimetyper.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "mimetyper"
  gem.version = "1.0.0"
  gem.authors = ["Russell Van Curen"]
  gem.email   = ["russell@vancuren.net"]

  # Description and licensing
  gem.summary     = "A pure Ruby MIME type detection library focused on accuracy and reliability"
  gem.description = "MimeTyper is a comprehensive, pure Ruby MIME type detection library that uses magic bytes and file extensions to accurately identify file types. Built for reliability and simplicity."
  gem.homepage    = "https://github.com/vancuren/mimetyper"
  gem.license     = "MIT"
  gem.required_ruby_version = Gem::Requirement.new(">= 2.5.0")

  gem.metadata["homepage_uri"]    = gem.homepage
  gem.metadata["source_code_uri"] = gem.homepage

  # Package every file tracked by git, except test/spec/feature directories.
  # The listing is taken from the directory containing this gemspec.
  gem.files = Dir.chdir(File.expand_path("..", __FILE__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.grep_v(%r{^(test|spec|features)/})
  end
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only dependencies
  gem.add_development_dependency "rake", "~> 13.0"
  gem.add_development_dependency "minitest", "~> 5.0"
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mimetyper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Russell Van Curen
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-09-25 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '13.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '13.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.0'
|
41
|
+
description: MimeTyper is a comprehensive, pure Ruby MIME type detection library that
|
42
|
+
uses magic bytes and file extensions to accurately identify file types. Built for
|
43
|
+
reliability and simplicity.
|
44
|
+
email:
|
45
|
+
- russell@vancuren.net
|
46
|
+
executables: []
|
47
|
+
extensions: []
|
48
|
+
extra_rdoc_files: []
|
49
|
+
files:
|
50
|
+
- ".gitignore"
|
51
|
+
- Gemfile
|
52
|
+
- LICENSE.txt
|
53
|
+
- README.md
|
54
|
+
- Rakefile
|
55
|
+
- lib/mimetyper.rb
|
56
|
+
- lib/mimetyper/mime_database.rb
|
57
|
+
- mimetyper.gemspec
|
58
|
+
homepage: https://github.com/vancuren/mimetyper
|
59
|
+
licenses:
|
60
|
+
- MIT
|
61
|
+
metadata:
|
62
|
+
homepage_uri: https://github.com/vancuren/mimetyper
|
63
|
+
source_code_uri: https://github.com/vancuren/mimetyper
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 2.5.0
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubygems_version: 3.5.16
|
80
|
+
signing_key:
|
81
|
+
specification_version: 4
|
82
|
+
summary: A pure Ruby MIME type detection library focused on accuracy and reliability
|
83
|
+
test_files: []
|