best_type 0.0.2 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +86 -12
- data/config/internal_config_options.yml +6 -0
- data/lib/best_type.rb +5 -0
- data/lib/best_type/config.rb +9 -3
- data/lib/best_type/dc_type_lookup.rb +36 -8
- data/lib/best_type/pcdm_type_lookup.rb +113 -0
- data/lib/best_type/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: fbde6b38db251ef80d23a7b7711e31adef831a93fbff03bbfcee54609eeb1c9d
|
4
|
+
data.tar.gz: 95e9cfecc1d063eb21755650a1e77307b86e8ff5a98c161e92854cdf236f2e7d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75cb28f10e1e533a4928b03a825bf00ebb98a6a20d4e66b657c800e0e46006952c93bc91a5a044855abe23c7d451ae6cd1a4a22391f992e07dcb2bc4e713e15f
|
7
|
+
data.tar.gz: 5a12871eda5fdd29f44b277286716810a39ef95a142d5b5328c6976011b89ed679d39923980d34e996318df3dfa8224dda249a0049c13098af3f534a9b65b0bb
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# best_type
|
2
2
|
|
3
|
-
A pure-ruby library for selecting the best MIME type for a file name
|
3
|
+
A pure-ruby library for selecting the best MIME type for a file name or DC type (http://dublincore.org/2012/06/14/dctype) for a file name / MIME type.
|
4
4
|
|
5
5
|
### Installation
|
6
6
|
|
@@ -8,33 +8,107 @@ A pure-ruby library for selecting the best MIME type for a file name, or DC type
|
|
8
8
|
gem install best_type
|
9
9
|
```
|
10
10
|
|
11
|
-
###
|
11
|
+
### Usage
|
12
12
|
|
13
13
|
```ruby
|
14
|
+
# require the gem
|
14
15
|
require 'best_type'
|
15
16
|
|
16
|
-
|
17
|
+
# detect mime type for file names
|
18
|
+
BestType.mime_type.for_file_name('myfile.jpg') # 'image/jpeg'
|
19
|
+
|
20
|
+
# detect mime type for file names (including full file path)
|
21
|
+
BestType.mime_type.for_file_name('/path/to/some/file.jpg') # 'image/jpeg'
|
22
|
+
|
23
|
+
# detect dc type for file names
|
24
|
+
BestType.dc_type.for_file_name('myfile.jpg') # 'StillImage'
|
25
|
+
|
26
|
+
# detect dc type for file names (including full file path)
|
27
|
+
BestType.dc_type.for_file_name('/path/to/some/file.jpg') # 'StillImage'
|
28
|
+
|
29
|
+
# detect dc type for mime types
|
30
|
+
BestType.dc_type.for_mime_type('image/jpeg') # 'StillImage'
|
31
|
+
|
32
|
+
# detect pcdm type for file names (including full file path)
|
33
|
+
BestType.pcdm_type.for_file_name('/path/to/some/file.jpg') # 'Image'
|
34
|
+
|
35
|
+
# detect pcdm type for mime types
|
36
|
+
BestType.pcdm_type.for_mime_type('image/jpeg') # 'Image'
|
37
|
+
```
|
38
|
+
|
39
|
+
### Add Custom Overrides
|
40
|
+
```ruby
|
41
|
+
BestType.configure({
|
42
|
+
extension_to_mime_type_overrides:
|
43
|
+
'custom': 'custom/type'
|
44
|
+
mime_type_to_dc_type_overrides:
|
45
|
+
'custom/type': 'CustomDC'
|
46
|
+
mime_type_to_pcdm_type_overrides:
|
47
|
+
'custom/type': 'CustomPCDM'
|
48
|
+
})
|
49
|
+
|
50
|
+
BestType.mime_type.for_file_name('myfile.custom') # 'custom/type'
|
51
|
+
BestType.dc_type.for_file_name('myfile.custom') # 'CustomDC'
|
52
|
+
BestType.pcdm_type.for_mime_type('custom/type') # 'CustomPCDM'
|
53
|
+
|
17
54
|
```
|
18
55
|
|
19
|
-
### Rails
|
56
|
+
### Recommended Setup For Rails
|
20
57
|
|
21
|
-
Gemfile:
|
58
|
+
Add best_type to your Gemfile:
|
22
59
|
```ruby
|
23
60
|
gem 'best_type'
|
24
61
|
```
|
25
62
|
|
26
|
-
|
63
|
+
And then call it from anywhere!
|
64
|
+
|
65
|
+
If you want to set custom overrides, the best place to do so is in a Rails initializer:
|
27
66
|
```ruby
|
28
|
-
#
|
67
|
+
# config/initializers/best_type.rb
|
68
|
+
|
29
69
|
BestType.configure({
|
30
70
|
extension_to_mime_type_overrides:
|
31
|
-
'
|
71
|
+
'custom': 'custom/type'
|
32
72
|
mime_type_to_dc_type_overrides:
|
33
|
-
'
|
73
|
+
'custom/type': 'Custom'
|
74
|
+
mime_type_to_pcdm_type_overrides:
|
75
|
+
'custom/type': 'Custom'
|
34
76
|
})
|
77
|
+
```
|
78
|
+
|
79
|
+
You may also want to consider using a YAML file for configuration:
|
80
|
+
```ruby
|
81
|
+
# config/initializers/best_type.rb
|
35
82
|
|
36
|
-
|
37
|
-
|
83
|
+
BestType.configure(YAML.load_file(File.join(Rails.root, 'config/best_type.yml'))[Rails.env])
|
84
|
+
```
|
85
|
+
|
86
|
+
```yaml
|
87
|
+
# config/best_type.yml
|
88
|
+
|
89
|
+
development:
|
90
|
+
extension_to_mime_type_overrides:
|
91
|
+
'good': 'good/type'
|
92
|
+
mime_type_to_dc_type_overrides:
|
93
|
+
'good/type': 'Good'
|
94
|
+
mime_type_to_pcdm_type_overrides:
|
95
|
+
'good/type': 'Goodly'
|
96
|
+
|
97
|
+
test:
|
98
|
+
extension_to_mime_type_overrides:
|
99
|
+
'better': 'better/type'
|
100
|
+
mime_type_to_dc_type_overrides:
|
101
|
+
'better/type': 'Better'
|
102
|
+
mime_type_to_pcdm_type_overrides:
|
103
|
+
'better/type': 'Betterly'
|
104
|
+
|
105
|
+
production:
|
106
|
+
extension_to_mime_type_overrides:
|
107
|
+
'best': 'best/type'
|
108
|
+
mime_type_to_dc_type_overrides:
|
109
|
+
'best/type': 'Best'
|
110
|
+
mime_type_to_pcdm_type_overrides:
|
111
|
+
'best/type': 'Bestly'
|
38
112
|
```
|
39
113
|
|
40
114
|
### Running Tests (for developers):
|
@@ -49,4 +123,4 @@ bundle exec rake best_type:ci
|
|
49
123
|
|
50
124
|
```sh
|
51
125
|
bundle exec rake release
|
52
|
-
```
|
126
|
+
```
|
@@ -1,6 +1,12 @@
|
|
1
1
|
extension_to_mime_type_overrides:
|
2
2
|
'test': 'test/type'
|
3
3
|
'mp4': 'video/mp4'
|
4
|
+
'vtt': 'text/vtt'
|
5
|
+
'm4v': 'video/x-m4v'
|
4
6
|
mime_type_to_dc_type_overrides:
|
5
7
|
'test/type': 'Test'
|
6
8
|
'application/mxf': 'MovingImage'
|
9
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Text'
|
10
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Dataset'
|
11
|
+
mime_type_to_pcdm_type_overrides:
|
12
|
+
'test/type': 'Test'
|
data/lib/best_type.rb
CHANGED
@@ -2,6 +2,7 @@ require 'best_type/version'
|
|
2
2
|
require 'best_type/config'
|
3
3
|
require 'best_type/mime_type_lookup'
|
4
4
|
require 'best_type/dc_type_lookup'
|
5
|
+
require 'best_type/pcdm_type_lookup'
|
5
6
|
require 'yaml'
|
6
7
|
|
7
8
|
module BestType
|
@@ -15,6 +16,10 @@ module BestType
|
|
15
16
|
@dc_type ||= BestType::DcTypeLookup.new(mime_type)
|
16
17
|
end
|
17
18
|
|
19
|
+
def self.pcdm_type
|
20
|
+
@pcdm_type ||= BestType::PcdmTypeLookup.new(mime_type)
|
21
|
+
end
|
22
|
+
|
18
23
|
def self.config(reload = false, user_config_options = {})
|
19
24
|
if @config.nil? || reload
|
20
25
|
@semaphore.synchronize do
|
data/lib/best_type/config.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module BestType
|
2
2
|
class Config
|
3
3
|
|
4
|
-
attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides
|
4
|
+
attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides, :mime_type_to_pcdm_type_overrides
|
5
5
|
|
6
6
|
def initialize(user_config_options = {})
|
7
7
|
# Get defaults from config/internal_config_options.yml in gem
|
@@ -9,12 +9,14 @@ module BestType
|
|
9
9
|
internal_config_file_path = File.join(gem_dir, 'config/internal_config_options.yml')
|
10
10
|
internal_config_options = YAML.load_file(internal_config_file_path)
|
11
11
|
|
12
|
-
@extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides']
|
13
|
-
@mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides']
|
12
|
+
@extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides'] || {}
|
13
|
+
@mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides'] || {}
|
14
|
+
@mime_type_to_pcdm_type_overrides = internal_config_options['mime_type_to_pcdm_type_overrides'] || {}
|
14
15
|
|
15
16
|
stringify_user_config_options_keys!(user_config_options)
|
16
17
|
add_extension_to_mime_type_overrides(user_config_options['extension_to_mime_type_overrides']) if user_config_options.key?('extension_to_mime_type_overrides')
|
17
18
|
add_mime_type_to_dc_type_overrides(user_config_options['mime_type_to_dc_type_overrides']) if user_config_options.key?('mime_type_to_dc_type_overrides')
|
19
|
+
add_mime_type_to_pcdm_type_overrides(user_config_options['mime_type_to_pcdm_type_overrides']) if user_config_options.key?('mime_type_to_pcdm_type_overrides')
|
18
20
|
end
|
19
21
|
|
20
22
|
private
|
@@ -27,6 +29,10 @@ module BestType
|
|
27
29
|
@mime_type_to_dc_type_overrides.merge!(overrides)
|
28
30
|
end
|
29
31
|
|
32
|
+
def add_mime_type_to_pcdm_type_overrides(overrides)
|
33
|
+
@mime_type_to_pcdm_type_overrides.merge!(overrides)
|
34
|
+
end
|
35
|
+
|
30
36
|
def stringify_user_config_options_keys!(user_config_options)
|
31
37
|
user_config_options_keys = user_config_options.keys
|
32
38
|
user_config_options_keys.each do |key|
|
@@ -3,11 +3,39 @@ module BestType
|
|
3
3
|
|
4
4
|
attr_reader :config
|
5
5
|
|
6
|
+
COLLECTION = 'Collection'.freeze
|
7
|
+
DATASET = 'Dataset'.freeze
|
8
|
+
EVENT = 'Event'.freeze
|
9
|
+
INTERACTIVE_RESOURCE = 'InteractiveResource'.freeze
|
10
|
+
MOVING_IMAGE = 'MovingImage'.freeze
|
11
|
+
PHYSICAL_OBJECT = 'PhysicalObject'.freeze
|
12
|
+
SERVICE = 'Service'.freeze
|
13
|
+
SOFTWARE = 'Software'.freeze
|
14
|
+
SOUND = 'Sound'.freeze
|
15
|
+
STILL_IMAGE = 'StillImage'.freeze
|
16
|
+
TEXT = 'Text'.freeze
|
17
|
+
|
18
|
+
# these include values that will not be derived from MIME/content types
|
19
|
+
VALID_TYPES = [
|
20
|
+
COLLECTION, DATASET, EVENT, INTERACTIVE_RESOURCE, MOVING_IMAGE, PHYSICAL_OBJECT,
|
21
|
+
SERVICE, SOFTWARE, SOUND, STILL_IMAGE, TEXT
|
22
|
+
].freeze
|
23
|
+
|
24
|
+
FALLBACK_DC_TYPE = SOFTWARE
|
25
|
+
|
6
26
|
def initialize(mime_type_lookup_instance)
|
7
27
|
@mime_type_lookup = mime_type_lookup_instance
|
8
28
|
@config = @mime_type_lookup.config
|
9
29
|
end
|
10
30
|
|
31
|
+
def fallback_type
|
32
|
+
FALLBACK_DC_TYPE
|
33
|
+
end
|
34
|
+
|
35
|
+
def valid_type?(value)
|
36
|
+
VALID_TYPES.include? value
|
37
|
+
end
|
38
|
+
|
11
39
|
def for_file_name(file_name_or_path)
|
12
40
|
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
13
41
|
end
|
@@ -18,17 +46,17 @@ module BestType
|
|
18
46
|
return dc_type unless dc_type.nil?
|
19
47
|
|
20
48
|
mimes_to_dc = {
|
21
|
-
/^image/ =>
|
22
|
-
/^video/ =>
|
23
|
-
/^audio/ =>
|
24
|
-
/^text/ =>
|
25
|
-
/^application\/(pdf|msword)/ =>
|
26
|
-
/excel|spreadsheet|xls|application\/sql/ =>
|
27
|
-
/^application/ =>
|
49
|
+
/^image/ => STILL_IMAGE,
|
50
|
+
/^video/ => MOVING_IMAGE,
|
51
|
+
/^audio/ => SOUND,
|
52
|
+
/^text/ => TEXT,
|
53
|
+
/^application\/(pdf|msword)/ => TEXT,
|
54
|
+
/excel|spreadsheet|xls|application\/sql/ => DATASET,
|
55
|
+
/^application/ => SOFTWARE
|
28
56
|
}
|
29
57
|
|
30
58
|
dc_type = mimes_to_dc.find { |pattern, _type_val| mime_type =~ pattern }
|
31
|
-
dc_type.last
|
59
|
+
dc_type.nil? ? FALLBACK_DC_TYPE : dc_type.last
|
32
60
|
end
|
33
61
|
|
34
62
|
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module BestType
|
2
|
+
class PcdmTypeLookup
|
3
|
+
|
4
|
+
attr_reader :config
|
5
|
+
|
6
|
+
# https://github.com/duraspace/pcdm/blob/master/pcdm-ext/file-format-types.rdf
|
7
|
+
ARCHIVE = "Archive".freeze
|
8
|
+
AUDIO = "Audio".freeze
|
9
|
+
DATABASE = "Database".freeze
|
10
|
+
DATASET = "Dataset".freeze
|
11
|
+
EMAIL = "Email".freeze
|
12
|
+
FONT = "Font".freeze
|
13
|
+
HTML = "HTML".freeze
|
14
|
+
IMAGE = "Image".freeze
|
15
|
+
PAGE_DESCRIPTION = "PageDescription".freeze
|
16
|
+
PRESENTATION = "Presentation".freeze
|
17
|
+
SOFTWARE = "Software".freeze
|
18
|
+
SOURCE_CODE = "SourceCode".freeze
|
19
|
+
SPREADSHEET = "Spreadsheet".freeze
|
20
|
+
STRUCTURED_TEXT = "StructuredText".freeze
|
21
|
+
TEXT = "Text".freeze
|
22
|
+
UNKNOWN = "Unknown".freeze
|
23
|
+
UNSTRUCTURED_TEXT = "UnstructuredText".freeze
|
24
|
+
VIDEO = "Video".freeze
|
25
|
+
WEBSITE = "Website".freeze
|
26
|
+
|
27
|
+
# these include values that will not be derived from MIME/content types
|
28
|
+
VALID_TYPES = [
|
29
|
+
ARCHIVE, AUDIO, DATABASE, DATASET, EMAIL, FONT, HTML, IMAGE, PAGE_DESCRIPTION, PRESENTATION, SOFTWARE,
|
30
|
+
SOURCE_CODE, SPREADSHEET, STRUCTURED_TEXT, TEXT, UNKNOWN, UNSTRUCTURED_TEXT, VIDEO, WEBSITE
|
31
|
+
].freeze
|
32
|
+
|
33
|
+
FALLBACK_TYPE = UNKNOWN
|
34
|
+
|
35
|
+
def initialize(mime_type_lookup_instance)
|
36
|
+
@mime_type_lookup = mime_type_lookup_instance
|
37
|
+
@config = @mime_type_lookup.config
|
38
|
+
end
|
39
|
+
|
40
|
+
def fallback_type
|
41
|
+
FALLBACK_TYPE
|
42
|
+
end
|
43
|
+
|
44
|
+
def valid_type?(value)
|
45
|
+
VALID_TYPES.include? value
|
46
|
+
end
|
47
|
+
|
48
|
+
def for_file_name(file_name_or_path)
|
49
|
+
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
50
|
+
end
|
51
|
+
|
52
|
+
def for_mime_type(mime_type)
|
53
|
+
# Check config overrides first
|
54
|
+
file_type = @config.mime_type_to_pcdm_type_overrides.fetch(mime_type, nil)
|
55
|
+
return file_type unless file_type.nil?
|
56
|
+
|
57
|
+
mimes_to_type = {
|
58
|
+
/^image/i => IMAGE,
|
59
|
+
/^video/i => VIDEO,
|
60
|
+
/^audio/i => AUDIO,
|
61
|
+
/^text/i => {
|
62
|
+
/\/css/i => SOURCE_CODE,
|
63
|
+
/\/html/i => HTML,
|
64
|
+
/.+/ => TEXT
|
65
|
+
},
|
66
|
+
/excel|spreadsheet|xls/i => SPREADSHEET,
|
67
|
+
/application\/sql/i => DATABASE,
|
68
|
+
/csv/i => DATASET,
|
69
|
+
/octet.stream/i => UNKNOWN,
|
70
|
+
/^application/i => {
|
71
|
+
/\/access/i => DATABASE,
|
72
|
+
/\/css/i => SOURCE_CODE,
|
73
|
+
/\/html/i => HTML,
|
74
|
+
/\/mbox/i => EMAIL,
|
75
|
+
/\/mp4/i => VIDEO,
|
76
|
+
/\/mp4a/i => AUDIO,
|
77
|
+
/\/msaccess/i => DATABASE,
|
78
|
+
/\/mxf/i => VIDEO,
|
79
|
+
/\/(pdf|msword)/i => PAGE_DESCRIPTION,
|
80
|
+
/\/postscript/i => PAGE_DESCRIPTION,
|
81
|
+
/\/powerpoint/i => PRESENTATION,
|
82
|
+
/\/rtf/i => PAGE_DESCRIPTION,
|
83
|
+
/\/sql/i => DATABASE,
|
84
|
+
/\/swf/ => VIDEO,
|
85
|
+
/\/vnd.ms-asf/i => VIDEO,
|
86
|
+
/\/vnd.ms-word/i => PAGE_DESCRIPTION,
|
87
|
+
/\/vnd.ms-wpl/i => PAGE_DESCRIPTION,
|
88
|
+
/\/vnd.oasis.opendocument.text/i => PAGE_DESCRIPTION,
|
89
|
+
/\/vnd.openxmlformats-officedocument.presentation/i => PRESENTATION,
|
90
|
+
/\/vnd.openxmlformats-officedocument.wordprocessingml/i => PAGE_DESCRIPTION,
|
91
|
+
/\/vnd.ms-powerpoint/i => PRESENTATION,
|
92
|
+
/\/vnd.sun.xml.calc/i => SPREADSHEET,
|
93
|
+
/\/vnd.sun.xml.impress/i => PRESENTATION,
|
94
|
+
/\/vnd.sun.xml.writer/i => PAGE_DESCRIPTION,
|
95
|
+
/\/xml/i => STRUCTURED_TEXT,
|
96
|
+
/\/x.mspublisher/i => PAGE_DESCRIPTION,
|
97
|
+
/\/x.shockwave-flash/ => VIDEO,
|
98
|
+
/\/x.spss/i => DATASET,
|
99
|
+
/\/zip/i => ARCHIVE,
|
100
|
+
/.+/ => UNKNOWN
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
file_type = mimes_to_type.detect { |pattern, _type_val| mime_type =~ pattern }
|
105
|
+
return fallback_type unless file_type
|
106
|
+
if file_type&.last.is_a? Hash
|
107
|
+
file_type = file_type.last.detect { |pattern, _type_val| mime_type =~ pattern }
|
108
|
+
end
|
109
|
+
file_type.nil? ? fallback_type : file_type.last
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
end
|
data/lib/best_type/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: best_type
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- lib/best_type/config.rb
|
107
107
|
- lib/best_type/dc_type_lookup.rb
|
108
108
|
- lib/best_type/mime_type_lookup.rb
|
109
|
+
- lib/best_type/pcdm_type_lookup.rb
|
109
110
|
- lib/best_type/version.rb
|
110
111
|
- lib/tasks/best_type.rake
|
111
112
|
- lib/tasks/best_type/ci.rake
|
@@ -128,8 +129,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
129
|
- !ruby/object:Gem::Version
|
129
130
|
version: '0'
|
130
131
|
requirements: []
|
131
|
-
|
132
|
-
rubygems_version: 2.6.14
|
132
|
+
rubygems_version: 3.1.4
|
133
133
|
signing_key:
|
134
134
|
specification_version: 4
|
135
135
|
summary: A library for selecting the best mime type or dc type for a file.
|