best_type 0.0.10 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/config/internal_config_options.yml +5 -0
- data/lib/best_type/config.rb +22 -8
- data/lib/best_type/dc_type_lookup.rb +23 -15
- data/lib/best_type/mime_type_lookup.rb +7 -4
- data/lib/best_type/pcdm_type_lookup.rb +68 -59
- data/lib/best_type/version.rb +3 -3
- data/lib/best_type.rb +4 -3
- metadata +7 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b59a909ec727b9062dbdcf5b47b63f714eaf43367dfeef9fa70cb282e4fe6e2d
|
4
|
+
data.tar.gz: fe0c23aeff8d9b2473d41182119740ca8e9ff94e84abf903720454a9e58a5290
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13d9eb9c134947762ab702394539f713d4ea20ec304111170804fa9c5d2b01c56b0cb6854f8cde0af84d461e796f064dbce0ab57d63f7c6583647e410d255b30
|
7
|
+
data.tar.gz: 9f495fb0ff1a4f357f8922ed8dbaafcdf436a89c4669d751ccc7d5193c4eef1577771917adc9ee64a06d03ca6149fef441e97cf3e06ebcdd8eaedec724873c9b
|
data/README.md
CHANGED
@@ -50,9 +50,10 @@ BestType.configure({
|
|
50
50
|
BestType.mime_type.for_file_name('myfile.custom') # 'custom/type'
|
51
51
|
BestType.dc_type.for_file_name('myfile.custom') # 'CustomDC'
|
52
52
|
BestType.pcdm_type.for_mime_type('custom/type') # 'CustomPCDM'
|
53
|
-
|
54
53
|
```
|
55
54
|
|
55
|
+
**Note: Case-insensitive string comparisons are used when checking against file extensions and mime types.**
|
56
|
+
|
56
57
|
### Recommended Setup For Rails
|
57
58
|
|
58
59
|
Add best_type to your Gemfile:
|
@@ -93,7 +94,7 @@ development:
|
|
93
94
|
'good/type': 'Good'
|
94
95
|
mime_type_to_pcdm_type_overrides:
|
95
96
|
'good/type': 'Goodly'
|
96
|
-
|
97
|
+
|
97
98
|
test:
|
98
99
|
extension_to_mime_type_overrides:
|
99
100
|
'better': 'better/type'
|
@@ -1,12 +1,17 @@
|
|
1
1
|
extension_to_mime_type_overrides:
|
2
|
+
# NOTE: Always use lower case keys
|
2
3
|
'test': 'test/type'
|
3
4
|
'mp4': 'video/mp4'
|
4
5
|
'vtt': 'text/vtt'
|
5
6
|
'm4v': 'video/x-m4v'
|
7
|
+
'mts': 'video/MP2T'
|
8
|
+
'ac3': 'audio/ac3'
|
6
9
|
mime_type_to_dc_type_overrides:
|
10
|
+
# NOTE: Always use lower case keys
|
7
11
|
'test/type': 'Test'
|
8
12
|
'application/mxf': 'MovingImage'
|
9
13
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Text'
|
10
14
|
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Dataset'
|
11
15
|
mime_type_to_pcdm_type_overrides:
|
16
|
+
# NOTE: Always use lower case keys
|
12
17
|
'test/type': 'Test'
|
data/lib/best_type/config.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Metrics/AbcSize
|
4
|
+
# rubocop:disable Metrics/MethodLength
|
5
|
+
|
1
6
|
module BestType
|
2
7
|
class Config
|
3
|
-
|
4
8
|
attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides, :mime_type_to_pcdm_type_overrides
|
5
9
|
|
6
10
|
def initialize(user_config_options = {})
|
@@ -14,23 +18,34 @@ module BestType
|
|
14
18
|
@mime_type_to_pcdm_type_overrides = internal_config_options['mime_type_to_pcdm_type_overrides'] || {}
|
15
19
|
|
16
20
|
stringify_user_config_options_keys!(user_config_options)
|
17
|
-
|
18
|
-
|
19
|
-
|
21
|
+
if user_config_options.key?('extension_to_mime_type_overrides')
|
22
|
+
add_extension_to_mime_type_overrides(user_config_options['extension_to_mime_type_overrides'])
|
23
|
+
end
|
24
|
+
if user_config_options.key?('mime_type_to_dc_type_overrides')
|
25
|
+
add_mime_type_to_dc_type_overrides(user_config_options['mime_type_to_dc_type_overrides'])
|
26
|
+
end
|
27
|
+
return unless user_config_options.key?('mime_type_to_pcdm_type_overrides')
|
28
|
+
|
29
|
+
add_mime_type_to_pcdm_type_overrides(user_config_options['mime_type_to_pcdm_type_overrides'])
|
20
30
|
end
|
21
31
|
|
22
32
|
private
|
23
33
|
|
34
|
+
# Returns a new Hash with downcased keys
|
35
|
+
def downcase_hash_keys(hsh)
|
36
|
+
hsh.transform_keys(&:downcase)
|
37
|
+
end
|
38
|
+
|
24
39
|
def add_extension_to_mime_type_overrides(overrides)
|
25
|
-
@extension_to_mime_type_overrides.merge!(overrides)
|
40
|
+
@extension_to_mime_type_overrides.merge!(downcase_hash_keys(overrides))
|
26
41
|
end
|
27
42
|
|
28
43
|
def add_mime_type_to_dc_type_overrides(overrides)
|
29
|
-
@mime_type_to_dc_type_overrides.merge!(overrides)
|
44
|
+
@mime_type_to_dc_type_overrides.merge!(downcase_hash_keys(overrides))
|
30
45
|
end
|
31
46
|
|
32
47
|
def add_mime_type_to_pcdm_type_overrides(overrides)
|
33
|
-
@mime_type_to_pcdm_type_overrides.merge!(overrides)
|
48
|
+
@mime_type_to_pcdm_type_overrides.merge!(downcase_hash_keys(overrides))
|
34
49
|
end
|
35
50
|
|
36
51
|
def stringify_user_config_options_keys!(user_config_options)
|
@@ -43,6 +58,5 @@ module BestType
|
|
43
58
|
end
|
44
59
|
user_config_options_keys
|
45
60
|
end
|
46
|
-
|
47
61
|
end
|
48
62
|
end
|
@@ -1,19 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Metrics/MethodLength
|
4
|
+
|
1
5
|
module BestType
|
2
6
|
class DcTypeLookup
|
3
|
-
|
4
7
|
attr_reader :config
|
5
8
|
|
6
|
-
COLLECTION = 'Collection'
|
7
|
-
DATASET = 'Dataset'
|
8
|
-
EVENT = 'Event'
|
9
|
-
INTERACTIVE_RESOURCE = 'InteractiveResource'
|
10
|
-
MOVING_IMAGE = 'MovingImage'
|
11
|
-
PHYSICAL_OBJECT = 'PhysicalObject'
|
12
|
-
SERVICE = 'Service'
|
13
|
-
SOFTWARE = 'Software'
|
14
|
-
SOUND = 'Sound'
|
15
|
-
STILL_IMAGE = 'StillImage'
|
16
|
-
TEXT = 'Text'
|
9
|
+
COLLECTION = 'Collection'
|
10
|
+
DATASET = 'Dataset'
|
11
|
+
EVENT = 'Event'
|
12
|
+
INTERACTIVE_RESOURCE = 'InteractiveResource'
|
13
|
+
MOVING_IMAGE = 'MovingImage'
|
14
|
+
PHYSICAL_OBJECT = 'PhysicalObject'
|
15
|
+
SERVICE = 'Service'
|
16
|
+
SOFTWARE = 'Software'
|
17
|
+
SOUND = 'Sound'
|
18
|
+
STILL_IMAGE = 'StillImage'
|
19
|
+
TEXT = 'Text'
|
17
20
|
|
18
21
|
# these include values that will not be derived from MIME/content types
|
19
22
|
VALID_TYPES = [
|
@@ -37,10 +40,16 @@ module BestType
|
|
37
40
|
end
|
38
41
|
|
39
42
|
def for_file_name(file_name_or_path)
|
43
|
+
# Normalize format of file_name_or_path
|
44
|
+
file_name_or_path = file_name_or_path.downcase
|
45
|
+
|
40
46
|
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
41
47
|
end
|
42
48
|
|
43
49
|
def for_mime_type(mime_type)
|
50
|
+
# Normalize format of mime_type
|
51
|
+
mime_type = mime_type.downcase
|
52
|
+
|
44
53
|
# Check config overrides first
|
45
54
|
dc_type = @config.mime_type_to_dc_type_overrides.fetch(mime_type, nil)
|
46
55
|
return dc_type unless dc_type.nil?
|
@@ -50,14 +59,13 @@ module BestType
|
|
50
59
|
/^video/ => MOVING_IMAGE,
|
51
60
|
/^audio/ => SOUND,
|
52
61
|
/^text/ => TEXT,
|
53
|
-
|
54
|
-
|
62
|
+
%r{^application/(pdf|msword)} => TEXT,
|
63
|
+
%r{excel|spreadsheet|xls|application/sql} => DATASET,
|
55
64
|
/^application/ => SOFTWARE
|
56
65
|
}
|
57
66
|
|
58
67
|
dc_type = mimes_to_dc.find { |pattern, _type_val| mime_type =~ pattern }
|
59
68
|
dc_type.nil? ? FALLBACK_DC_TYPE : dc_type.last
|
60
69
|
end
|
61
|
-
|
62
70
|
end
|
63
71
|
end
|
@@ -1,19 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'mime-types'
|
2
4
|
|
3
5
|
module BestType
|
4
6
|
class MimeTypeLookup
|
5
|
-
|
6
7
|
attr_reader :config
|
7
8
|
|
8
|
-
FALLBACK_MIME_TYPE_VALUE = 'application/octet-stream'
|
9
|
+
FALLBACK_MIME_TYPE_VALUE = 'application/octet-stream'
|
9
10
|
|
10
11
|
def initialize(config)
|
11
12
|
@config = config
|
12
13
|
end
|
13
14
|
|
14
15
|
def for_file_name(file_name_or_path)
|
16
|
+
# Normalize format of file_name_or_path
|
17
|
+
file_name_or_path = file_name_or_path.downcase
|
18
|
+
|
15
19
|
extension = File.extname(file_name_or_path)
|
16
|
-
extension = extension[1
|
20
|
+
extension = extension[1..] unless extension.empty?
|
17
21
|
|
18
22
|
# Check config overrides first
|
19
23
|
unless extension.empty?
|
@@ -25,6 +29,5 @@ module BestType
|
|
25
29
|
detected_mime_types = MIME::Types.of(file_name_or_path)
|
26
30
|
detected_mime_types.empty? ? FALLBACK_MIME_TYPE_VALUE : detected_mime_types.first.content_type
|
27
31
|
end
|
28
|
-
|
29
32
|
end
|
30
33
|
end
|
@@ -1,28 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# rubocop:disable Metrics/AbcSize
|
4
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
5
|
+
# rubocop:disable Metrics/MethodLength
|
6
|
+
|
1
7
|
module BestType
|
2
8
|
class PcdmTypeLookup
|
3
|
-
|
4
9
|
attr_reader :config
|
5
10
|
|
6
11
|
# https://github.com/duraspace/pcdm/blob/master/pcdm-ext/file-format-types.rdf
|
7
|
-
ARCHIVE =
|
8
|
-
AUDIO =
|
9
|
-
DATABASE =
|
10
|
-
DATASET =
|
11
|
-
EMAIL =
|
12
|
-
FONT =
|
13
|
-
HTML =
|
14
|
-
IMAGE =
|
15
|
-
PAGE_DESCRIPTION =
|
16
|
-
PRESENTATION =
|
17
|
-
SOFTWARE =
|
18
|
-
SOURCE_CODE =
|
19
|
-
SPREADSHEET =
|
20
|
-
STRUCTURED_TEXT =
|
21
|
-
TEXT =
|
22
|
-
UNKNOWN =
|
23
|
-
UNSTRUCTURED_TEXT =
|
24
|
-
VIDEO =
|
25
|
-
WEBSITE =
|
12
|
+
ARCHIVE = 'Archive'
|
13
|
+
AUDIO = 'Audio'
|
14
|
+
DATABASE = 'Database'
|
15
|
+
DATASET = 'Dataset'
|
16
|
+
EMAIL = 'Email'
|
17
|
+
FONT = 'Font'
|
18
|
+
HTML = 'HTML'
|
19
|
+
IMAGE = 'Image'
|
20
|
+
PAGE_DESCRIPTION = 'PageDescription'
|
21
|
+
PRESENTATION = 'Presentation'
|
22
|
+
SOFTWARE = 'Software'
|
23
|
+
SOURCE_CODE = 'SourceCode'
|
24
|
+
SPREADSHEET = 'Spreadsheet'
|
25
|
+
STRUCTURED_TEXT = 'StructuredText'
|
26
|
+
TEXT = 'Text'
|
27
|
+
UNKNOWN = 'Unknown'
|
28
|
+
UNSTRUCTURED_TEXT = 'UnstructuredText'
|
29
|
+
VIDEO = 'Video'
|
30
|
+
WEBSITE = 'Website'
|
26
31
|
|
27
32
|
# these include values that will not be derived from MIME/content types
|
28
33
|
VALID_TYPES = [
|
@@ -46,10 +51,16 @@ module BestType
|
|
46
51
|
end
|
47
52
|
|
48
53
|
def for_file_name(file_name_or_path)
|
54
|
+
# Normalize format of file_name_or_path
|
55
|
+
file_name_or_path = file_name_or_path.downcase
|
56
|
+
|
49
57
|
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
50
58
|
end
|
51
59
|
|
52
60
|
def for_mime_type(mime_type)
|
61
|
+
# Normalize format of mime_type
|
62
|
+
mime_type = mime_type.downcase
|
63
|
+
|
53
64
|
# Check config overrides first
|
54
65
|
file_type = @config.mime_type_to_pcdm_type_overrides.fetch(mime_type, nil)
|
55
66
|
return file_type unless file_type.nil?
|
@@ -59,58 +70,56 @@ module BestType
|
|
59
70
|
/^video/i => VIDEO,
|
60
71
|
/^audio/i => AUDIO,
|
61
72
|
/^text/i => {
|
62
|
-
|
63
|
-
|
73
|
+
%r{/css}i => SOURCE_CODE,
|
74
|
+
%r{/html}i => HTML,
|
64
75
|
/.+/ => TEXT
|
65
76
|
},
|
66
77
|
/excel|spreadsheet|xls/i => SPREADSHEET,
|
67
|
-
/
|
78
|
+
%r{application/sql}i => DATABASE,
|
68
79
|
/csv/i => DATASET,
|
69
80
|
/octet.stream/i => UNKNOWN,
|
70
81
|
/^application/i => {
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
82
|
+
%r{/access}i => DATABASE,
|
83
|
+
%r{/css}i => SOURCE_CODE,
|
84
|
+
%r{/html}i => HTML,
|
85
|
+
%r{/x-iwork-keynote}i => PRESENTATION,
|
86
|
+
%r{/x-iwork-numbers}i => SPREADSHEET,
|
87
|
+
%r{/x-iwork-pages}i => PAGE_DESCRIPTION,
|
88
|
+
%r{/mbox}i => EMAIL,
|
89
|
+
%r{/mp4}i => VIDEO,
|
90
|
+
%r{/mp4a}i => AUDIO,
|
91
|
+
%r{/msaccess}i => DATABASE,
|
92
|
+
%r{/mxf}i => VIDEO,
|
93
|
+
%r{/(pdf|msword)}i => PAGE_DESCRIPTION,
|
94
|
+
%r{/postscript}i => PAGE_DESCRIPTION,
|
95
|
+
%r{/powerpoint}i => PRESENTATION,
|
96
|
+
%r{/rtf}i => PAGE_DESCRIPTION,
|
97
|
+
%r{/sql}i => DATABASE,
|
98
|
+
%r{/swf} => VIDEO,
|
99
|
+
%r{/vnd.ms-asf}i => VIDEO,
|
100
|
+
%r{/vnd.ms-word}i => PAGE_DESCRIPTION,
|
101
|
+
%r{/vnd.ms-wpl}i => PAGE_DESCRIPTION,
|
102
|
+
%r{/vnd.oasis.opendocument.text}i => PAGE_DESCRIPTION,
|
103
|
+
%r{/vnd.openxmlformats-officedocument.presentation}i => PRESENTATION,
|
104
|
+
%r{/vnd.openxmlformats-officedocument.wordprocessingml}i => PAGE_DESCRIPTION,
|
105
|
+
%r{/vnd.ms-powerpoint}i => PRESENTATION,
|
106
|
+
%r{/vnd.sun.xml.calc}i => SPREADSHEET,
|
107
|
+
%r{/vnd.sun.xml.impress}i => PRESENTATION,
|
108
|
+
%r{/vnd.sun.xml.writer}i => PAGE_DESCRIPTION,
|
109
|
+
%r{/xml}i => STRUCTURED_TEXT,
|
110
|
+
%r{/x.mspublisher}i => PAGE_DESCRIPTION,
|
111
|
+
%r{/x.shockwave-flash} => VIDEO,
|
112
|
+
%r{/x.spss}i => DATASET,
|
113
|
+
%r{/zip}i => ARCHIVE,
|
103
114
|
/.+/ => UNKNOWN
|
104
115
|
}
|
105
116
|
}
|
106
117
|
|
107
118
|
file_type = mimes_to_type.detect { |pattern, _type_val| mime_type =~ pattern }
|
108
119
|
return fallback_type unless file_type
|
109
|
-
|
110
|
-
|
111
|
-
end
|
120
|
+
|
121
|
+
file_type = file_type.last.detect { |pattern, _type_val| mime_type =~ pattern } if file_type&.last.is_a? Hash
|
112
122
|
file_type.nil? ? fallback_type : file_type.last
|
113
123
|
end
|
114
|
-
|
115
124
|
end
|
116
125
|
end
|
data/lib/best_type/version.rb
CHANGED
data/lib/best_type.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'best_type/version'
|
2
4
|
require 'best_type/config'
|
3
5
|
require 'best_type/mime_type_lookup'
|
@@ -20,7 +22,7 @@ module BestType
|
|
20
22
|
@pcdm_type ||= BestType::PcdmTypeLookup.new(mime_type)
|
21
23
|
end
|
22
24
|
|
23
|
-
def self.config(reload
|
25
|
+
def self.config(reload: false, user_config_options: {})
|
24
26
|
if @config.nil? || reload
|
25
27
|
@semaphore.synchronize do
|
26
28
|
@config = BestType::Config.new(user_config_options)
|
@@ -32,7 +34,6 @@ module BestType
|
|
32
34
|
end
|
33
35
|
|
34
36
|
def self.configure(opts = {})
|
35
|
-
config(true, opts)
|
37
|
+
config(reload: true, user_config_options: opts)
|
36
38
|
end
|
37
|
-
|
38
39
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: best_type
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '3.
|
19
|
+
version: '3.4'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '3.
|
26
|
+
version: '3.4'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,33 +53,19 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.7'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: rubocul
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.
|
61
|
+
version: 4.0.11
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rubocop-rspec
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - "~>"
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: 1.20.1
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: 1.20.1
|
68
|
+
version: 4.0.11
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
70
|
name: simplecov
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -129,7 +115,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
115
|
- !ruby/object:Gem::Version
|
130
116
|
version: '0'
|
131
117
|
requirements: []
|
132
|
-
rubygems_version: 3.
|
118
|
+
rubygems_version: 3.3.26
|
133
119
|
signing_key:
|
134
120
|
specification_version: 4
|
135
121
|
summary: A library for selecting the best mime type or dc type for a file.
|