best_type 0.0.3 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +19 -3
- data/config/internal_config_options.yml +6 -0
- data/lib/best_type.rb +5 -0
- data/lib/best_type/config.rb +9 -3
- data/lib/best_type/dc_type_lookup.rb +34 -8
- data/lib/best_type/pcdm_type_lookup.rb +116 -0
- data/lib/best_type/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 29fe983eac1114ba38d161af60db4bc5baaed14ffc7f9fa226ec0ce9f6b9658b
|
4
|
+
data.tar.gz: 2d8c71566c13afaf5f48253ad659a4aad8c4f345403e0067ad6bc08b793f6451
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 905b6b2f3bf244668bc1962eea0561cd267e7aaee333c2c4817f56ca917b7181c0d55cddfbaa952032ed839aa75422d1c3a75ee0b5b17fecf7ea826a8a4f8c28
|
7
|
+
data.tar.gz: a96af2728b2d1c5c15fdbb4a3a065be91d11352f81c8ab1a6ee38a517e737bc6c97625538a17a985015b58ee21332c13fded4160964f42c0189ea44bf155d7cf
|
data/README.md
CHANGED
@@ -28,6 +28,12 @@ BestType.dc_type.for_file_name('/path/to/some/file.jpg') # 'StillImage'
|
|
28
28
|
|
29
29
|
# detect dc type for mime types
|
30
30
|
BestType.dc_type.for_mime_type('image/jpeg') # 'StillImage'
|
31
|
+
|
32
|
+
# detect pcdm type for file names (including full file path)
|
33
|
+
BestType.pcdm_type.for_file_name('/path/to/some/file.jpg') # 'Image'
|
34
|
+
|
35
|
+
# detect pcdm type for mime types
|
36
|
+
BestType.pcdm_type.for_mime_type('image/jpeg') # 'Image'
|
31
37
|
```
|
32
38
|
|
33
39
|
### Add Custom Overrides
|
@@ -36,12 +42,14 @@ BestType.configure({
|
|
36
42
|
extension_to_mime_type_overrides:
|
37
43
|
'custom': 'custom/type'
|
38
44
|
mime_type_to_dc_type_overrides:
|
39
|
-
'custom/type': '
|
45
|
+
'custom/type': 'CustomDC'
|
46
|
+
mime_type_to_pcdm_type_overrides:
|
47
|
+
'custom/type': 'CustomPCDM'
|
40
48
|
})
|
41
49
|
|
42
50
|
BestType.mime_type.for_file_name('myfile.custom') # 'custom/type'
|
43
|
-
BestType.dc_type.for_file_name('myfile.custom') # '
|
44
|
-
BestType.
|
51
|
+
BestType.dc_type.for_file_name('myfile.custom') # 'CustomDC'
|
52
|
+
BestType.pcdm_type.for_mime_type('custom/type') # 'CustomPCDM'
|
45
53
|
|
46
54
|
```
|
47
55
|
|
@@ -63,6 +71,8 @@ BestType.configure({
|
|
63
71
|
'custom': 'custom/type'
|
64
72
|
mime_type_to_dc_type_overrides:
|
65
73
|
'custom/type': 'Custom'
|
74
|
+
mime_type_to_pcdm_type_overrides:
|
75
|
+
'custom/type': 'Custom'
|
66
76
|
})
|
67
77
|
```
|
68
78
|
|
@@ -81,18 +91,24 @@ development:
|
|
81
91
|
'good': 'good/type'
|
82
92
|
mime_type_to_dc_type_overrides:
|
83
93
|
'good/type': 'Good'
|
94
|
+
mime_type_to_pcdm_type_overrides:
|
95
|
+
'good/type': 'Goodly'
|
84
96
|
|
85
97
|
test:
|
86
98
|
extension_to_mime_type_overrides:
|
87
99
|
'better': 'better/type'
|
88
100
|
mime_type_to_dc_type_overrides:
|
89
101
|
'better/type': 'Better'
|
102
|
+
mime_type_to_pcdm_type_overrides:
|
103
|
+
'better/type': 'Betterly'
|
90
104
|
|
91
105
|
production:
|
92
106
|
extension_to_mime_type_overrides:
|
93
107
|
'best': 'best/type'
|
94
108
|
mime_type_to_dc_type_overrides:
|
95
109
|
'best/type': 'Best'
|
110
|
+
mime_type_to_pcdm_type_overrides:
|
111
|
+
'best/type': 'Bestly'
|
96
112
|
```
|
97
113
|
|
98
114
|
### Running Tests (for developers):
|
data/config/internal_config_options.yml
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
extension_to_mime_type_overrides:
|
2
2
|
'test': 'test/type'
|
3
3
|
'mp4': 'video/mp4'
|
4
|
+
'vtt': 'text/vtt'
|
5
|
+
'm4v': 'video/x-m4v'
|
4
6
|
mime_type_to_dc_type_overrides:
|
5
7
|
'test/type': 'Test'
|
6
8
|
'application/mxf': 'MovingImage'
|
9
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Text'
|
10
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Dataset'
|
11
|
+
mime_type_to_pcdm_type_overrides:
|
12
|
+
'test/type': 'Test'
|
data/lib/best_type.rb
CHANGED
@@ -2,6 +2,7 @@ require 'best_type/version'
|
|
2
2
|
require 'best_type/config'
|
3
3
|
require 'best_type/mime_type_lookup'
|
4
4
|
require 'best_type/dc_type_lookup'
|
5
|
+
require 'best_type/pcdm_type_lookup'
|
5
6
|
require 'yaml'
|
6
7
|
|
7
8
|
module BestType
|
@@ -15,6 +16,10 @@ module BestType
|
|
15
16
|
@dc_type ||= BestType::DcTypeLookup.new(mime_type)
|
16
17
|
end
|
17
18
|
|
19
|
+
def self.pcdm_type
|
20
|
+
@pcdm_type ||= BestType::PcdmTypeLookup.new(mime_type)
|
21
|
+
end
|
22
|
+
|
18
23
|
def self.config(reload = false, user_config_options = {})
|
19
24
|
if @config.nil? || reload
|
20
25
|
@semaphore.synchronize do
|
data/lib/best_type/config.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module BestType
|
2
2
|
class Config
|
3
3
|
|
4
|
-
attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides
|
4
|
+
attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides, :mime_type_to_pcdm_type_overrides
|
5
5
|
|
6
6
|
def initialize(user_config_options = {})
|
7
7
|
# Get defaults from config/internal_config_options.yml in gem
|
@@ -9,12 +9,14 @@ module BestType
|
|
9
9
|
internal_config_file_path = File.join(gem_dir, 'config/internal_config_options.yml')
|
10
10
|
internal_config_options = YAML.load_file(internal_config_file_path)
|
11
11
|
|
12
|
-
@extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides']
|
13
|
-
@mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides']
|
12
|
+
@extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides'] || {}
|
13
|
+
@mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides'] || {}
|
14
|
+
@mime_type_to_pcdm_type_overrides = internal_config_options['mime_type_to_pcdm_type_overrides'] || {}
|
14
15
|
|
15
16
|
stringify_user_config_options_keys!(user_config_options)
|
16
17
|
add_extension_to_mime_type_overrides(user_config_options['extension_to_mime_type_overrides']) if user_config_options.key?('extension_to_mime_type_overrides')
|
17
18
|
add_mime_type_to_dc_type_overrides(user_config_options['mime_type_to_dc_type_overrides']) if user_config_options.key?('mime_type_to_dc_type_overrides')
|
19
|
+
add_mime_type_to_pcdm_type_overrides(user_config_options['mime_type_to_pcdm_type_overrides']) if user_config_options.key?('mime_type_to_pcdm_type_overrides')
|
18
20
|
end
|
19
21
|
|
20
22
|
private
|
@@ -27,6 +29,10 @@ module BestType
|
|
27
29
|
@mime_type_to_dc_type_overrides.merge!(overrides)
|
28
30
|
end
|
29
31
|
|
32
|
+
def add_mime_type_to_pcdm_type_overrides(overrides)
|
33
|
+
@mime_type_to_pcdm_type_overrides.merge!(overrides)
|
34
|
+
end
|
35
|
+
|
30
36
|
def stringify_user_config_options_keys!(user_config_options)
|
31
37
|
user_config_options_keys = user_config_options.keys
|
32
38
|
user_config_options_keys.each do |key|
|
data/lib/best_type/dc_type_lookup.rb
CHANGED
@@ -3,13 +3,39 @@ module BestType
|
|
3
3
|
|
4
4
|
attr_reader :config
|
5
5
|
|
6
|
-
|
6
|
+
COLLECTION = 'Collection'.freeze
|
7
|
+
DATASET = 'Dataset'.freeze
|
8
|
+
EVENT = 'Event'.freeze
|
9
|
+
INTERACTIVE_RESOURCE = 'InteractiveResource'.freeze
|
10
|
+
MOVING_IMAGE = 'MovingImage'.freeze
|
11
|
+
PHYSICAL_OBJECT = 'PhysicalObject'.freeze
|
12
|
+
SERVICE = 'Service'.freeze
|
13
|
+
SOFTWARE = 'Software'.freeze
|
14
|
+
SOUND = 'Sound'.freeze
|
15
|
+
STILL_IMAGE = 'StillImage'.freeze
|
16
|
+
TEXT = 'Text'.freeze
|
17
|
+
|
18
|
+
# these include values that will not be derived from MIME/content types
|
19
|
+
VALID_TYPES = [
|
20
|
+
COLLECTION, EVENT, INTERACTIVE_RESOURCE, MOVING_IMAGE, PHYSICAL_OBJECT,
|
21
|
+
SERVICE, SOFTWARE, SOUND, STILL_IMAGE, TEXT
|
22
|
+
].freeze
|
23
|
+
|
24
|
+
FALLBACK_DC_TYPE = SOFTWARE
|
7
25
|
|
8
26
|
def initialize(mime_type_lookup_instance)
|
9
27
|
@mime_type_lookup = mime_type_lookup_instance
|
10
28
|
@config = @mime_type_lookup.config
|
11
29
|
end
|
12
30
|
|
31
|
+
def fallback_type
|
32
|
+
FALLBACK_DC_TYPE
|
33
|
+
end
|
34
|
+
|
35
|
+
def valid_type?(value)
|
36
|
+
VALID_TYPES.include? value
|
37
|
+
end
|
38
|
+
|
13
39
|
def for_file_name(file_name_or_path)
|
14
40
|
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
15
41
|
end
|
@@ -20,13 +46,13 @@ module BestType
|
|
20
46
|
return dc_type unless dc_type.nil?
|
21
47
|
|
22
48
|
mimes_to_dc = {
|
23
|
-
/^image/ =>
|
24
|
-
/^video/ =>
|
25
|
-
/^audio/ =>
|
26
|
-
/^text/ =>
|
27
|
-
/^application\/(pdf|msword)/ =>
|
28
|
-
/excel|spreadsheet|xls|application\/sql/ =>
|
29
|
-
/^application/ =>
|
49
|
+
/^image/ => STILL_IMAGE,
|
50
|
+
/^video/ => MOVING_IMAGE,
|
51
|
+
/^audio/ => SOUND,
|
52
|
+
/^text/ => TEXT,
|
53
|
+
/^application\/(pdf|msword)/ => TEXT,
|
54
|
+
/excel|spreadsheet|xls|application\/sql/ => DATASET,
|
55
|
+
/^application/ => SOFTWARE
|
30
56
|
}
|
31
57
|
|
32
58
|
dc_type = mimes_to_dc.find { |pattern, _type_val| mime_type =~ pattern }
|
data/lib/best_type/pcdm_type_lookup.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
module BestType
|
2
|
+
class PcdmTypeLookup
|
3
|
+
|
4
|
+
attr_reader :config
|
5
|
+
|
6
|
+
# https://github.com/duraspace/pcdm/blob/master/pcdm-ext/file-format-types.rdf
|
7
|
+
ARCHIVE = "Archive".freeze
|
8
|
+
AUDIO = "Audio".freeze
|
9
|
+
DATABASE = "Database".freeze
|
10
|
+
DATASET = "Dataset".freeze
|
11
|
+
EMAIL = "Email".freeze
|
12
|
+
FONT = "Font".freeze
|
13
|
+
HTML = "HTML".freeze
|
14
|
+
IMAGE = "Image".freeze
|
15
|
+
PAGE_DESCRIPTION = "PageDescription".freeze
|
16
|
+
PRESENTATION = "Presentation".freeze
|
17
|
+
SOFTWARE = "Software".freeze
|
18
|
+
SOURCE_CODE = "SourceCode".freeze
|
19
|
+
SPREADSHEET = "Spreadsheet".freeze
|
20
|
+
STRUCTURED_TEXT = "StructuredText".freeze
|
21
|
+
TEXT = "Text".freeze
|
22
|
+
UNKNOWN = "Unknown".freeze
|
23
|
+
UNSTRUCTURED_TEXT = "UnstructuredText".freeze
|
24
|
+
VIDEO = "Video".freeze
|
25
|
+
WEBSITE = "Website".freeze
|
26
|
+
|
27
|
+
# these include values that will not be derived from MIME/content types
|
28
|
+
VALID_TYPES = [
|
29
|
+
ARCHIVE, AUDIO, DATABASE, DATASET, EMAIL, FONT, HTML, IMAGE, PAGE_DESCRIPTION, PRESENTATION, SOFTWARE,
|
30
|
+
SOURCE_CODE, SPREADSHEET, STRUCTURED_TEXT, TEXT, UNKNOWN, UNSTRUCTURED_TEXT, VIDEO, WEBSITE
|
31
|
+
].freeze
|
32
|
+
|
33
|
+
FALLBACK_TYPE = UNKNOWN
|
34
|
+
|
35
|
+
def initialize(mime_type_lookup_instance)
|
36
|
+
@mime_type_lookup = mime_type_lookup_instance
|
37
|
+
@config = @mime_type_lookup.config
|
38
|
+
end
|
39
|
+
|
40
|
+
def fallback_type
|
41
|
+
FALLBACK_TYPE
|
42
|
+
end
|
43
|
+
|
44
|
+
def valid_type?(value)
|
45
|
+
VALID_TYPES.include? value
|
46
|
+
end
|
47
|
+
|
48
|
+
def for_file_name(file_name_or_path)
|
49
|
+
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
50
|
+
end
|
51
|
+
|
52
|
+
def for_mime_type(mime_type)
|
53
|
+
# Check config overrides first
|
54
|
+
file_type = @config.mime_type_to_pcdm_type_overrides.fetch(mime_type, nil)
|
55
|
+
return file_type unless file_type.nil?
|
56
|
+
|
57
|
+
mimes_to_type = {
|
58
|
+
/^image/i => IMAGE,
|
59
|
+
/^video/i => VIDEO,
|
60
|
+
/^audio/i => AUDIO,
|
61
|
+
/^text/i => {
|
62
|
+
/\/css/i => SOURCE_CODE,
|
63
|
+
/\/html/i => HTML,
|
64
|
+
/.+/ => TEXT
|
65
|
+
},
|
66
|
+
/excel|spreadsheet|xls/i => SPREADSHEET,
|
67
|
+
/application\/sql/i => DATABASE,
|
68
|
+
/csv/i => DATASET,
|
69
|
+
/octet.stream/i => UNKNOWN,
|
70
|
+
/^application/i => {
|
71
|
+
/\/access/i => DATABASE,
|
72
|
+
/\/css/i => SOURCE_CODE,
|
73
|
+
/\/html/i => HTML,
|
74
|
+
/\/x-iwork-keynote/i => PRESENTATION,
|
75
|
+
/\/x-iwork-numbers/i => SPREADSHEET,
|
76
|
+
/\/x-iwork-pages/i => PAGE_DESCRIPTION,
|
77
|
+
/\/mbox/i => EMAIL,
|
78
|
+
/\/mp4/i => VIDEO,
|
79
|
+
/\/mp4a/i => AUDIO,
|
80
|
+
/\/msaccess/i => DATABASE,
|
81
|
+
/\/mxf/i => VIDEO,
|
82
|
+
/\/(pdf|msword)/i => PAGE_DESCRIPTION,
|
83
|
+
/\/postscript/i => PAGE_DESCRIPTION,
|
84
|
+
/\/powerpoint/i => PRESENTATION,
|
85
|
+
/\/rtf/i => PAGE_DESCRIPTION,
|
86
|
+
/\/sql/i => DATABASE,
|
87
|
+
/\/swf/ => VIDEO,
|
88
|
+
/\/vnd.ms-asf/i => VIDEO,
|
89
|
+
/\/vnd.ms-word/i => PAGE_DESCRIPTION,
|
90
|
+
/\/vnd.ms-wpl/i => PAGE_DESCRIPTION,
|
91
|
+
/\/vnd.oasis.opendocument.text/i => PAGE_DESCRIPTION,
|
92
|
+
/\/vnd.openxmlformats-officedocument.presentation/i => PRESENTATION,
|
93
|
+
/\/vnd.openxmlformats-officedocument.wordprocessingml/i => PAGE_DESCRIPTION,
|
94
|
+
/\/vnd.ms-powerpoint/i => PRESENTATION,
|
95
|
+
/\/vnd.sun.xml.calc/i => SPREADSHEET,
|
96
|
+
/\/vnd.sun.xml.impress/i => PRESENTATION,
|
97
|
+
/\/vnd.sun.xml.writer/i => PAGE_DESCRIPTION,
|
98
|
+
/\/xml/i => STRUCTURED_TEXT,
|
99
|
+
/\/x.mspublisher/i => PAGE_DESCRIPTION,
|
100
|
+
/\/x.shockwave-flash/ => VIDEO,
|
101
|
+
/\/x.spss/i => DATASET,
|
102
|
+
/\/zip/i => ARCHIVE,
|
103
|
+
/.+/ => UNKNOWN
|
104
|
+
}
|
105
|
+
}
|
106
|
+
|
107
|
+
file_type = mimes_to_type.detect { |pattern, _type_val| mime_type =~ pattern }
|
108
|
+
return fallback_type unless file_type
|
109
|
+
if file_type&.last.is_a? Hash
|
110
|
+
file_type = file_type.last.detect { |pattern, _type_val| mime_type =~ pattern }
|
111
|
+
end
|
112
|
+
file_type.nil? ? fallback_type : file_type.last
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
116
|
+
end
|
data/lib/best_type/version.rb
CHANGED
metadata
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: best_type
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2018-03-25 00:00:00.000000000 Z
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- lib/best_type/config.rb
|
107
107
|
- lib/best_type/dc_type_lookup.rb
|
108
108
|
- lib/best_type/mime_type_lookup.rb
|
109
|
+
- lib/best_type/pcdm_type_lookup.rb
|
109
110
|
- lib/best_type/version.rb
|
110
111
|
- lib/tasks/best_type.rake
|
111
112
|
- lib/tasks/best_type/ci.rake
|
@@ -113,7 +114,7 @@ homepage: https://github.com/cul/best_type
|
|
113
114
|
licenses:
|
114
115
|
- MIT
|
115
116
|
metadata: {}
|
116
|
-
post_install_message:
|
117
|
+
post_install_message:
|
117
118
|
rdoc_options: []
|
118
119
|
require_paths:
|
119
120
|
- lib
|
@@ -128,9 +129,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
129
|
- !ruby/object:Gem::Version
|
129
130
|
version: '0'
|
130
131
|
requirements: []
|
131
|
-
|
132
|
-
|
133
|
-
signing_key:
|
132
|
+
rubygems_version: 3.0.8
|
133
|
+
signing_key:
|
134
134
|
specification_version: 4
|
135
135
|
summary: A library for selecting the best mime type or dc type for a file.
|
136
136
|
test_files: []
|