best_type 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +19 -3
- data/config/internal_config_options.yml +2 -0
- data/lib/best_type.rb +5 -0
- data/lib/best_type/config.rb +9 -3
- data/lib/best_type/dc_type_lookup.rb +34 -8
- data/lib/best_type/pcdm_type_lookup.rb +113 -0
- data/lib/best_type/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e8b2cfe247c6e8a1a94dada5b9ffc4538b2154d5afcc002a0718007c7be5cd9
|
4
|
+
data.tar.gz: 313248015c4f2f2e364ed4da2197173038c183b3083ae8e26a0e82cd4066de1f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa0414efd2ee4510098cf769dd18b54759edeaa9d833a9f9ce2fed3b32a2921325a16d15cba5f4609ffb5b3334c39b9f7d56c94c427c42933c55f7be9db40d0f
|
7
|
+
data.tar.gz: ef907355f8e95552c84d55661b81f4ddb3d6bd4ec9b4901d01f7a98cc3eea27f59ae807c5d933fa3c2281d79b253d81d388e69af3295fb2eead756347c083eee
|
data/README.md
CHANGED
@@ -28,6 +28,12 @@ BestType.dc_type.for_file_name('/path/to/some/file.jpg') # 'StillImage'
|
|
28
28
|
|
29
29
|
# detect dc type for mime types
|
30
30
|
BestType.dc_type.for_mime_type('image/jpeg') # 'StillImage'
|
31
|
+
|
32
|
+
# detect pcdm type for file names (including full file path)
|
33
|
+
BestType.pcdm_type.for_file_name('/path/to/some/file.jpg') # 'Image'
|
34
|
+
|
35
|
+
# detect pcdm type for mime types
|
36
|
+
BestType.pcdm_type.for_mime_type('image/jpeg') # 'Image'
|
31
37
|
```
|
32
38
|
|
33
39
|
### Add Custom Overrides
|
@@ -36,12 +42,14 @@ BestType.configure({
|
|
36
42
|
extension_to_mime_type_overrides:
|
37
43
|
'custom': 'custom/type'
|
38
44
|
mime_type_to_dc_type_overrides:
|
39
|
-
'custom/type': '
|
45
|
+
'custom/type': 'CustomDC'
|
46
|
+
mime_type_to_pcdm_type_overrides:
|
47
|
+
'custom/type': 'CustomPCDM'
|
40
48
|
})
|
41
49
|
|
42
50
|
BestType.mime_type.for_file_name('myfile.custom') # 'custom/type'
|
43
|
-
BestType.dc_type.for_file_name('myfile.custom') # '
|
44
|
-
BestType.
|
51
|
+
BestType.dc_type.for_file_name('myfile.custom') # 'CustomDC'
|
52
|
+
BestType.pcdm_type.for_mime_type('custom/type') # 'CustomPCDM'
|
45
53
|
|
46
54
|
```
|
47
55
|
|
@@ -63,6 +71,8 @@ BestType.configure({
|
|
63
71
|
'custom': 'custom/type'
|
64
72
|
mime_type_to_dc_type_overrides:
|
65
73
|
'custom/type': 'Custom'
|
74
|
+
mime_type_to_pcdm_type_overrides:
|
75
|
+
'custom/type': 'Custom'
|
66
76
|
})
|
67
77
|
```
|
68
78
|
|
@@ -81,18 +91,24 @@ development:
|
|
81
91
|
'good': 'good/type'
|
82
92
|
mime_type_to_dc_type_overrides:
|
83
93
|
'good/type': 'Good'
|
94
|
+
mime_type_to_pcdm_type_overrides:
|
95
|
+
'good/type': 'Goodly'
|
84
96
|
|
85
97
|
test:
|
86
98
|
extension_to_mime_type_overrides:
|
87
99
|
'better': 'better/type'
|
88
100
|
mime_type_to_dc_type_overrides:
|
89
101
|
'better/type': 'Better'
|
102
|
+
mime_type_to_pcdm_type_overrides:
|
103
|
+
'better/type': 'Betterly'
|
90
104
|
|
91
105
|
production:
|
92
106
|
extension_to_mime_type_overrides:
|
93
107
|
'best': 'best/type'
|
94
108
|
mime_type_to_dc_type_overrides:
|
95
109
|
'best/type': 'Best'
|
110
|
+
mime_type_to_pcdm_type_overrides:
|
111
|
+
'best/type': 'Bestly'
|
96
112
|
```
|
97
113
|
|
98
114
|
### Running Tests (for developers):
|
@@ -6,3 +6,5 @@ mime_type_to_dc_type_overrides:
|
|
6
6
|
'application/mxf': 'MovingImage'
|
7
7
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Text'
|
8
8
|
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Dataset'
|
9
|
+
mime_type_to_pcdm_type_overrides:
|
10
|
+
'test/type': 'Test'
|
data/lib/best_type.rb
CHANGED
@@ -2,6 +2,7 @@ require 'best_type/version'
|
|
2
2
|
require 'best_type/config'
|
3
3
|
require 'best_type/mime_type_lookup'
|
4
4
|
require 'best_type/dc_type_lookup'
|
5
|
+
require 'best_type/pcdm_type_lookup'
|
5
6
|
require 'yaml'
|
6
7
|
|
7
8
|
module BestType
|
@@ -15,6 +16,10 @@ module BestType
|
|
15
16
|
@dc_type ||= BestType::DcTypeLookup.new(mime_type)
|
16
17
|
end
|
17
18
|
|
19
|
+
def self.pcdm_type
|
20
|
+
@pcdm_type ||= BestType::PcdmTypeLookup.new(mime_type)
|
21
|
+
end
|
22
|
+
|
18
23
|
def self.config(reload = false, user_config_options = {})
|
19
24
|
if @config.nil? || reload
|
20
25
|
@semaphore.synchronize do
|
data/lib/best_type/config.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module BestType
|
2
2
|
class Config
|
3
3
|
|
4
|
-
attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides
|
4
|
+
attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides, :mime_type_to_pcdm_type_overrides
|
5
5
|
|
6
6
|
def initialize(user_config_options = {})
|
7
7
|
# Get defaults from internal_custom_mapping.yml in gem
|
@@ -9,12 +9,14 @@ module BestType
|
|
9
9
|
internal_config_file_path = File.join(gem_dir, 'config/internal_config_options.yml')
|
10
10
|
internal_config_options = YAML.load_file(internal_config_file_path)
|
11
11
|
|
12
|
-
@extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides']
|
13
|
-
@mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides']
|
12
|
+
@extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides'] || {}
|
13
|
+
@mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides'] || {}
|
14
|
+
@mime_type_to_pcdm_type_overrides = internal_config_options['mime_type_to_pcdm_type_overrides'] || {}
|
14
15
|
|
15
16
|
stringify_user_config_options_keys!(user_config_options)
|
16
17
|
add_extension_to_mime_type_overrides(user_config_options['extension_to_mime_type_overrides']) if user_config_options.key?('extension_to_mime_type_overrides')
|
17
18
|
add_mime_type_to_dc_type_overrides(user_config_options['mime_type_to_dc_type_overrides']) if user_config_options.key?('mime_type_to_dc_type_overrides')
|
19
|
+
add_mime_type_to_pcdm_type_overrides(user_config_options['mime_type_to_pcdm_type_overrides']) if user_config_options.key?('mime_type_to_pcdm_type_overrides')
|
18
20
|
end
|
19
21
|
|
20
22
|
private
|
@@ -27,6 +29,10 @@ module BestType
|
|
27
29
|
@mime_type_to_dc_type_overrides.merge!(overrides)
|
28
30
|
end
|
29
31
|
|
32
|
+
def add_mime_type_to_pcdm_type_overrides(overrides)
|
33
|
+
@mime_type_to_pcdm_type_overrides.merge!(overrides)
|
34
|
+
end
|
35
|
+
|
30
36
|
def stringify_user_config_options_keys!(user_config_options)
|
31
37
|
user_config_options_keys = user_config_options.keys
|
32
38
|
user_config_options_keys.each do |key|
|
@@ -3,13 +3,39 @@ module BestType
|
|
3
3
|
|
4
4
|
attr_reader :config
|
5
5
|
|
6
|
-
|
6
|
+
COLLECTION = 'Collection'.freeze
|
7
|
+
DATASET = 'Dataset'.freeze
|
8
|
+
EVENT = 'Event'.freeze
|
9
|
+
INTERACTIVE_RESOURCE = 'InteractiveResource'.freeze
|
10
|
+
MOVING_IMAGE = 'MovingImage'.freeze
|
11
|
+
PHYSICAL_OBJECT = 'PhysicalObject'.freeze
|
12
|
+
SERVICE = 'Service'.freeze
|
13
|
+
SOFTWARE = 'Software'.freeze
|
14
|
+
SOUND = 'Sound'.freeze
|
15
|
+
STILL_IMAGE = 'StillImage'.freeze
|
16
|
+
TEXT = 'Text'.freeze
|
17
|
+
|
18
|
+
# these include values that will not be derived from MIME/content types
|
19
|
+
VALID_TYPES = [
|
20
|
+
COLLECTION, EVENT, INTERACTIVE_RESOURCE, MOVING_IMAGE, PHYSICAL_OBJECT,
|
21
|
+
SERVICE, SOFTWARE, SOUND, STILL_IMAGE, TEXT
|
22
|
+
].freeze
|
23
|
+
|
24
|
+
FALLBACK_DC_TYPE = SOFTWARE
|
7
25
|
|
8
26
|
def initialize(mime_type_lookup_instance)
|
9
27
|
@mime_type_lookup = mime_type_lookup_instance
|
10
28
|
@config = @mime_type_lookup.config
|
11
29
|
end
|
12
30
|
|
31
|
+
def fallback_type
|
32
|
+
FALLBACK_DC_TYPE
|
33
|
+
end
|
34
|
+
|
35
|
+
def valid_type?(value)
|
36
|
+
VALID_TYPES.include? value
|
37
|
+
end
|
38
|
+
|
13
39
|
def for_file_name(file_name_or_path)
|
14
40
|
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
15
41
|
end
|
@@ -20,13 +46,13 @@ module BestType
|
|
20
46
|
return dc_type unless dc_type.nil?
|
21
47
|
|
22
48
|
mimes_to_dc = {
|
23
|
-
/^image/ =>
|
24
|
-
/^video/ =>
|
25
|
-
/^audio/ =>
|
26
|
-
/^text/ =>
|
27
|
-
/^application\/(pdf|msword)/ =>
|
28
|
-
/excel|spreadsheet|xls|application\/sql/ =>
|
29
|
-
/^application/ =>
|
49
|
+
/^image/ => STILL_IMAGE,
|
50
|
+
/^video/ => MOVING_IMAGE,
|
51
|
+
/^audio/ => SOUND,
|
52
|
+
/^text/ => TEXT,
|
53
|
+
/^application\/(pdf|msword)/ => TEXT,
|
54
|
+
/excel|spreadsheet|xls|application\/sql/ => DATASET,
|
55
|
+
/^application/ => SOFTWARE
|
30
56
|
}
|
31
57
|
|
32
58
|
dc_type = mimes_to_dc.find { |pattern, _type_val| mime_type =~ pattern }
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module BestType
|
2
|
+
class PcdmTypeLookup
|
3
|
+
|
4
|
+
attr_reader :config
|
5
|
+
|
6
|
+
# https://github.com/duraspace/pcdm/blob/master/pcdm-ext/file-format-types.rdf
|
7
|
+
ARCHIVE = "Archive".freeze
|
8
|
+
AUDIO = "Audio".freeze
|
9
|
+
DATABASE = "Database".freeze
|
10
|
+
DATASET = "Dataset".freeze
|
11
|
+
EMAIL = "Email".freeze
|
12
|
+
FONT = "Font".freeze
|
13
|
+
HTML = "HTML".freeze
|
14
|
+
IMAGE = "Image".freeze
|
15
|
+
PAGE_DESCRIPTION = "PageDescription".freeze
|
16
|
+
PRESENTATION = "Presentation".freeze
|
17
|
+
SOFTWARE = "Software".freeze
|
18
|
+
SOURCE_CODE = "SourceCode".freeze
|
19
|
+
SPREADSHEET = "Spreadsheet".freeze
|
20
|
+
STRUCTURED_TEXT = "StructuredText".freeze
|
21
|
+
TEXT = "Text".freeze
|
22
|
+
UNKNOWN = "Unknown".freeze
|
23
|
+
UNSTRUCTURED_TEXT = "UnstructuredText".freeze
|
24
|
+
VIDEO = "Video".freeze
|
25
|
+
WEBSITE = "Website".freeze
|
26
|
+
|
27
|
+
# these include values that will not be derived from MIME/content types
|
28
|
+
VALID_TYPES = [
|
29
|
+
ARCHIVE, AUDIO, DATABASE, DATASET, EMAIL, FONT, HTML, IMAGE, PAGE_DESCRIPTION, PRESENTATION, SOFTWARE,
|
30
|
+
SOURCE_CODE, SPREADSHEET, STRUCTURED_TEXT, TEXT, UNKNOWN, UNSTRUCTURED_TEXT, VIDEO, WEBSITE
|
31
|
+
].freeze
|
32
|
+
|
33
|
+
FALLBACK_TYPE = UNKNOWN
|
34
|
+
|
35
|
+
def initialize(mime_type_lookup_instance)
|
36
|
+
@mime_type_lookup = mime_type_lookup_instance
|
37
|
+
@config = @mime_type_lookup.config
|
38
|
+
end
|
39
|
+
|
40
|
+
def fallback_type
|
41
|
+
FALLBACK_TYPE
|
42
|
+
end
|
43
|
+
|
44
|
+
def valid_type?(value)
|
45
|
+
VALID_TYPES.include? value
|
46
|
+
end
|
47
|
+
|
48
|
+
def for_file_name(file_name_or_path)
|
49
|
+
for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
|
50
|
+
end
|
51
|
+
|
52
|
+
def for_mime_type(mime_type)
|
53
|
+
# Check config overrides first
|
54
|
+
file_type = @config.mime_type_to_pcdm_type_overrides.fetch(mime_type, nil)
|
55
|
+
return file_type unless file_type.nil?
|
56
|
+
|
57
|
+
mimes_to_type = {
|
58
|
+
/^image/i => IMAGE,
|
59
|
+
/^video/i => VIDEO,
|
60
|
+
/^audio/i => AUDIO,
|
61
|
+
/^text/i => {
|
62
|
+
/\/css/i => SOURCE_CODE,
|
63
|
+
/\/html/i => HTML,
|
64
|
+
/.+/ => TEXT
|
65
|
+
},
|
66
|
+
/excel|spreadsheet|xls/i => SPREADSHEET,
|
67
|
+
/application\/sql/i => DATABASE,
|
68
|
+
/csv/i => DATASET,
|
69
|
+
/octet.stream/i => UNKNOWN,
|
70
|
+
/^application/i => {
|
71
|
+
/\/access/i => DATABASE,
|
72
|
+
/\/css/i => SOURCE_CODE,
|
73
|
+
/\/html/i => HTML,
|
74
|
+
/\/mbox/i => EMAIL,
|
75
|
+
/\/mp4/i => VIDEO,
|
76
|
+
/\/mp4a/i => AUDIO,
|
77
|
+
/\/msaccess/i => DATABASE,
|
78
|
+
/\/mxf/i => VIDEO,
|
79
|
+
/\/(pdf|msword)/i => PAGE_DESCRIPTION,
|
80
|
+
/\/postscript/i => PAGE_DESCRIPTION,
|
81
|
+
/\/powerpoint/i => PRESENTATION,
|
82
|
+
/\/rtf/i => PAGE_DESCRIPTION,
|
83
|
+
/\/sql/i => DATABASE,
|
84
|
+
/\/swf/ => VIDEO,
|
85
|
+
/\/vnd.ms-asf/i => VIDEO,
|
86
|
+
/\/vnd.ms-word/i => PAGE_DESCRIPTION,
|
87
|
+
/\/vnd.ms-wpl/i => PAGE_DESCRIPTION,
|
88
|
+
/\/vnd.oasis.opendocument.text/i => PAGE_DESCRIPTION,
|
89
|
+
/\/vnd.openxmlformats-officedocument.presentation/i => PRESENTATION,
|
90
|
+
/\/vnd.openxmlformats-officedocument.wordprocessingml/i => PAGE_DESCRIPTION,
|
91
|
+
/\/vnd.ms-powerpoint/i => PRESENTATION,
|
92
|
+
/\/vnd.sun.xml.calc/i => SPREADSHEET,
|
93
|
+
/\/vnd.sun.xml.impress/i => PRESENTATION,
|
94
|
+
/\/vnd.sun.xml.writer/i => PAGE_DESCRIPTION,
|
95
|
+
/\/xml/i => STRUCTURED_TEXT,
|
96
|
+
/\/x.mspublisher/i => PAGE_DESCRIPTION,
|
97
|
+
/\/x.shockwave-flash/ => VIDEO,
|
98
|
+
/\/x.spss/i => DATASET,
|
99
|
+
/\/zip/i => ARCHIVE,
|
100
|
+
/.+/ => UNKNOWN
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
file_type = mimes_to_type.detect { |pattern, _type_val| mime_type =~ pattern }
|
105
|
+
return fallback_type unless file_type
|
106
|
+
if file_type&.last.is_a? Hash
|
107
|
+
file_type = file_type.last.detect { |pattern, _type_val| mime_type =~ pattern }
|
108
|
+
end
|
109
|
+
file_type.nil? ? fallback_type : file_type.last
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
end
|
data/lib/best_type/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: best_type
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- lib/best_type/config.rb
|
107
107
|
- lib/best_type/dc_type_lookup.rb
|
108
108
|
- lib/best_type/mime_type_lookup.rb
|
109
|
+
- lib/best_type/pcdm_type_lookup.rb
|
109
110
|
- lib/best_type/version.rb
|
110
111
|
- lib/tasks/best_type.rake
|
111
112
|
- lib/tasks/best_type/ci.rake
|
@@ -128,8 +129,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
129
|
- !ruby/object:Gem::Version
|
129
130
|
version: '0'
|
130
131
|
requirements: []
|
131
|
-
|
132
|
-
rubygems_version: 2.7.7
|
132
|
+
rubygems_version: 3.0.6
|
133
133
|
signing_key:
|
134
134
|
specification_version: 4
|
135
135
|
summary: A library for selecting the best mime type or dc type for a file.
|