best_type 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 065e858091f8c79dc0a6b0ae0ef5a4afeeaa300007a866e0b2072fd1cffee991
4
- data.tar.gz: 348dd2f451a751baba7af76ad3f40c9dd394f1921fee3eb2196a65693cf10073
3
+ metadata.gz: 9e8b2cfe247c6e8a1a94dada5b9ffc4538b2154d5afcc002a0718007c7be5cd9
4
+ data.tar.gz: 313248015c4f2f2e364ed4da2197173038c183b3083ae8e26a0e82cd4066de1f
5
5
  SHA512:
6
- metadata.gz: a39c2b4c490befb5f4f2b554e39c2f26af4773dbfda28c9ccdc878ad9731c8a8cb5eb37cae301a13427de2d2235a7d608493b61c791ed57d29bf1136e605bb37
7
- data.tar.gz: f5989894ec08dbe385b866990fe4477da5028efbbc5533391681b1b165b05daa39087e11098344425ca04d260cb034c7fc3912e43447437c1d35226c6408447b
6
+ metadata.gz: aa0414efd2ee4510098cf769dd18b54759edeaa9d833a9f9ce2fed3b32a2921325a16d15cba5f4609ffb5b3334c39b9f7d56c94c427c42933c55f7be9db40d0f
7
+ data.tar.gz: ef907355f8e95552c84d55661b81f4ddb3d6bd4ec9b4901d01f7a98cc3eea27f59ae807c5d933fa3c2281d79b253d81d388e69af3295fb2eead756347c083eee
data/README.md CHANGED
@@ -28,6 +28,12 @@ BestType.dc_type.for_file_name('/path/to/some/file.jpg') # 'StillImage'
28
28
 
29
29
  # detect dc type for mime types
30
30
  BestType.dc_type.for_mime_type('image/jpeg') # 'StillImage'
31
+
32
+ # detect pcdm type for file names (including full file path)
33
+ BestType.pcdm_type.for_file_name('/path/to/some/file.jpg') # 'Image'
34
+
35
+ # detect pcdm type for mime types
36
+ BestType.pcdm_type.for_mime_type('image/jpeg') # 'Image'
31
37
  ```
32
38
 
33
39
  ### Add Custom Overrides
@@ -36,12 +42,14 @@ BestType.configure({
36
42
  extension_to_mime_type_overrides:
37
43
  'custom': 'custom/type'
38
44
  mime_type_to_dc_type_overrides:
39
- 'custom/type': 'Custom'
45
+ 'custom/type': 'CustomDC'
46
+ mime_type_to_pcdm_type_overrides:
47
+ 'custom/type': 'CustomPCDM'
40
48
  })
41
49
 
42
50
  BestType.mime_type.for_file_name('myfile.custom') # 'custom/type'
43
- BestType.dc_type.for_file_name('myfile.custom') # 'Custom'
44
- BestType.dc_type.for_mime_type('custom/type') # 'Custom'
51
+ BestType.dc_type.for_file_name('myfile.custom') # 'CustomDC'
52
+ BestType.pcdm_type.for_mime_type('custom/type') # 'CustomPCDM'
45
53
 
46
54
  ```
47
55
 
@@ -63,6 +71,8 @@ BestType.configure({
63
71
  'custom': 'custom/type'
64
72
  mime_type_to_dc_type_overrides:
65
73
  'custom/type': 'Custom'
74
+ mime_type_to_pcdm_type_overrides:
75
+ 'custom/type': 'Custom'
66
76
  })
67
77
  ```
68
78
 
@@ -81,18 +91,24 @@ development:
81
91
  'good': 'good/type'
82
92
  mime_type_to_dc_type_overrides:
83
93
  'good/type': 'Good'
94
+ mime_type_to_pcdm_type_overrides:
95
+ 'good/type': 'Goodly'
84
96
 
85
97
  test:
86
98
  extension_to_mime_type_overrides:
87
99
  'better': 'better/type'
88
100
  mime_type_to_dc_type_overrides:
89
101
  'better/type': 'Better'
102
+ mime_type_to_pcdm_type_overrides:
103
+ 'better/type': 'Betterly'
90
104
 
91
105
  production:
92
106
  extension_to_mime_type_overrides:
93
107
  'best': 'best/type'
94
108
  mime_type_to_dc_type_overrides:
95
109
  'best/type': 'Best'
110
+ mime_type_to_pcdm_type_overrides:
111
+ 'best/type': 'Bestly'
96
112
  ```
97
113
 
98
114
  ### Running Tests (for developers):
@@ -6,3 +6,5 @@ mime_type_to_dc_type_overrides:
6
6
  'application/mxf': 'MovingImage'
7
7
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Text'
8
8
  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Dataset'
9
+ mime_type_to_pcdm_type_overrides:
10
+ 'test/type': 'Test'
@@ -2,6 +2,7 @@ require 'best_type/version'
2
2
  require 'best_type/config'
3
3
  require 'best_type/mime_type_lookup'
4
4
  require 'best_type/dc_type_lookup'
5
+ require 'best_type/pcdm_type_lookup'
5
6
  require 'yaml'
6
7
 
7
8
  module BestType
@@ -15,6 +16,10 @@ module BestType
15
16
  @dc_type ||= BestType::DcTypeLookup.new(mime_type)
16
17
  end
17
18
 
19
+ def self.pcdm_type
20
+ @pcdm_type ||= BestType::PcdmTypeLookup.new(mime_type)
21
+ end
22
+
18
23
  def self.config(reload = false, user_config_options = {})
19
24
  if @config.nil? || reload
20
25
  @semaphore.synchronize do
@@ -1,7 +1,7 @@
1
1
  module BestType
2
2
  class Config
3
3
 
4
- attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides
4
+ attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides, :mime_type_to_pcdm_type_overrides
5
5
 
6
6
  def initialize(user_config_options = {})
7
7
  # Get defaults from internal_custom_mapping.yml in gem
@@ -9,12 +9,14 @@ module BestType
9
9
  internal_config_file_path = File.join(gem_dir, 'config/internal_config_options.yml')
10
10
  internal_config_options = YAML.load_file(internal_config_file_path)
11
11
 
12
- @extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides']
13
- @mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides']
12
+ @extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides'] || {}
13
+ @mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides'] || {}
14
+ @mime_type_to_pcdm_type_overrides = internal_config_options['mime_type_to_pcdm_type_overrides'] || {}
14
15
 
15
16
  stringify_user_config_options_keys!(user_config_options)
16
17
  add_extension_to_mime_type_overrides(user_config_options['extension_to_mime_type_overrides']) if user_config_options.key?('extension_to_mime_type_overrides')
17
18
  add_mime_type_to_dc_type_overrides(user_config_options['mime_type_to_dc_type_overrides']) if user_config_options.key?('mime_type_to_dc_type_overrides')
19
+ add_mime_type_to_pcdm_type_overrides(user_config_options['mime_type_to_pcdm_type_overrides']) if user_config_options.key?('mime_type_to_pcdm_type_overrides')
18
20
  end
19
21
 
20
22
  private
@@ -27,6 +29,10 @@ module BestType
27
29
  @mime_type_to_dc_type_overrides.merge!(overrides)
28
30
  end
29
31
 
32
+ def add_mime_type_to_pcdm_type_overrides(overrides)
33
+ @mime_type_to_pcdm_type_overrides.merge!(overrides)
34
+ end
35
+
30
36
  def stringify_user_config_options_keys!(user_config_options)
31
37
  user_config_options_keys = user_config_options.keys
32
38
  user_config_options_keys.each do |key|
@@ -3,13 +3,39 @@ module BestType
3
3
 
4
4
  attr_reader :config
5
5
 
6
- FALLBACK_DC_TYPE = 'Software'.freeze
6
+ COLLECTION = 'Collection'.freeze
7
+ DATASET = 'Dataset'.freeze
8
+ EVENT = 'Event'.freeze
9
+ INTERACTIVE_RESOURCE = 'InteractiveResource'.freeze
10
+ MOVING_IMAGE = 'MovingImage'.freeze
11
+ PHYSICAL_OBJECT = 'PhysicalObject'.freeze
12
+ SERVICE = 'Service'.freeze
13
+ SOFTWARE = 'Software'.freeze
14
+ SOUND = 'Sound'.freeze
15
+ STILL_IMAGE = 'StillImage'.freeze
16
+ TEXT = 'Text'.freeze
17
+
18
+ # these include values that will not be derived from MIME/content types
19
+ VALID_TYPES = [
20
+ COLLECTION, EVENT, INTERACTIVE_RESOURCE, MOVING_IMAGE, PHYSICAL_OBJECT,
21
+ SERVICE, SOFTWARE, SOUND, STILL_IMAGE, TEXT
22
+ ].freeze
23
+
24
+ FALLBACK_DC_TYPE = SOFTWARE
7
25
 
8
26
  def initialize(mime_type_lookup_instance)
9
27
  @mime_type_lookup = mime_type_lookup_instance
10
28
  @config = @mime_type_lookup.config
11
29
  end
12
30
 
31
+ def fallback_type
32
+ FALLBACK_DC_TYPE
33
+ end
34
+
35
+ def valid_type?(value)
36
+ VALID_TYPES.include? value
37
+ end
38
+
13
39
  def for_file_name(file_name_or_path)
14
40
  for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
15
41
  end
@@ -20,13 +46,13 @@ module BestType
20
46
  return dc_type unless dc_type.nil?
21
47
 
22
48
  mimes_to_dc = {
23
- /^image/ => 'StillImage',
24
- /^video/ => 'MovingImage',
25
- /^audio/ => 'Sound',
26
- /^text/ => 'Text',
27
- /^application\/(pdf|msword)/ => 'Text',
28
- /excel|spreadsheet|xls|application\/sql/ => 'Dataset',
29
- /^application/ => 'Software'
49
+ /^image/ => STILL_IMAGE,
50
+ /^video/ => MOVING_IMAGE,
51
+ /^audio/ => SOUND,
52
+ /^text/ => TEXT,
53
+ /^application\/(pdf|msword)/ => TEXT,
54
+ /excel|spreadsheet|xls|application\/sql/ => DATASET,
55
+ /^application/ => SOFTWARE
30
56
  }
31
57
 
32
58
  dc_type = mimes_to_dc.find { |pattern, _type_val| mime_type =~ pattern }
@@ -0,0 +1,113 @@
1
+ module BestType
2
+ class PcdmTypeLookup
3
+
4
+ attr_reader :config
5
+
6
+ # https://github.com/duraspace/pcdm/blob/master/pcdm-ext/file-format-types.rdf
7
+ ARCHIVE = "Archive".freeze
8
+ AUDIO = "Audio".freeze
9
+ DATABASE = "Database".freeze
10
+ DATASET = "Dataset".freeze
11
+ EMAIL = "Email".freeze
12
+ FONT = "Font".freeze
13
+ HTML = "HTML".freeze
14
+ IMAGE = "Image".freeze
15
+ PAGE_DESCRIPTION = "PageDescription".freeze
16
+ PRESENTATION = "Presentation".freeze
17
+ SOFTWARE = "Software".freeze
18
+ SOURCE_CODE = "SourceCode".freeze
19
+ SPREADSHEET = "Spreadsheet".freeze
20
+ STRUCTURED_TEXT = "StructuredText".freeze
21
+ TEXT = "Text".freeze
22
+ UNKNOWN = "Unknown".freeze
23
+ UNSTRUCTURED_TEXT = "UnstructuredText".freeze
24
+ VIDEO = "Video".freeze
25
+ WEBSITE = "Website".freeze
26
+
27
+ # these include values that will not be derived from MIME/content types
28
+ VALID_TYPES = [
29
+ ARCHIVE, AUDIO, DATABASE, DATASET, EMAIL, FONT, HTML, IMAGE, PAGE_DESCRIPTION, PRESENTATION, SOFTWARE,
30
+ SOURCE_CODE, SPREADSHEET, STRUCTURED_TEXT, TEXT, UNKNOWN, UNSTRUCTURED_TEXT, VIDEO, WEBSITE
31
+ ].freeze
32
+
33
+ FALLBACK_TYPE = UNKNOWN
34
+
35
+ def initialize(mime_type_lookup_instance)
36
+ @mime_type_lookup = mime_type_lookup_instance
37
+ @config = @mime_type_lookup.config
38
+ end
39
+
40
+ def fallback_type
41
+ FALLBACK_TYPE
42
+ end
43
+
44
+ def valid_type?(value)
45
+ VALID_TYPES.include? value
46
+ end
47
+
48
+ def for_file_name(file_name_or_path)
49
+ for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
50
+ end
51
+
52
+ def for_mime_type(mime_type)
53
+ # Check config overrides first
54
+ file_type = @config.mime_type_to_pcdm_type_overrides.fetch(mime_type, nil)
55
+ return file_type unless file_type.nil?
56
+
57
+ mimes_to_type = {
58
+ /^image/i => IMAGE,
59
+ /^video/i => VIDEO,
60
+ /^audio/i => AUDIO,
61
+ /^text/i => {
62
+ /\/css/i => SOURCE_CODE,
63
+ /\/html/i => HTML,
64
+ /.+/ => TEXT
65
+ },
66
+ /excel|spreadsheet|xls/i => SPREADSHEET,
67
+ /application\/sql/i => DATABASE,
68
+ /csv/i => DATASET,
69
+ /octet.stream/i => UNKNOWN,
70
+ /^application/i => {
71
+ /\/access/i => DATABASE,
72
+ /\/css/i => SOURCE_CODE,
73
+ /\/html/i => HTML,
74
+ /\/mbox/i => EMAIL,
75
+ /\/mp4/i => VIDEO,
76
+ /\/mp4a/i => AUDIO,
77
+ /\/msaccess/i => DATABASE,
78
+ /\/mxf/i => VIDEO,
79
+ /\/(pdf|msword)/i => PAGE_DESCRIPTION,
80
+ /\/postscript/i => PAGE_DESCRIPTION,
81
+ /\/powerpoint/i => PRESENTATION,
82
+ /\/rtf/i => PAGE_DESCRIPTION,
83
+ /\/sql/i => DATABASE,
84
+ /\/swf/ => VIDEO,
85
+ /\/vnd.ms-asf/i => VIDEO,
86
+ /\/vnd.ms-word/i => PAGE_DESCRIPTION,
87
+ /\/vnd.ms-wpl/i => PAGE_DESCRIPTION,
88
+ /\/vnd.oasis.opendocument.text/i => PAGE_DESCRIPTION,
89
+ /\/vnd.openxmlformats-officedocument.presentation/i => PRESENTATION,
90
+ /\/vnd.openxmlformats-officedocument.wordprocessingml/i => PAGE_DESCRIPTION,
91
+ /\/vnd.ms-powerpoint/i => PRESENTATION,
92
+ /\/vnd.sun.xml.calc/i => SPREADSHEET,
93
+ /\/vnd.sun.xml.impress/i => PRESENTATION,
94
+ /\/vnd.sun.xml.writer/i => PAGE_DESCRIPTION,
95
+ /\/xml/i => STRUCTURED_TEXT,
96
+ /\/x.mspublisher/i => PAGE_DESCRIPTION,
97
+ /\/x.shockwave-flash/ => VIDEO,
98
+ /\/x.spss/i => DATASET,
99
+ /\/zip/i => ARCHIVE,
100
+ /.+/ => UNKNOWN
101
+ }
102
+ }
103
+
104
+ file_type = mimes_to_type.detect { |pattern, _type_val| mime_type =~ pattern }
105
+ return fallback_type unless file_type
106
+ if file_type&.last.is_a? Hash
107
+ file_type = file_type.last.detect { |pattern, _type_val| mime_type =~ pattern }
108
+ end
109
+ file_type.nil? ? fallback_type : file_type.last
110
+ end
111
+
112
+ end
113
+ end
@@ -1,6 +1,6 @@
1
1
  module BestType
2
2
 
3
- VERSION = '0.0.4'.freeze
3
+ VERSION = '0.0.5'.freeze
4
4
 
5
5
  def self.version
6
6
  VERSION
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: best_type
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric O'Hanlon
@@ -106,6 +106,7 @@ files:
106
106
  - lib/best_type/config.rb
107
107
  - lib/best_type/dc_type_lookup.rb
108
108
  - lib/best_type/mime_type_lookup.rb
109
+ - lib/best_type/pcdm_type_lookup.rb
109
110
  - lib/best_type/version.rb
110
111
  - lib/tasks/best_type.rake
111
112
  - lib/tasks/best_type/ci.rake
@@ -128,8 +129,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
129
  - !ruby/object:Gem::Version
129
130
  version: '0'
130
131
  requirements: []
131
- rubyforge_project:
132
- rubygems_version: 2.7.7
132
+ rubygems_version: 3.0.6
133
133
  signing_key:
134
134
  specification_version: 4
135
135
  summary: A library for selecting the best mime type or dc type for a file.