best_type 0.0.3 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 9b2371da6f225b1bd22acd81086ea29b9e219f02
4
- data.tar.gz: 27b8760b8632f7716eece5a0889327ae57c3da9a
2
+ SHA256:
3
+ metadata.gz: 29fe983eac1114ba38d161af60db4bc5baaed14ffc7f9fa226ec0ce9f6b9658b
4
+ data.tar.gz: 2d8c71566c13afaf5f48253ad659a4aad8c4f345403e0067ad6bc08b793f6451
5
5
  SHA512:
6
- metadata.gz: 7d8a536b778ade2b05994d6e958975a5672ffd6953946e8f677faa801d014c1ab4c13399a3b4678d2fd9c8c3477716b14867f2dd5620fc3e5d14e5cf8650da6e
7
- data.tar.gz: a6f9920f32d467b5f9f18e5598da3363cbc85e62981639ec99e64ea98b7068146f7729207c588d35a2a419250f9834dce1cd11b1b16ca2711b35580df06f14bd
6
+ metadata.gz: 905b6b2f3bf244668bc1962eea0561cd267e7aaee333c2c4817f56ca917b7181c0d55cddfbaa952032ed839aa75422d1c3a75ee0b5b17fecf7ea826a8a4f8c28
7
+ data.tar.gz: a96af2728b2d1c5c15fdbb4a3a065be91d11352f81c8ab1a6ee38a517e737bc6c97625538a17a985015b58ee21332c13fded4160964f42c0189ea44bf155d7cf
data/README.md CHANGED
@@ -28,6 +28,12 @@ BestType.dc_type.for_file_name('/path/to/some/file.jpg') # 'StillImage'
28
28
 
29
29
  # detect dc type for mime types
30
30
  BestType.dc_type.for_mime_type('image/jpeg') # 'StillImage'
31
+
32
+ # detect pcdm type for file names (including full file path)
33
+ BestType.pcdm_type.for_file_name('/path/to/some/file.jpg') # 'Image'
34
+
35
+ # detect pcdm type for mime types
36
+ BestType.pcdm_type.for_mime_type('image/jpeg') # 'Image'
31
37
  ```
32
38
 
33
39
  ### Add Custom Overrides
@@ -36,12 +42,14 @@ BestType.configure({
36
42
  extension_to_mime_type_overrides:
37
43
  'custom': 'custom/type'
38
44
  mime_type_to_dc_type_overrides:
39
- 'custom/type': 'Custom'
45
+ 'custom/type': 'CustomDC'
46
+ mime_type_to_pcdm_type_overrides:
47
+ 'custom/type': 'CustomPCDM'
40
48
  })
41
49
 
42
50
  BestType.mime_type.for_file_name('myfile.custom') # 'custom/type'
43
- BestType.dc_type.for_file_name('myfile.custom') # 'Custom'
44
- BestType.dc_type.for_mime_type('custom/type') # 'Custom'
51
+ BestType.dc_type.for_file_name('myfile.custom') # 'CustomDC'
52
+ BestType.pcdm_type.for_mime_type('custom/type') # 'CustomPCDM'
45
53
 
46
54
  ```
47
55
 
@@ -63,6 +71,8 @@ BestType.configure({
63
71
  'custom': 'custom/type'
64
72
  mime_type_to_dc_type_overrides:
65
73
  'custom/type': 'Custom'
74
+ mime_type_to_pcdm_type_overrides:
75
+ 'custom/type': 'Custom'
66
76
  })
67
77
  ```
68
78
 
@@ -81,18 +91,24 @@ development:
81
91
  'good': 'good/type'
82
92
  mime_type_to_dc_type_overrides:
83
93
  'good/type': 'Good'
94
+ mime_type_to_pcdm_type_overrides:
95
+ 'good/type': 'Goodly'
84
96
 
85
97
  test:
86
98
  extension_to_mime_type_overrides:
87
99
  'better': 'better/type'
88
100
  mime_type_to_dc_type_overrides:
89
101
  'better/type': 'Better'
102
+ mime_type_to_pcdm_type_overrides:
103
+ 'better/type': 'Betterly'
90
104
 
91
105
  production:
92
106
  extension_to_mime_type_overrides:
93
107
  'best': 'best/type'
94
108
  mime_type_to_dc_type_overrides:
95
109
  'best/type': 'Best'
110
+ mime_type_to_pcdm_type_overrides:
111
+ 'best/type': 'Bestly'
96
112
  ```
97
113
 
98
114
  ### Running Tests (for developers):
@@ -1,6 +1,12 @@
1
1
  extension_to_mime_type_overrides:
2
2
  'test': 'test/type'
3
3
  'mp4': 'video/mp4'
4
+ 'vtt': 'text/vtt'
5
+ 'm4v': 'video/x-m4v'
4
6
  mime_type_to_dc_type_overrides:
5
7
  'test/type': 'Test'
6
8
  'application/mxf': 'MovingImage'
9
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Text'
10
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Dataset'
11
+ mime_type_to_pcdm_type_overrides:
12
+ 'test/type': 'Test'
data/lib/best_type.rb CHANGED
@@ -2,6 +2,7 @@ require 'best_type/version'
2
2
  require 'best_type/config'
3
3
  require 'best_type/mime_type_lookup'
4
4
  require 'best_type/dc_type_lookup'
5
+ require 'best_type/pcdm_type_lookup'
5
6
  require 'yaml'
6
7
 
7
8
  module BestType
@@ -15,6 +16,10 @@ module BestType
15
16
  @dc_type ||= BestType::DcTypeLookup.new(mime_type)
16
17
  end
17
18
 
19
+ def self.pcdm_type
20
+ @pcdm_type ||= BestType::PcdmTypeLookup.new(mime_type)
21
+ end
22
+
18
23
  def self.config(reload = false, user_config_options = {})
19
24
  if @config.nil? || reload
20
25
  @semaphore.synchronize do
@@ -1,7 +1,7 @@
1
1
  module BestType
2
2
  class Config
3
3
 
4
- attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides
4
+ attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides, :mime_type_to_pcdm_type_overrides
5
5
 
6
6
  def initialize(user_config_options = {})
7
7
  # Get defaults from internal_custom_mapping.yml in gem
@@ -9,12 +9,14 @@ module BestType
9
9
  internal_config_file_path = File.join(gem_dir, 'config/internal_config_options.yml')
10
10
  internal_config_options = YAML.load_file(internal_config_file_path)
11
11
 
12
- @extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides']
13
- @mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides']
12
+ @extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides'] || {}
13
+ @mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides'] || {}
14
+ @mime_type_to_pcdm_type_overrides = internal_config_options['mime_type_to_pcdm_type_overrides'] || {}
14
15
 
15
16
  stringify_user_config_options_keys!(user_config_options)
16
17
  add_extension_to_mime_type_overrides(user_config_options['extension_to_mime_type_overrides']) if user_config_options.key?('extension_to_mime_type_overrides')
17
18
  add_mime_type_to_dc_type_overrides(user_config_options['mime_type_to_dc_type_overrides']) if user_config_options.key?('mime_type_to_dc_type_overrides')
19
+ add_mime_type_to_pcdm_type_overrides(user_config_options['mime_type_to_pcdm_type_overrides']) if user_config_options.key?('mime_type_to_pcdm_type_overrides')
18
20
  end
19
21
 
20
22
  private
@@ -27,6 +29,10 @@ module BestType
27
29
  @mime_type_to_dc_type_overrides.merge!(overrides)
28
30
  end
29
31
 
32
+ def add_mime_type_to_pcdm_type_overrides(overrides)
33
+ @mime_type_to_pcdm_type_overrides.merge!(overrides)
34
+ end
35
+
30
36
  def stringify_user_config_options_keys!(user_config_options)
31
37
  user_config_options_keys = user_config_options.keys
32
38
  user_config_options_keys.each do |key|
@@ -3,13 +3,39 @@ module BestType
3
3
 
4
4
  attr_reader :config
5
5
 
6
- FALLBACK_DC_TYPE = 'Software'.freeze
# DC type values this lookup knows about. Each value is a frozen String.
COLLECTION = 'Collection'.freeze
DATASET = 'Dataset'.freeze
EVENT = 'Event'.freeze
INTERACTIVE_RESOURCE = 'InteractiveResource'.freeze
MOVING_IMAGE = 'MovingImage'.freeze
PHYSICAL_OBJECT = 'PhysicalObject'.freeze
SERVICE = 'Service'.freeze
SOFTWARE = 'Software'.freeze
SOUND = 'Sound'.freeze
STILL_IMAGE = 'StillImage'.freeze
TEXT = 'Text'.freeze

# these include values that will not be derived from MIME/content types
# DATASET is listed because the MIME-type mapping can return it, so it must
# be accepted by any membership check performed against this list.
VALID_TYPES = [
  COLLECTION, DATASET, EVENT, INTERACTIVE_RESOURCE, MOVING_IMAGE, PHYSICAL_OBJECT,
  SERVICE, SOFTWARE, SOUND, STILL_IMAGE, TEXT
].freeze

# Type used when no override or pattern applies.
FALLBACK_DC_TYPE = SOFTWARE
7
25
 
8
26
  def initialize(mime_type_lookup_instance)
9
27
  @mime_type_lookup = mime_type_lookup_instance
10
28
  @config = @mime_type_lookup.config
11
29
  end
12
30
 
31
+ def fallback_type
32
+ FALLBACK_DC_TYPE
33
+ end
34
+
35
+ def valid_type?(value)
36
+ VALID_TYPES.include? value
37
+ end
38
+
13
39
  def for_file_name(file_name_or_path)
14
40
  for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
15
41
  end
@@ -20,13 +46,13 @@ module BestType
20
46
  return dc_type unless dc_type.nil?
21
47
 
22
48
  mimes_to_dc = {
23
- /^image/ => 'StillImage',
24
- /^video/ => 'MovingImage',
25
- /^audio/ => 'Sound',
26
- /^text/ => 'Text',
27
- /^application\/(pdf|msword)/ => 'Text',
28
- /excel|spreadsheet|xls|application\/sql/ => 'Dataset',
29
- /^application/ => 'Software'
49
+ /^image/ => STILL_IMAGE,
50
+ /^video/ => MOVING_IMAGE,
51
+ /^audio/ => SOUND,
52
+ /^text/ => TEXT,
53
+ /^application\/(pdf|msword)/ => TEXT,
54
+ /excel|spreadsheet|xls|application\/sql/ => DATASET,
55
+ /^application/ => SOFTWARE
30
56
  }
31
57
 
32
58
  dc_type = mimes_to_dc.find { |pattern, _type_val| mime_type =~ pattern }
@@ -0,0 +1,116 @@
module BestType
  # Resolves a PCDM file-format type (a String such as "Image" or "Video")
  # from a MIME type, or from a file name via the supplied MIME type lookup.
  #
  # Resolution order in #for_mime_type:
  #   1. an exact match in config.mime_type_to_pcdm_type_overrides
  #   2. the first matching pattern in MIME_TYPE_RULES
  #   3. FALLBACK_TYPE
  class PcdmTypeLookup

    attr_reader :config

    # https://github.com/duraspace/pcdm/blob/master/pcdm-ext/file-format-types.rdf
    ARCHIVE = "Archive".freeze
    AUDIO = "Audio".freeze
    DATABASE = "Database".freeze
    DATASET = "Dataset".freeze
    EMAIL = "Email".freeze
    FONT = "Font".freeze
    HTML = "HTML".freeze
    IMAGE = "Image".freeze
    PAGE_DESCRIPTION = "PageDescription".freeze
    PRESENTATION = "Presentation".freeze
    SOFTWARE = "Software".freeze
    SOURCE_CODE = "SourceCode".freeze
    SPREADSHEET = "Spreadsheet".freeze
    STRUCTURED_TEXT = "StructuredText".freeze
    TEXT = "Text".freeze
    UNKNOWN = "Unknown".freeze
    UNSTRUCTURED_TEXT = "UnstructuredText".freeze
    VIDEO = "Video".freeze
    WEBSITE = "Website".freeze

    # these include values that will not be derived from MIME/content types
    VALID_TYPES = [
      ARCHIVE, AUDIO, DATABASE, DATASET, EMAIL, FONT, HTML, IMAGE, PAGE_DESCRIPTION, PRESENTATION, SOFTWARE,
      SOURCE_CODE, SPREADSHEET, STRUCTURED_TEXT, TEXT, UNKNOWN, UNSTRUCTURED_TEXT, VIDEO, WEBSITE
    ].freeze

    FALLBACK_TYPE = UNKNOWN

    # Ordered pattern table: the FIRST matching pattern wins, so entry order
    # is significant. A Hash value is a nested table consulted with the same
    # MIME type string. Built once here instead of on every lookup.
    MIME_TYPE_RULES = {
      /^image/i => IMAGE,
      /^video/i => VIDEO,
      /^audio/i => AUDIO,
      /^text/i => {
        /\/css/i => SOURCE_CODE,
        /\/html/i => HTML,
        /.+/ => TEXT
      }.freeze,
      /excel|spreadsheet|xls/i => SPREADSHEET,
      /application\/sql/i => DATABASE,
      /csv/i => DATASET,
      /octet.stream/i => UNKNOWN,
      /^application/i => {
        /\/access/i => DATABASE,
        /\/css/i => SOURCE_CODE,
        /\/html/i => HTML,
        /\/x-iwork-keynote/i => PRESENTATION,
        /\/x-iwork-numbers/i => SPREADSHEET,
        /\/x-iwork-pages/i => PAGE_DESCRIPTION,
        /\/mbox/i => EMAIL,
        # mp4a must precede mp4: /\/mp4/ also matches "/mp4a", and the first
        # match wins, so the reverse order would make this entry unreachable.
        /\/mp4a/i => AUDIO,
        /\/mp4/i => VIDEO,
        /\/msaccess/i => DATABASE,
        /\/mxf/i => VIDEO,
        /\/(pdf|msword)/i => PAGE_DESCRIPTION,
        /\/postscript/i => PAGE_DESCRIPTION,
        /\/powerpoint/i => PRESENTATION,
        /\/rtf/i => PAGE_DESCRIPTION,
        /\/sql/i => DATABASE,
        /\/swf/i => VIDEO,
        /\/vnd.ms-asf/i => VIDEO,
        /\/vnd.ms-word/i => PAGE_DESCRIPTION,
        /\/vnd.ms-wpl/i => PAGE_DESCRIPTION,
        /\/vnd.oasis.opendocument.text/i => PAGE_DESCRIPTION,
        /\/vnd.openxmlformats-officedocument.presentation/i => PRESENTATION,
        /\/vnd.openxmlformats-officedocument.wordprocessingml/i => PAGE_DESCRIPTION,
        /\/vnd.ms-powerpoint/i => PRESENTATION,
        /\/vnd.sun.xml.calc/i => SPREADSHEET,
        /\/vnd.sun.xml.impress/i => PRESENTATION,
        /\/vnd.sun.xml.writer/i => PAGE_DESCRIPTION,
        /\/xml/i => STRUCTURED_TEXT,
        /\/x.mspublisher/i => PAGE_DESCRIPTION,
        /\/x.shockwave-flash/i => VIDEO,
        /\/x.spss/i => DATASET,
        /\/zip/i => ARCHIVE,
        /.+/ => UNKNOWN
      }.freeze
    }.freeze

    # @param mime_type_lookup_instance an object responding to #config and
    #   #for_file_name; its config must respond to
    #   #mime_type_to_pcdm_type_overrides
    def initialize(mime_type_lookup_instance)
      @mime_type_lookup = mime_type_lookup_instance
      @config = @mime_type_lookup.config
    end

    # @return [String] the type returned when nothing else matches
    def fallback_type
      FALLBACK_TYPE
    end

    # @param value the candidate type value
    # @return [Boolean] true when value is one of VALID_TYPES
    def valid_type?(value)
      VALID_TYPES.include? value
    end

    # Resolves the MIME type for the given file name (or path) through the
    # MIME type lookup, then maps it with #for_mime_type.
    #
    # @param file_name_or_path passed through to the MIME type lookup
    # @return [String] the resolved type
    def for_file_name(file_name_or_path)
      for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
    end

    # @param mime_type the MIME type to map; nil or an unmatched value yields
    #   the fallback type
    # @return the configured override value when one exists for mime_type,
    #   otherwise the first matching MIME_TYPE_RULES value, otherwise
    #   #fallback_type
    def for_mime_type(mime_type)
      # Check config overrides first
      override = @config.mime_type_to_pcdm_type_overrides.fetch(mime_type, nil)
      return override unless override.nil?

      first_match(MIME_TYPE_RULES, mime_type) || fallback_type
    end

    private

    # Returns the value of the first rule whose pattern matches mime_type,
    # descending into nested rule tables; nil when nothing matches.
    def first_match(rules, mime_type)
      _pattern, result = rules.find { |pattern, _type_val| pattern =~ mime_type }
      result.is_a?(Hash) ? first_match(result, mime_type) : result
    end

  end
end
@@ -1,6 +1,6 @@
1
1
  module BestType
2
2
 
3
- VERSION = '0.0.3'.freeze
3
+ VERSION = '0.0.10'.freeze
4
4
 
5
5
  def self.version
6
6
  VERSION
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: best_type
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric O'Hanlon
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2018-03-25 00:00:00.000000000 Z
@@ -106,6 +106,7 @@ files:
106
106
  - lib/best_type/config.rb
107
107
  - lib/best_type/dc_type_lookup.rb
108
108
  - lib/best_type/mime_type_lookup.rb
109
+ - lib/best_type/pcdm_type_lookup.rb
109
110
  - lib/best_type/version.rb
110
111
  - lib/tasks/best_type.rake
111
112
  - lib/tasks/best_type/ci.rake
@@ -113,7 +114,7 @@ homepage: https://github.com/cul/best_type
113
114
  licenses:
114
115
  - MIT
115
116
  metadata: {}
116
- post_install_message:
117
+ post_install_message:
117
118
  rdoc_options: []
118
119
  require_paths:
119
120
  - lib
@@ -128,9 +129,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
129
  - !ruby/object:Gem::Version
129
130
  version: '0'
130
131
  requirements: []
131
- rubyforge_project:
132
- rubygems_version: 2.6.14
133
- signing_key:
132
+ rubygems_version: 3.0.8
133
+ signing_key:
134
134
  specification_version: 4
135
135
  summary: A library for selecting the best mime type or dc type for a file.
136
136
  test_files: []