best_type 0.0.3 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 9b2371da6f225b1bd22acd81086ea29b9e219f02
4
- data.tar.gz: 27b8760b8632f7716eece5a0889327ae57c3da9a
2
+ SHA256:
3
+ metadata.gz: 29fe983eac1114ba38d161af60db4bc5baaed14ffc7f9fa226ec0ce9f6b9658b
4
+ data.tar.gz: 2d8c71566c13afaf5f48253ad659a4aad8c4f345403e0067ad6bc08b793f6451
5
5
  SHA512:
6
- metadata.gz: 7d8a536b778ade2b05994d6e958975a5672ffd6953946e8f677faa801d014c1ab4c13399a3b4678d2fd9c8c3477716b14867f2dd5620fc3e5d14e5cf8650da6e
7
- data.tar.gz: a6f9920f32d467b5f9f18e5598da3363cbc85e62981639ec99e64ea98b7068146f7729207c588d35a2a419250f9834dce1cd11b1b16ca2711b35580df06f14bd
6
+ metadata.gz: 905b6b2f3bf244668bc1962eea0561cd267e7aaee333c2c4817f56ca917b7181c0d55cddfbaa952032ed839aa75422d1c3a75ee0b5b17fecf7ea826a8a4f8c28
7
+ data.tar.gz: a96af2728b2d1c5c15fdbb4a3a065be91d11352f81c8ab1a6ee38a517e737bc6c97625538a17a985015b58ee21332c13fded4160964f42c0189ea44bf155d7cf
data/README.md CHANGED
@@ -28,6 +28,12 @@ BestType.dc_type.for_file_name('/path/to/some/file.jpg') # 'StillImage'
28
28
 
29
29
  # detect dc type for mime types
30
30
  BestType.dc_type.for_mime_type('image/jpeg') # 'StillImage'
31
+
32
+ # detect pcdm type for file names (including full file path)
33
+ BestType.pcdm_type.for_file_name('/path/to/some/file.jpg') # 'Image'
34
+
35
+ # detect pcdm type for mime types
36
+ BestType.pcdm_type.for_mime_type('image/jpeg') # 'Image'
31
37
  ```
32
38
 
33
39
  ### Add Custom Overrides
@@ -36,12 +42,14 @@ BestType.configure({
36
42
  extension_to_mime_type_overrides:
37
43
  'custom': 'custom/type'
38
44
  mime_type_to_dc_type_overrides:
39
- 'custom/type': 'Custom'
45
+ 'custom/type': 'CustomDC'
46
+ mime_type_to_pcdm_type_overrides:
47
+ 'custom/type': 'CustomPCDM'
40
48
  })
41
49
 
42
50
  BestType.mime_type.for_file_name('myfile.custom') # 'custom/type'
43
- BestType.dc_type.for_file_name('myfile.custom') # 'Custom'
44
- BestType.dc_type.for_mime_type('custom/type') # 'Custom'
51
+ BestType.dc_type.for_file_name('myfile.custom') # 'CustomDC'
52
+ BestType.pcdm_type.for_mime_type('custom/type') # 'CustomPCDM'
45
53
 
46
54
  ```
47
55
 
@@ -63,6 +71,8 @@ BestType.configure({
63
71
  'custom': 'custom/type'
64
72
  mime_type_to_dc_type_overrides:
65
73
  'custom/type': 'Custom'
74
+ mime_type_to_pcdm_type_overrides:
75
+ 'custom/type': 'Custom'
66
76
  })
67
77
  ```
68
78
 
@@ -81,18 +91,24 @@ development:
81
91
  'good': 'good/type'
82
92
  mime_type_to_dc_type_overrides:
83
93
  'good/type': 'Good'
94
+ mime_type_to_pcdm_type_overrides:
95
+ 'good/type': 'Goodly'
84
96
 
85
97
  test:
86
98
  extension_to_mime_type_overrides:
87
99
  'better': 'better/type'
88
100
  mime_type_to_dc_type_overrides:
89
101
  'better/type': 'Better'
102
+ mime_type_to_pcdm_type_overrides:
103
+ 'better/type': 'Betterly'
90
104
 
91
105
  production:
92
106
  extension_to_mime_type_overrides:
93
107
  'best': 'best/type'
94
108
  mime_type_to_dc_type_overrides:
95
109
  'best/type': 'Best'
110
+ mime_type_to_pcdm_type_overrides:
111
+ 'best/type': 'Bestly'
96
112
  ```
97
113
 
98
114
  ### Running Tests (for developers):
@@ -1,6 +1,12 @@
1
1
  extension_to_mime_type_overrides:
2
2
  'test': 'test/type'
3
3
  'mp4': 'video/mp4'
4
+ 'vtt': 'text/vtt'
5
+ 'm4v': 'video/x-m4v'
4
6
  mime_type_to_dc_type_overrides:
5
7
  'test/type': 'Test'
6
8
  'application/mxf': 'MovingImage'
9
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'Text'
10
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'Dataset'
11
+ mime_type_to_pcdm_type_overrides:
12
+ 'test/type': 'Test'
data/lib/best_type.rb CHANGED
@@ -2,6 +2,7 @@ require 'best_type/version'
2
2
  require 'best_type/config'
3
3
  require 'best_type/mime_type_lookup'
4
4
  require 'best_type/dc_type_lookup'
5
+ require 'best_type/pcdm_type_lookup'
5
6
  require 'yaml'
6
7
 
7
8
  module BestType
@@ -15,6 +16,10 @@ module BestType
15
16
  @dc_type ||= BestType::DcTypeLookup.new(mime_type)
16
17
  end
17
18
 
19
+ def self.pcdm_type
20
+ @pcdm_type ||= BestType::PcdmTypeLookup.new(mime_type)
21
+ end
22
+
18
23
  def self.config(reload = false, user_config_options = {})
19
24
  if @config.nil? || reload
20
25
  @semaphore.synchronize do
@@ -1,7 +1,7 @@
1
1
  module BestType
2
2
  class Config
3
3
 
4
- attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides
4
+ attr_reader :extension_to_mime_type_overrides, :mime_type_to_dc_type_overrides, :mime_type_to_pcdm_type_overrides
5
5
 
6
6
  def initialize(user_config_options = {})
7
7
  # Get defaults from internal_custom_mapping.yml in gem
@@ -9,12 +9,14 @@ module BestType
9
9
  internal_config_file_path = File.join(gem_dir, 'config/internal_config_options.yml')
10
10
  internal_config_options = YAML.load_file(internal_config_file_path)
11
11
 
12
- @extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides']
13
- @mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides']
12
+ @extension_to_mime_type_overrides = internal_config_options['extension_to_mime_type_overrides'] || {}
13
+ @mime_type_to_dc_type_overrides = internal_config_options['mime_type_to_dc_type_overrides'] || {}
14
+ @mime_type_to_pcdm_type_overrides = internal_config_options['mime_type_to_pcdm_type_overrides'] || {}
14
15
 
15
16
  stringify_user_config_options_keys!(user_config_options)
16
17
  add_extension_to_mime_type_overrides(user_config_options['extension_to_mime_type_overrides']) if user_config_options.key?('extension_to_mime_type_overrides')
17
18
  add_mime_type_to_dc_type_overrides(user_config_options['mime_type_to_dc_type_overrides']) if user_config_options.key?('mime_type_to_dc_type_overrides')
19
+ add_mime_type_to_pcdm_type_overrides(user_config_options['mime_type_to_pcdm_type_overrides']) if user_config_options.key?('mime_type_to_pcdm_type_overrides')
18
20
  end
19
21
 
20
22
  private
@@ -27,6 +29,10 @@ module BestType
27
29
  @mime_type_to_dc_type_overrides.merge!(overrides)
28
30
  end
29
31
 
32
+ def add_mime_type_to_pcdm_type_overrides(overrides)
33
+ @mime_type_to_pcdm_type_overrides.merge!(overrides)
34
+ end
35
+
30
36
  def stringify_user_config_options_keys!(user_config_options)
31
37
  user_config_options_keys = user_config_options.keys
32
38
  user_config_options_keys.each do |key|
@@ -3,13 +3,39 @@ module BestType
3
3
 
4
4
  attr_reader :config
5
5
 
6
- FALLBACK_DC_TYPE = 'Software'.freeze
6
+ COLLECTION = 'Collection'.freeze
7
+ DATASET = 'Dataset'.freeze
8
+ EVENT = 'Event'.freeze
9
+ INTERACTIVE_RESOURCE = 'InteractiveResource'.freeze
10
+ MOVING_IMAGE = 'MovingImage'.freeze
11
+ PHYSICAL_OBJECT = 'PhysicalObject'.freeze
12
+ SERVICE = 'Service'.freeze
13
+ SOFTWARE = 'Software'.freeze
14
+ SOUND = 'Sound'.freeze
15
+ STILL_IMAGE = 'StillImage'.freeze
16
+ TEXT = 'Text'.freeze
17
+
18
+ # these include values that will not be derived from MIME/content types
19
+ VALID_TYPES = [
20
+ COLLECTION, EVENT, INTERACTIVE_RESOURCE, MOVING_IMAGE, PHYSICAL_OBJECT,
21
+ SERVICE, SOFTWARE, SOUND, STILL_IMAGE, TEXT
22
+ ].freeze
23
+
24
+ FALLBACK_DC_TYPE = SOFTWARE
7
25
 
8
26
  def initialize(mime_type_lookup_instance)
9
27
  @mime_type_lookup = mime_type_lookup_instance
10
28
  @config = @mime_type_lookup.config
11
29
  end
12
30
 
31
+ def fallback_type
32
+ FALLBACK_DC_TYPE
33
+ end
34
+
35
+ def valid_type?(value)
36
+ VALID_TYPES.include? value
37
+ end
38
+
13
39
  def for_file_name(file_name_or_path)
14
40
  for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
15
41
  end
@@ -20,13 +46,13 @@ module BestType
20
46
  return dc_type unless dc_type.nil?
21
47
 
22
48
  mimes_to_dc = {
23
- /^image/ => 'StillImage',
24
- /^video/ => 'MovingImage',
25
- /^audio/ => 'Sound',
26
- /^text/ => 'Text',
27
- /^application\/(pdf|msword)/ => 'Text',
28
- /excel|spreadsheet|xls|application\/sql/ => 'Dataset',
29
- /^application/ => 'Software'
49
+ /^image/ => STILL_IMAGE,
50
+ /^video/ => MOVING_IMAGE,
51
+ /^audio/ => SOUND,
52
+ /^text/ => TEXT,
53
+ /^application\/(pdf|msword)/ => TEXT,
54
+ /excel|spreadsheet|xls|application\/sql/ => DATASET,
55
+ /^application/ => SOFTWARE
30
56
  }
31
57
 
32
58
  dc_type = mimes_to_dc.find { |pattern, _type_val| mime_type =~ pattern }
@@ -0,0 +1,116 @@
1
+ module BestType
2
+ class PcdmTypeLookup
3
+
4
+ attr_reader :config
5
+
6
+ # https://github.com/duraspace/pcdm/blob/master/pcdm-ext/file-format-types.rdf
7
+ ARCHIVE = "Archive".freeze
8
+ AUDIO = "Audio".freeze
9
+ DATABASE = "Database".freeze
10
+ DATASET = "Dataset".freeze
11
+ EMAIL = "Email".freeze
12
+ FONT = "Font".freeze
13
+ HTML = "HTML".freeze
14
+ IMAGE = "Image".freeze
15
+ PAGE_DESCRIPTION = "PageDescription".freeze
16
+ PRESENTATION = "Presentation".freeze
17
+ SOFTWARE = "Software".freeze
18
+ SOURCE_CODE = "SourceCode".freeze
19
+ SPREADSHEET = "Spreadsheet".freeze
20
+ STRUCTURED_TEXT = "StructuredText".freeze
21
+ TEXT = "Text".freeze
22
+ UNKNOWN = "Unknown".freeze
23
+ UNSTRUCTURED_TEXT = "UnstructuredText".freeze
24
+ VIDEO = "Video".freeze
25
+ WEBSITE = "Website".freeze
26
+
27
+ # these include values that will not be derived from MIME/content types
28
+ VALID_TYPES = [
29
+ ARCHIVE, AUDIO, DATABASE, DATASET, EMAIL, FONT, HTML, IMAGE, PAGE_DESCRIPTION, PRESENTATION, SOFTWARE,
30
+ SOURCE_CODE, SPREADSHEET, STRUCTURED_TEXT, TEXT, UNKNOWN, UNSTRUCTURED_TEXT, VIDEO, WEBSITE
31
+ ].freeze
32
+
33
+ FALLBACK_TYPE = UNKNOWN
34
+
35
+ def initialize(mime_type_lookup_instance)
36
+ @mime_type_lookup = mime_type_lookup_instance
37
+ @config = @mime_type_lookup.config
38
+ end
39
+
40
+ def fallback_type
41
+ FALLBACK_TYPE
42
+ end
43
+
44
+ def valid_type?(value)
45
+ VALID_TYPES.include? value
46
+ end
47
+
48
+ def for_file_name(file_name_or_path)
49
+ for_mime_type(@mime_type_lookup.for_file_name(file_name_or_path))
50
+ end
51
+
52
+ def for_mime_type(mime_type)
53
+ # Check config overrides first
54
+ file_type = @config.mime_type_to_pcdm_type_overrides.fetch(mime_type, nil)
55
+ return file_type unless file_type.nil?
56
+
57
+ mimes_to_type = {
58
+ /^image/i => IMAGE,
59
+ /^video/i => VIDEO,
60
+ /^audio/i => AUDIO,
61
+ /^text/i => {
62
+ /\/css/i => SOURCE_CODE,
63
+ /\/html/i => HTML,
64
+ /.+/ => TEXT
65
+ },
66
+ /excel|spreadsheet|xls/i => SPREADSHEET,
67
+ /application\/sql/i => DATABASE,
68
+ /csv/i => DATASET,
69
+ /octet.stream/i => UNKNOWN,
70
+ /^application/i => {
71
+ /\/access/i => DATABASE,
72
+ /\/css/i => SOURCE_CODE,
73
+ /\/html/i => HTML,
74
+ /\/x-iwork-keynote/i => PRESENTATION,
75
+ /\/x-iwork-numbers/i => SPREADSHEET,
76
+ /\/x-iwork-pages/i => PAGE_DESCRIPTION,
77
+ /\/mbox/i => EMAIL,
78
+ /\/mp4/i => VIDEO,
79
+ /\/mp4a/i => AUDIO,
80
+ /\/msaccess/i => DATABASE,
81
+ /\/mxf/i => VIDEO,
82
+ /\/(pdf|msword)/i => PAGE_DESCRIPTION,
83
+ /\/postscript/i => PAGE_DESCRIPTION,
84
+ /\/powerpoint/i => PRESENTATION,
85
+ /\/rtf/i => PAGE_DESCRIPTION,
86
+ /\/sql/i => DATABASE,
87
+ /\/swf/ => VIDEO,
88
+ /\/vnd.ms-asf/i => VIDEO,
89
+ /\/vnd.ms-word/i => PAGE_DESCRIPTION,
90
+ /\/vnd.ms-wpl/i => PAGE_DESCRIPTION,
91
+ /\/vnd.oasis.opendocument.text/i => PAGE_DESCRIPTION,
92
+ /\/vnd.openxmlformats-officedocument.presentation/i => PRESENTATION,
93
+ /\/vnd.openxmlformats-officedocument.wordprocessingml/i => PAGE_DESCRIPTION,
94
+ /\/vnd.ms-powerpoint/i => PRESENTATION,
95
+ /\/vnd.sun.xml.calc/i => SPREADSHEET,
96
+ /\/vnd.sun.xml.impress/i => PRESENTATION,
97
+ /\/vnd.sun.xml.writer/i => PAGE_DESCRIPTION,
98
+ /\/xml/i => STRUCTURED_TEXT,
99
+ /\/x.mspublisher/i => PAGE_DESCRIPTION,
100
+ /\/x.shockwave-flash/ => VIDEO,
101
+ /\/x.spss/i => DATASET,
102
+ /\/zip/i => ARCHIVE,
103
+ /.+/ => UNKNOWN
104
+ }
105
+ }
106
+
107
+ file_type = mimes_to_type.detect { |pattern, _type_val| mime_type =~ pattern }
108
+ return fallback_type unless file_type
109
+ if file_type&.last.is_a? Hash
110
+ file_type = file_type.last.detect { |pattern, _type_val| mime_type =~ pattern }
111
+ end
112
+ file_type.nil? ? fallback_type : file_type.last
113
+ end
114
+
115
+ end
116
+ end
@@ -1,6 +1,6 @@
1
1
  module BestType
2
2
 
3
- VERSION = '0.0.3'.freeze
3
+ VERSION = '0.0.10'.freeze
4
4
 
5
5
  def self.version
6
6
  VERSION
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: best_type
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric O'Hanlon
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2018-03-25 00:00:00.000000000 Z
@@ -106,6 +106,7 @@ files:
106
106
  - lib/best_type/config.rb
107
107
  - lib/best_type/dc_type_lookup.rb
108
108
  - lib/best_type/mime_type_lookup.rb
109
+ - lib/best_type/pcdm_type_lookup.rb
109
110
  - lib/best_type/version.rb
110
111
  - lib/tasks/best_type.rake
111
112
  - lib/tasks/best_type/ci.rake
@@ -113,7 +114,7 @@ homepage: https://github.com/cul/best_type
113
114
  licenses:
114
115
  - MIT
115
116
  metadata: {}
116
- post_install_message:
117
+ post_install_message:
117
118
  rdoc_options: []
118
119
  require_paths:
119
120
  - lib
@@ -128,9 +129,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
129
  - !ruby/object:Gem::Version
129
130
  version: '0'
130
131
  requirements: []
131
- rubyforge_project:
132
- rubygems_version: 2.6.14
133
- signing_key:
132
+ rubygems_version: 3.0.8
133
+ signing_key:
134
134
  specification_version: 4
135
135
  summary: A library for selecting the best mime type or dc type for a file.
136
136
  test_files: []