kithe 2.0.0.pre.alpha2 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -4
  3. data/app/indexing/kithe/indexable/record_index_updater.rb +1 -1
  4. data/app/jobs/kithe/create_derivatives_job.rb +2 -2
  5. data/app/models/kithe/asset.rb +82 -154
  6. data/app/models/kithe/asset/derivative_creator.rb +32 -62
  7. data/app/models/kithe/asset/derivative_definition.rb +12 -13
  8. data/app/models/kithe/asset/set_shrine_uploader.rb +64 -0
  9. data/app/models/kithe/collection.rb +0 -6
  10. data/app/models/kithe/model.rb +0 -21
  11. data/app/models/kithe/work.rb +0 -5
  12. data/app/uploaders/kithe/asset_uploader.rb +15 -78
  13. data/lib/kithe.rb +22 -20
  14. data/{app/models → lib}/kithe/config_base.rb +6 -1
  15. data/lib/kithe/engine.rb +14 -3
  16. data/lib/kithe/indexable_settings.rb +1 -1
  17. data/lib/kithe/patch_fx.rb +39 -0
  18. data/lib/kithe/version.rb +4 -1
  19. data/lib/shrine/plugins/kithe_checksum_signatures.rb +41 -0
  20. data/lib/shrine/plugins/kithe_controllable_backgrounding.rb +53 -0
  21. data/lib/shrine/plugins/kithe_derivative_definitions.rb +101 -0
  22. data/lib/shrine/plugins/kithe_derivatives.rb +54 -0
  23. data/lib/shrine/plugins/kithe_determine_mime_type.rb +39 -0
  24. data/lib/shrine/plugins/kithe_persisted_derivatives.rb +161 -0
  25. data/lib/shrine/plugins/kithe_promotion_callbacks.rb +4 -0
  26. data/lib/shrine/plugins/kithe_promotion_directives.rb +33 -3
  27. data/lib/shrine/plugins/kithe_storage_location.rb +53 -4
  28. data/lib/tasks/kithe_tasks.rake +22 -15
  29. data/spec/dummy/app/models/plain_active_record.rb +3 -0
  30. data/spec/dummy/config/database.yml +6 -0
  31. data/spec/dummy/db/schema.rb +102 -0
  32. data/spec/dummy/log/development.log +3616 -0
  33. data/spec/dummy/log/test.log +86464 -0
  34. data/spec/dummy/tmp/development_secret.txt +1 -1
  35. data/spec/indexing/indexable_spec.rb +1 -1
  36. data/spec/models/kithe/asset/asset_derivatives_spec.rb +137 -0
  37. data/spec/models/kithe/asset/asset_promotion_hooks_spec.rb +26 -5
  38. data/spec/models/kithe/asset/set_shrine_uploader_spec.rb +39 -0
  39. data/spec/models/kithe/asset_spec.rb +9 -59
  40. data/spec/models/kithe/model_spec.rb +0 -32
  41. data/spec/models/kithe_spec.rb +10 -0
  42. data/spec/shrine/kithe_accept_remote_url_spec.rb +49 -0
  43. data/spec/shrine/kithe_checksum_signatures_spec.rb +63 -0
  44. data/spec/shrine/kithe_derivative_definitions_spec.rb +303 -0
  45. data/spec/shrine/kithe_persisted_derivatives_spec.rb +424 -0
  46. data/spec/shrine/kithe_storage_location_spec.rb +43 -15
  47. data/spec/spec_helper.rb +0 -19
  48. data/spec/test_support/images/3x3_pixel.jpg +0 -0
  49. data/spec/test_support/shrine_spec_support.rb +2 -1
  50. metadata +60 -36
  51. data/app/models/kithe/asset/derivative_updater.rb +0 -119
  52. data/app/models/kithe/derivative.rb +0 -15
  53. data/app/uploaders/kithe/derivative_uploader.rb +0 -48
  54. data/spec/dummy/db/structure.sql +0 -309
  55. data/spec/models/kithe/asset/asset_create_derivatives_spec.rb +0 -320
  56. data/spec/models/kithe/derivative_spec.rb +0 -168
@@ -2,38 +2,37 @@
2
2
  # class, it's what's created when you call Kithe::Asset#define_derivative
3
3
  class Kithe::Asset::DerivativeDefinition
4
4
  attr_reader :key, :content_type, :default_create, :proc, :storage_key
5
- def initialize(key:, storage_key:, proc:, content_type: nil, default_create: true)
6
- @key = key
5
+ def initialize(key:, proc:, content_type: nil, default_create: true)
6
+ @key = key.to_sym
7
7
  @content_type = content_type
8
- @storage_key = storage_key
9
8
  @default_create = default_create
10
9
  @proc = proc
11
10
  end
12
11
 
13
- def call(original_file:,record:)
14
- if proc_accepts_record_keyword?
15
- proc.call(original_file, record: record)
12
+ def call(original_file:,attacher:)
13
+ if proc_accepts_keyword?(:attacher)
14
+ proc.call(original_file, attacher: attacher)
16
15
  else
17
16
  proc.call(original_file)
18
17
  end
19
18
  end
20
19
 
21
20
  # Do content-type restrictions defined for this definition match a given asset?
22
- def applies_to?(asset)
21
+ def applies_to_content_type?(original_content_type)
23
22
  return true if content_type.nil?
24
23
 
25
- return true if content_type == asset.content_type
24
+ return true if content_type == original_content_type
26
25
 
27
- return false if asset.content_type.nil?
26
+ return false if original_content_type.nil?
28
27
 
29
- return true if (content_type.kind_of?(Array) && content_type.include?(asset.content_type))
28
+ return true if (content_type.kind_of?(Array) && content_type.include?(original_content_type))
30
29
 
31
- content_type == asset.content_type.sub(%r{/.+\Z}, '')
30
+ content_type == original_content_type.sub(%r{/.+\Z}, '')
32
31
  end
33
32
 
34
33
  private
35
34
 
36
- def proc_accepts_record_keyword?
37
- proc.parameters.include?([:key, :record]) || proc.parameters.include?([:keyreq, :record])
35
+ def proc_accepts_keyword?(kwarg)
36
+ proc.parameters.include?([:key, kwarg]) || proc.parameters.include?([:keyreq, kwarg]) || proc.parameters.find {|a| a.first == :keyrest}
38
37
  end
39
38
  end
@@ -0,0 +1,64 @@
1
+ # Our Kithe::Asset model class is meant to be a superclass of a local application asset class, which we
2
+ # can call `Asset`, although an app can call it whatever they like.
3
+ #
4
+ # Kithe::Asset sets its own shrine uploader class, with a typical shrine:
5
+ #
6
+ # include Kithe::AssetUploader::Attachment(:file)
7
+ #
8
+ # An application Asset subclass will inherit this uploader, which is convenient for getting
9
+ # started quickly. But an application will likely want to define its own local uploader
10
+ # class, to define its own metadata, derivatives, and any other custom behavior.
11
+ #
12
+ # There isn't an obvious built-into-shrine way to do that, but it turns out simply overriding
13
+ # class and instance `*_attacher` methods seems to work out well. See:
14
+ # https://discourse.shrinerb.com/t/model-sub-classes-with-uploader-sub-classes/208
15
+ #
16
+ # So a local application can define its own shrine uploader, which is highly recommended to
17
+ # be a sub-class of Kithe::AssetUploader to ensure it has required and useful
18
+ # Kithe behavior:
19
+ #
20
+ # # ./app/uploaders/asset_uploader.rb
21
+ # class AssetUploader < Kithe::AssetUploader
22
+ # # maybe we want some custom metadata
23
+ # add_metadata :something do |io|
24
+ # whatever
25
+ # end
26
+ # end
27
+ #
28
+ # And then set it in its custom local Asset class:
29
+ #
30
+ # # ./app/models/asset.rb
31
+ # class Asset < Kithe::Asset
32
+ # set_shrine_uploader(AssetUploader)
33
+ # end
34
+ #
35
+ # If a local app has its own inheritance hierarchy of children below that (eg) Asset class,
36
+ # they can each (optionally) also override with a custom Uploader. It is recommended that
37
+ # the Uploader inheritance hierarchy match the model inheritance hierarchy, to have
38
+ # all behavior consistent. For instance:
39
+ #
40
+ # class AudioAssetUploader < AssetUploader
41
+ # end
42
+ #
43
+ # class AudioAsset < Asset
44
+ # set_shrine_uploader(AudioAssetUploader)
45
+ # end
46
+ #
47
+ module Kithe::Asset::SetShrineUploader
48
+ extend ActiveSupport::Concern
49
+
50
+ class_methods do
51
+ def set_shrine_uploader(uploader_class)
52
+ subclass_attachment = uploader_class::Attachment.new(:file)
53
+
54
+ define_singleton_method :file_attacher do |**options|
55
+ subclass_attachment.send(:class_attacher, **options)
56
+ end
57
+
58
+ define_method :file_attacher do |**options|
59
+ subclass_attachment.send(:attacher, self, **options)
60
+ end
61
+ end
62
+ end
63
+
64
+ end
@@ -1,14 +1,8 @@
1
1
  class Kithe::Collection < Kithe::Model
2
- # Collections don't have derivatives, but we want to allow Rails eager loading
3
- # of association on hetereogenous fetches of Kithe::Model, so this is clever.
4
- has_many :derivatives, -> { none }
5
- private :derivatives, :derivatives=, :derivative_ids, :derivative_ids=
6
-
7
2
  after_initialize do
8
3
  self.kithe_model_type = "collection" if self.kithe_model_type.nil?
9
4
  end
10
5
  before_validation do
11
6
  self.kithe_model_type = "collection" if self.kithe_model_type.nil?
12
7
  end
13
-
14
8
  end
@@ -10,15 +10,6 @@ class Kithe::Model < ActiveRecord::Base
10
10
  include AttrJson::Record::Dirty
11
11
  include Kithe::Indexable
12
12
 
13
- # A handy scope for eager-loading all representatives and all of their derivatives.
14
- #
15
- # Works on hetereogenous collections of Works and Assets -- the Assets need
16
- # :derivatives directly referenced (since they don't really have a leaf_representative assoc),
17
- # the works need :leaf_representative => :derivatives.
18
- #
19
- # Loading all three of these on result sets of hundreds of values is still relatively quick.
20
- scope :with_representative_derivatives, -> { includes(:derivatives, leaf_representative: :derivatives) }
21
-
22
13
  # While Rails STI means the actual specific class is in `type`, sometimes
23
14
  # it can be convenient to fetch on a top category of Kithe::Model without using
24
15
  # Rails STI.
@@ -100,18 +91,6 @@ class Kithe::Model < ActiveRecord::Base
100
91
  in_memory
101
92
  end
102
93
 
103
- # hacky :(
104
- def derivatives(*args)
105
- raise TypeError.new("Only valid on Kithe::Asset") unless self.kind_of?(Kithe::Asset)
106
- super
107
- end
108
- # hacky :(
109
- def derivatives=(*args)
110
- raise TypeError.new("Only valid on Kithe::Asset") unless self.kind_of?(Kithe::Asset)
111
- super
112
- end
113
-
114
-
115
94
  # insist that leaf_representative is an Asset, otherwise return nil.
116
95
  # nil means there is no _asset_ leaf, and lets caller rely on leaf being
117
96
  # an asset.
@@ -1,9 +1,4 @@
1
1
  class Kithe::Work < Kithe::Model
2
- # Works don't have derivatives, but we want to allow Rails eager loading
3
- # of association on hetereogenous fetches of Kithe::Model, so this is clever.
4
- has_many :derivatives, -> { none }
5
- private :derivatives, :derivatives=, :derivative_ids, :derivative_ids=
6
-
7
2
  after_initialize do
8
3
  self.kithe_model_type = "work" if self.kithe_model_type.nil?
9
4
  end
@@ -20,7 +20,7 @@ module Kithe
20
20
  # FUTURE: Look at using client-side-calculated checksums to verify end-to-end.
21
21
  # https://github.com/shrinerb/shrine/wiki/Using-Checksums-in-Direct-Uploads
22
22
  #
23
- # When magicc-byte analyzer can't determine mime type, will fall back to `mediainfo`
23
+ # When magic-byte analyzer can't determine mime type, will fall back to `mediainfo`
24
24
  # CLI _if_ `Kithe.use_mediainfo` is true (defaults to true if mediainfo CLI is
25
25
  # available). (We need better ways to customize uploader.)
26
26
  class AssetUploader < Shrine
@@ -34,100 +34,37 @@ module Kithe
34
34
  # promotion, possibly in the background.
35
35
  plugin :refresh_metadata
36
36
 
37
- # Marcel analyzer is pure-ruby and fast. It's from Basecamp and is what
38
- # ActiveStorage uses. It is very similar to :mimemagic (and uses mimemagic
39
- # under the hood), but mimemagic seems not to be maintained with up to date
40
- # magic db? https://github.com/minad/mimemagic/pull/66
41
- plugin :determine_mime_type, analyzer: -> (io, analyzers) do
42
- mime_type = analyzers[:marcel].call(io)
43
-
44
- # But marcel is not able to catch some of our MP3s as audio/mpeg,
45
- # let's try mediainfo command line. mediainfo is one of the tools
46
- # the Harvard Fits tool uses. https://github.com/MediaArea/MediaInfo
47
- if Kithe.use_mediainfo && mime_type == "application/octet-stream" || mime_type.blank?
48
- mime_type = Kithe::MediainfoAnalyzer.new.call(io)
49
- end
50
-
51
- mime_type = "application/octet-stream" if mime_type.blank?
52
-
53
- mime_type
54
- end
55
-
56
37
  # Will save height and width to metadata for image types. (Won't for non-image types)
57
38
  # ignore errors (often due to storing a non-image file), consistent with shrine 2.x behavior.
58
39
  plugin :store_dimensions, on_error: :ignore
59
40
 
60
- # promotion and deletion will (sometimes) be in background.
61
- plugin :backgrounding
62
-
63
- # Useful in case consumers want it, and doesn't harm anything to be available.
64
- # https://github.com/shrinerb/shrine/blob/master/doc/plugins/rack_response.md
65
- plugin :rack_response
41
+ plugin :infer_extension, inferrer: :mini_mime
66
42
 
67
- # Normally we promote in background with backgrounding, but the set_promotion_directives
68
- # feature can be used to make promotion not happen at all, or happen in foreground.
69
- # asset.file_attacher.set_promotion_directives(promote: false)
70
- # asset.file_attacher.set_promotion_directives(promote: "inline")
71
- Attacher.promote_block do
72
- Kithe::TimingPromotionDirective.new(key: :promote, directives: self.promotion_directives) do |directive|
73
- if directive.inline?
74
- promote
75
- elsif directive.background?
76
- # What shrine normally expects for backgrounding, plus promotion_directives
77
- Kithe::AssetPromoteJob.perform_later(self.class.name, record.class.name, record.id, name.to_s, file_data, self.promotion_directives)
78
- end
79
- end
80
- end
43
+ # Just leave it here for others please
44
+ plugin :add_metadata
81
45
 
82
- # Delete using shrine backgrounding, but can be effected
83
- # by promotion_directives[:delete], similar to promotion above.
84
- # Yeah, not really a "promotion" directive, oh well.
85
- Attacher.destroy_block do
86
- Kithe::TimingPromotionDirective.new(key: :delete, directives: self.promotion_directives) do |directive|
87
- if directive.inline?
88
- destroy
89
- elsif directive.background?
90
- # What shrine normally expects for backgrounding
91
- Kithe::AssetDeleteJob.perform_later(self.class.name, data)
92
- end
93
- end
94
- end
95
46
 
96
- plugin :add_metadata
97
47
 
98
- # Makes files stored as /asset/#{asset_pk}/#{random_uuid}.#{original_suffix}
99
- plugin :kithe_storage_location
100
48
 
101
- # Allows you to assign hashes like:
102
- # { "id" => "http://url", "storage" => "remote_url", headers: { "Authorization" => "Bearer whatever"}}
103
- # (headers optional), for fetching remote urls on promotion. Useful with browse-everything.
104
- # WARNING: There's no whitelist, will accept any url. Is this a problem?
105
- plugin :kithe_accept_remote_url
49
+ # kithe-standard logic for sniffing mime type.
50
+ plugin :kithe_determine_mime_type
106
51
 
107
- # We want to store md5 and sha1 checksums (legacy compat), as well as
108
- # sha512 (more recent digital preservation recommendation: https://ocfl.io/draft/spec/#digests)
109
- #
110
- # We only calculate them on `store` action to avoid double-computation, and because for
111
- # direct uploads/backgrounding, we haven't actually gotten the file in our hands to compute
112
- # checksums until then anyway.
113
- plugin :signature
114
- add_metadata do |io, context|
115
- if context[:action] != :cache
116
- {
117
- md5: calculate_signature(io, :md5),
118
- sha1: calculate_signature(io, :sha1),
119
- sha512: calculate_signature(io, :sha512)
120
- }
121
- end
122
- end
123
- metadata_method :md5, :sha1, :sha512
52
+ # Determines storage path/location/id, so files will be stored as:
53
+ # /asset/#{asset_pk}/#{random_uuid}.#{original_suffix}
54
+ plugin :kithe_storage_location
124
55
 
56
+ # Set up logic for shrine backgrounding, which in kithe can be set by promotion_directives
57
+ plugin :kithe_controllable_backgrounding
125
58
 
126
59
  # Gives us (set_)promotion_directives methods on our attacher to
127
60
  # house lifecycle directives, about whether promotion, deletion,
128
61
  # derivatives happen in foreground, background, or not at all.
129
62
  plugin :kithe_promotion_directives
130
63
 
64
+ # Makes our before/after promotion callbacks get called.
131
65
  plugin :kithe_promotion_callbacks
66
+
67
+ # some configuration and convenience methods for shrine derivatives.
68
+ plugin :kithe_derivatives
132
69
  end
133
70
  end
@@ -68,28 +68,30 @@ module Kithe
68
68
  # The settings need to live here not in Kithe::Indexable, to avoid terrible
69
69
  # Rails dev-mode class-reloading weirdnesses. This module is not reloaded.
70
70
  class << self
71
- attr_accessor :indexable_settings
71
+ attr_writer :indexable_settings
72
72
  end
73
- self.indexable_settings = IndexableSettings.new(
74
- solr_url: "http://localhost:8983/solr/default",
75
- model_name_solr_field: "model_name_ssi",
76
- solr_id_value_attribute: "id",
77
- writer_class_name: "Traject::SolrJsonWriter",
78
- writer_settings: {
79
- # as default we tell the solrjsonwriter to use no threads,
80
- # no batching. softCommit on every update. Least surprising
81
- # default configuration.
82
- "solr_writer.thread_pool" => 0,
83
- "solr_writer.batch_size" => 1,
84
- "solr_writer.solr_update_args" => { softCommit: true },
85
- "solr_writer.http_timeout" => 3,
86
- "logger" => Rails.logger,
73
+ def self.indexable_settings
74
+ @indexable_settings ||= IndexableSettings.new(
75
+ solr_url: "http://localhost:8983/solr/default",
76
+ model_name_solr_field: "model_name_ssi",
77
+ solr_id_value_attribute: "id",
78
+ writer_class_name: "Traject::SolrJsonWriter",
79
+ writer_settings: {
80
+ # as default we tell the solrjsonwriter to use no threads,
81
+ # no batching. softCommit on every update. Least surprising
82
+ # default configuration.
83
+ "solr_writer.thread_pool" => 0,
84
+ "solr_writer.batch_size" => 1,
85
+ "solr_writer.solr_update_args" => { softCommit: true },
86
+ "solr_writer.http_timeout" => 3,
87
+ "logger" => Rails.logger,
87
88
 
88
- # MAYBE? no skippable exceptions please
89
- # "solr_writer.skippable_exceptions" => []
90
- },
91
- disable_callbacks: false
92
- )
89
+ # MAYBE? no skippable exceptions please
90
+ # "solr_writer.skippable_exceptions" => []
91
+ },
92
+ disable_callbacks: false
93
+ )
94
+ end
93
95
 
94
96
  class << self
95
97
  # Currently used by Kithe::AssetUploader, a bit of a hacky
@@ -9,7 +9,7 @@ module Kithe
9
9
  #
10
10
  # You may also want to consider [railsconfig](https://github.com/railsconfig/config)
11
11
  #
12
- # Kithe::Config:
12
+ # Kithe::ConfigBase:
13
13
  #
14
14
  # * uses an explicit declared list of allowable config keys, no silent typos
15
15
  # * can read from a local YAML file or ENV, by default letting ENV override local YAML file values.
@@ -97,6 +97,11 @@ module Kithe
97
97
  # This doesn't use any locking for concurrent initial loads, which is technically not
98
98
  # great, but probably shouldn't be a problem in practice, especially in MRI. Trying to
99
99
  # do proper locking with lazy load was too hard for me right now.
100
+ #
101
+ # ## Auto-loading
102
+ #
103
+ # This is intentionally NOT in an auto-loaded directory, so it can be used more
104
+ # easily in Rails initialization without problems. https://github.com/rails/rails/issues/40904
100
105
  class ConfigBase
101
106
  include Singleton
102
107
 
@@ -1,5 +1,19 @@
1
1
  require 'shrine'
2
2
 
3
+ # Gem "F(x)" or `fx` gem will get schema.rb to include locally-defined custom postgres functions
4
+ # and triggers, like we use. So apps can keep using schema.rb instead of structure.sql,
5
+ # and still have our custom functions preserved. We need to require it explicitly
6
+ # since it'll be an indirect dependency of the end app.
7
+ #
8
+ # But we need to patch it to create functions first so we can use them as default values
9
+ # https://github.com/teoljungberg/fx/issues/33
10
+ # https://github.com/teoljungberg/fx/pull/53
11
+ require 'fx'
12
+ require 'kithe/patch_fx'
13
+
14
+ # not auto-loaded, let's just load it for backwards compat though
15
+ require "kithe/config_base"
16
+
3
17
  module Kithe
4
18
  class Engine < ::Rails::Engine
5
19
  config.generators do |g|
@@ -8,8 +22,5 @@ module Kithe
8
22
  g.assets false
9
23
  g.helper false
10
24
  end
11
-
12
- # should only affect kithe development
13
- config.active_record.schema_format = :sql
14
25
  end
15
26
  end
@@ -8,7 +8,7 @@ module Kithe
8
8
  @writer_class_name = writer_class_name
9
9
  @writer_settings = writer_settings
10
10
  @model_name_solr_field = model_name_solr_field
11
- @solr_id_value_attribute = solr_id_value_attribute
11
+ @solr_id_value_attribute = solr_id_value_attribute || 'id'
12
12
  end
13
13
 
14
14
  # Use configured solr_url, and merge together with configured
@@ -0,0 +1,39 @@
1
+ # fx is a gem that lets Rails schema.rb capture postgres functions and triggers
2
+ #
3
+ # For it to work for our use case, we need it to define functions BEFORE tables when
4
+ # doing a `rake db:schema:load`, so we can refer to functions as default values in our
5
+ # tables.
6
+ #
7
+ # This is a known issue in fx, with a PR, but isn't yet merged/released, so we hack
8
+ # in a patch to force it. Better than forking.
9
+ #
10
+ # Based on: https://github.com/teoljungberg/fx/pull/53/
11
+ #
12
+ # We try to write future-compat code assuming that will be merged eventually....
13
+
14
+ require 'fx'
15
+
16
+ if Fx.configuration.respond_to?(:dump_functions_at_beginning_of_schema)
17
+ # we have the feature!
18
+
19
+ Fx.configure do |config|
20
+ config.dump_functions_at_beginning_of_schema = true
21
+ end
22
+
23
+ else
24
+ # Fx does not have the feature, we have to patch it in
25
+
26
+ require 'fx/schema_dumper/function'
27
+
28
+ module Fx
29
+ module SchemaDumper
30
+ module Function
31
+ def tables(stream)
32
+ functions(stream)
33
+ super
34
+ end
35
+ end
36
+ end
37
+ end
38
+
39
+ end