kithe 2.0.0.pre.alpha2 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -4
  3. data/app/indexing/kithe/indexable/record_index_updater.rb +1 -1
  4. data/app/jobs/kithe/create_derivatives_job.rb +2 -2
  5. data/app/models/kithe/asset.rb +82 -154
  6. data/app/models/kithe/asset/derivative_creator.rb +32 -62
  7. data/app/models/kithe/asset/derivative_definition.rb +12 -13
  8. data/app/models/kithe/asset/set_shrine_uploader.rb +64 -0
  9. data/app/models/kithe/collection.rb +0 -6
  10. data/app/models/kithe/model.rb +0 -21
  11. data/app/models/kithe/work.rb +0 -5
  12. data/app/uploaders/kithe/asset_uploader.rb +15 -78
  13. data/lib/kithe.rb +22 -20
  14. data/{app/models → lib}/kithe/config_base.rb +6 -1
  15. data/lib/kithe/engine.rb +14 -3
  16. data/lib/kithe/indexable_settings.rb +1 -1
  17. data/lib/kithe/patch_fx.rb +39 -0
  18. data/lib/kithe/version.rb +4 -1
  19. data/lib/shrine/plugins/kithe_checksum_signatures.rb +41 -0
  20. data/lib/shrine/plugins/kithe_controllable_backgrounding.rb +53 -0
  21. data/lib/shrine/plugins/kithe_derivative_definitions.rb +101 -0
  22. data/lib/shrine/plugins/kithe_derivatives.rb +54 -0
  23. data/lib/shrine/plugins/kithe_determine_mime_type.rb +39 -0
  24. data/lib/shrine/plugins/kithe_persisted_derivatives.rb +161 -0
  25. data/lib/shrine/plugins/kithe_promotion_callbacks.rb +4 -0
  26. data/lib/shrine/plugins/kithe_promotion_directives.rb +33 -3
  27. data/lib/shrine/plugins/kithe_storage_location.rb +53 -4
  28. data/lib/tasks/kithe_tasks.rake +22 -15
  29. data/spec/dummy/app/models/plain_active_record.rb +3 -0
  30. data/spec/dummy/config/database.yml +6 -0
  31. data/spec/dummy/db/schema.rb +102 -0
  32. data/spec/dummy/log/development.log +3616 -0
  33. data/spec/dummy/log/test.log +86464 -0
  34. data/spec/dummy/tmp/development_secret.txt +1 -1
  35. data/spec/indexing/indexable_spec.rb +1 -1
  36. data/spec/models/kithe/asset/asset_derivatives_spec.rb +137 -0
  37. data/spec/models/kithe/asset/asset_promotion_hooks_spec.rb +26 -5
  38. data/spec/models/kithe/asset/set_shrine_uploader_spec.rb +39 -0
  39. data/spec/models/kithe/asset_spec.rb +9 -59
  40. data/spec/models/kithe/model_spec.rb +0 -32
  41. data/spec/models/kithe_spec.rb +10 -0
  42. data/spec/shrine/kithe_accept_remote_url_spec.rb +49 -0
  43. data/spec/shrine/kithe_checksum_signatures_spec.rb +63 -0
  44. data/spec/shrine/kithe_derivative_definitions_spec.rb +303 -0
  45. data/spec/shrine/kithe_persisted_derivatives_spec.rb +424 -0
  46. data/spec/shrine/kithe_storage_location_spec.rb +43 -15
  47. data/spec/spec_helper.rb +0 -19
  48. data/spec/test_support/images/3x3_pixel.jpg +0 -0
  49. data/spec/test_support/shrine_spec_support.rb +2 -1
  50. metadata +60 -36
  51. data/app/models/kithe/asset/derivative_updater.rb +0 -119
  52. data/app/models/kithe/derivative.rb +0 -15
  53. data/app/uploaders/kithe/derivative_uploader.rb +0 -48
  54. data/spec/dummy/db/structure.sql +0 -309
  55. data/spec/models/kithe/asset/asset_create_derivatives_spec.rb +0 -320
  56. data/spec/models/kithe/derivative_spec.rb +0 -168
@@ -2,38 +2,37 @@
2
2
  # class, it's what's created when you call Kithe::Asset#define_derivative
3
3
  class Kithe::Asset::DerivativeDefinition
4
4
  attr_reader :key, :content_type, :default_create, :proc, :storage_key
5
- def initialize(key:, storage_key:, proc:, content_type: nil, default_create: true)
6
- @key = key
5
+ def initialize(key:, proc:, content_type: nil, default_create: true)
6
+ @key = key.to_sym
7
7
  @content_type = content_type
8
- @storage_key = storage_key
9
8
  @default_create = default_create
10
9
  @proc = proc
11
10
  end
12
11
 
13
- def call(original_file:,record:)
14
- if proc_accepts_record_keyword?
15
- proc.call(original_file, record: record)
12
+ def call(original_file:,attacher:)
13
+ if proc_accepts_keyword?(:attacher)
14
+ proc.call(original_file, attacher: attacher)
16
15
  else
17
16
  proc.call(original_file)
18
17
  end
19
18
  end
20
19
 
21
20
  # Do content-type restrictions defined for this definition match a given original content type?
22
- def applies_to?(asset)
21
+ def applies_to_content_type?(original_content_type)
23
22
  return true if content_type.nil?
24
23
 
25
- return true if content_type == asset.content_type
24
+ return true if content_type == original_content_type
26
25
 
27
- return false if asset.content_type.nil?
26
+ return false if original_content_type.nil?
28
27
 
29
- return true if (content_type.kind_of?(Array) && content_type.include?(asset.content_type))
28
+ return true if (content_type.kind_of?(Array) && content_type.include?(original_content_type))
30
29
 
31
- content_type == asset.content_type.sub(%r{/.+\Z}, '')
30
+ content_type == original_content_type.sub(%r{/.+\Z}, '')
32
31
  end
33
32
 
34
33
  private
35
34
 
36
- def proc_accepts_record_keyword?
37
- proc.parameters.include?([:key, :record]) || proc.parameters.include?([:keyreq, :record])
35
+ def proc_accepts_keyword?(kwarg)
36
+ proc.parameters.include?([:key, kwarg]) || proc.parameters.include?([:keyreq, kwarg]) || proc.parameters.find {|a| a.first == :keyrest}
38
37
  end
39
38
  end
@@ -0,0 +1,64 @@
1
+ # Our Kithe::Asset model class is meant to be a superclass of a local application asset class, which we
2
+ # can call `Asset`, although an app can call it whatever they like.
3
+ #
4
+ # Kithe::Asset sets its own shrine uploader class, with a typical shrine:
5
+ #
6
+ # include Kithe::AssetUploader::Attachment(:file)
7
+ #
8
+ # An application Asset subclass will inherit this uploader, which is convenient for getting
9
+ # started quickly. But an application will likely want to define its own local uploader
10
+ # class, to define its own metadata, derivatives, and any other custom behavior.
11
+ #
12
+ # There isn't an obvious built-into-shrine way to do that, but it turns out simply overriding
13
+ # class and instance `*_attacher` methods seems to work out well. See:
14
+ # https://discourse.shrinerb.com/t/model-sub-classes-with-uploader-sub-classes/208
15
+ #
16
+ # So a local application can define its own shrine uploader, which is highly recommended to
17
+ # be a sub-class of Kithe::AssetUploader to ensure it has required and useful
18
+ # Kithe behavior:
19
+ #
20
+ # # ./app/uploaders/asset_uploader.rb
21
+ # class AssetUploader < Kithe::AssetUploader
22
+ # # maybe we want some custom metadata
23
+ # add_metadata :something do |io|
24
+ # whatever
25
+ # end
26
+ # end
27
+ #
28
+ # And then set it in its custom local Asset class:
29
+ #
30
+ # # ./app/models/asset.rb
31
+ # class Asset < Kithe::Asset
32
+ # set_shrine_uploader(AssetUploader)
33
+ # end
34
+ #
35
+ # If a local app has its own inheritance hierarchy of children below that (eg) Asset class,
36
+ # they can each (optionally) also override with a custom Uploader. It is recommended that
37
+ # the Uploader inheritance hierarchy match the model inheritance hierarchy, to have
38
+ # all behavior consistent. For instance:
39
+ #
40
+ # class AudioAssetUploader < AssetUploader
41
+ # end
42
+ #
43
+ # class AudioAsset < Asset
44
+ # set_shrine_uploader(AudioAssetUploader)
45
+ # end
46
+ #
47
+ module Kithe::Asset::SetShrineUploader
48
+ extend ActiveSupport::Concern
49
+
50
+ class_methods do
51
+ def set_shrine_uploader(uploader_class)
52
+ subclass_attachment = uploader_class::Attachment.new(:file)
53
+
54
+ define_singleton_method :file_attacher do |**options|
55
+ subclass_attachment.send(:class_attacher, **options)
56
+ end
57
+
58
+ define_method :file_attacher do |**options|
59
+ subclass_attachment.send(:attacher, self, **options)
60
+ end
61
+ end
62
+ end
63
+
64
+ end
@@ -1,14 +1,8 @@
1
1
  class Kithe::Collection < Kithe::Model
2
- # Collections don't have derivatives, but we want to allow Rails eager loading
3
- # of association on hetereogenous fetches of Kithe::Model, so this is clever.
4
- has_many :derivatives, -> { none }
5
- private :derivatives, :derivatives=, :derivative_ids, :derivative_ids=
6
-
7
2
  after_initialize do
8
3
  self.kithe_model_type = "collection" if self.kithe_model_type.nil?
9
4
  end
10
5
  before_validation do
11
6
  self.kithe_model_type = "collection" if self.kithe_model_type.nil?
12
7
  end
13
-
14
8
  end
@@ -10,15 +10,6 @@ class Kithe::Model < ActiveRecord::Base
10
10
  include AttrJson::Record::Dirty
11
11
  include Kithe::Indexable
12
12
 
13
- # A handy scope for eager-loading all representatives and all of their derivatives.
14
- #
15
- # Works on hetereogenous collections of Works and Assets -- the Assets need
16
- # :derivatives directly referenced (since they don't really have a leaf_representative assoc),
17
- # the works need :leaf_representative => :derivatives.
18
- #
19
- # Loading all three of these on result sets of hundreds of values is still relatively quick.
20
- scope :with_representative_derivatives, -> { includes(:derivatives, leaf_representative: :derivatives) }
21
-
22
13
  # While Rails STI means the actual specific class is in `type`, sometimes
23
14
  # it can be convenient to fetch on a top category of Kithe::Model without using
24
15
  # Rails STI.
@@ -100,18 +91,6 @@ class Kithe::Model < ActiveRecord::Base
100
91
  in_memory
101
92
  end
102
93
 
103
- # hacky :(
104
- def derivatives(*args)
105
- raise TypeError.new("Only valid on Kithe::Asset") unless self.kind_of?(Kithe::Asset)
106
- super
107
- end
108
- # hacky :(
109
- def derivatives=(*args)
110
- raise TypeError.new("Only valid on Kithe::Asset") unless self.kind_of?(Kithe::Asset)
111
- super
112
- end
113
-
114
-
115
94
  # insist that leaf_representative is an Asset, otherwise return nil.
116
95
  # nil means there is no _asset_ leaf, and lets caller rely on leaf being
117
96
  # an asset.
@@ -1,9 +1,4 @@
1
1
  class Kithe::Work < Kithe::Model
2
- # Works don't have derivatives, but we want to allow Rails eager loading
3
- # of association on hetereogenous fetches of Kithe::Model, so this is clever.
4
- has_many :derivatives, -> { none }
5
- private :derivatives, :derivatives=, :derivative_ids, :derivative_ids=
6
-
7
2
  after_initialize do
8
3
  self.kithe_model_type = "work" if self.kithe_model_type.nil?
9
4
  end
@@ -20,7 +20,7 @@ module Kithe
20
20
  # FUTURE: Look at using client-side-calculated checksums to verify end-to-end.
21
21
  # https://github.com/shrinerb/shrine/wiki/Using-Checksums-in-Direct-Uploads
22
22
  #
23
- # When magicc-byte analyzer can't determine mime type, will fall back to `mediainfo`
23
+ # When magic-byte analyzer can't determine mime type, will fall back to `mediainfo`
24
24
  # CLI _if_ `Kithe.use_mediainfo` is true (defaults to true if mediainfo CLI is
25
25
  # available). (We need better ways to customize uploader.)
26
26
  class AssetUploader < Shrine
@@ -34,100 +34,37 @@ module Kithe
34
34
  # promotion, possibly in the background.
35
35
  plugin :refresh_metadata
36
36
 
37
- # Marcel analyzer is pure-ruby and fast. It's from Basecamp and is what
38
- # ActiveStorage uses. It is very similar to :mimemagic (and uses mimemagic
39
- # under the hood), but mimemagic seems not to be maintained with up to date
40
- # magic db? https://github.com/minad/mimemagic/pull/66
41
- plugin :determine_mime_type, analyzer: -> (io, analyzers) do
42
- mime_type = analyzers[:marcel].call(io)
43
-
44
- # But marcel is not able to catch some of our MP3s as audio/mpeg,
45
- # let's try mediainfo command line. mediainfo is one of the tools
46
- # the Harvard Fits tool uses. https://github.com/MediaArea/MediaInfo
47
- if Kithe.use_mediainfo && mime_type == "application/octet-stream" || mime_type.blank?
48
- mime_type = Kithe::MediainfoAnalyzer.new.call(io)
49
- end
50
-
51
- mime_type = "application/octet-stream" if mime_type.blank?
52
-
53
- mime_type
54
- end
55
-
56
37
  # Will save height and width to metadata for image types. (Won't for non-image types)
57
38
  # ignore errors (often due to storing a non-image file), consistent with shrine 2.x behavior.
58
39
  plugin :store_dimensions, on_error: :ignore
59
40
 
60
- # promotion and deletion will (sometimes) be in background.
61
- plugin :backgrounding
62
-
63
- # Useful in case consumers want it, and doesn't harm anything to be available.
64
- # https://github.com/shrinerb/shrine/blob/master/doc/plugins/rack_response.md
65
- plugin :rack_response
41
+ plugin :infer_extension, inferrer: :mini_mime
66
42
 
67
- # Normally we promote in background with backgrounding, but the set_promotion_directives
68
- # feature can be used to make promotion not happen at all, or happen in foreground.
69
- # asset.file_attacher.set_promotion_directives(promote: false)
70
- # asset.file_attacher.set_promotion_directives(promote: "inline")
71
- Attacher.promote_block do
72
- Kithe::TimingPromotionDirective.new(key: :promote, directives: self.promotion_directives) do |directive|
73
- if directive.inline?
74
- promote
75
- elsif directive.background?
76
- # What shrine normally expects for backgrounding, plus promotion_directives
77
- Kithe::AssetPromoteJob.perform_later(self.class.name, record.class.name, record.id, name.to_s, file_data, self.promotion_directives)
78
- end
79
- end
80
- end
43
+ # Just leave it here for others please
44
+ plugin :add_metadata
81
45
 
82
- # Delete using shrine backgrounding, but can be effected
83
- # by promotion_directives[:delete], similar to promotion above.
84
- # Yeah, not really a "promotion" directive, oh well.
85
- Attacher.destroy_block do
86
- Kithe::TimingPromotionDirective.new(key: :delete, directives: self.promotion_directives) do |directive|
87
- if directive.inline?
88
- destroy
89
- elsif directive.background?
90
- # What shrine normally expects for backgrounding
91
- Kithe::AssetDeleteJob.perform_later(self.class.name, data)
92
- end
93
- end
94
- end
95
46
 
96
- plugin :add_metadata
97
47
 
98
- # Makes files stored as /asset/#{asset_pk}/#{random_uuid}.#{original_suffix}
99
- plugin :kithe_storage_location
100
48
 
101
- # Allows you to assign hashes like:
102
- # { "id" => "http://url", "storage" => "remote_url", headers: { "Authorization" => "Bearer whatever"}}
103
- # (headers optional), for fetching remote urls on promotion. Useful with browse-everything.
104
- # WARNING: There's no whitelist, will accept any url. Is this a problem?
105
- plugin :kithe_accept_remote_url
49
+ # kithe-standard logic for sniffing mime type.
50
+ plugin :kithe_determine_mime_type
106
51
 
107
- # We want to store md5 and sha1 checksums (legacy compat), as well as
108
- # sha512 (more recent digital preservation recommendation: https://ocfl.io/draft/spec/#digests)
109
- #
110
- # We only calculate them on `store` action to avoid double-computation, and because for
111
- # direct uploads/backgrounding, we haven't actually gotten the file in our hands to compute
112
- # checksums until then anyway.
113
- plugin :signature
114
- add_metadata do |io, context|
115
- if context[:action] != :cache
116
- {
117
- md5: calculate_signature(io, :md5),
118
- sha1: calculate_signature(io, :sha1),
119
- sha512: calculate_signature(io, :sha512)
120
- }
121
- end
122
- end
123
- metadata_method :md5, :sha1, :sha512
52
+ # Determines storage path/location/id, so files will be stored as:
53
+ # /asset/#{asset_pk}/#{random_uuid}.#{original_suffix}
54
+ plugin :kithe_storage_location
124
55
 
56
+ # Set up logic for shrine backgrounding, which in kithe can be set by promotion_directives
57
+ plugin :kithe_controllable_backgrounding
125
58
 
126
59
  # Gives us (set_)promotion_directives methods on our attacher to
127
60
  # house lifecycle directives, about whether promotion, deletion,
128
61
  # derivatives happen in foreground, background, or not at all.
129
62
  plugin :kithe_promotion_directives
130
63
 
64
+ # Makes our before/after promotion callbacks get called.
131
65
  plugin :kithe_promotion_callbacks
66
+
67
+ # some configuration and convenience methods for shrine derivatives.
68
+ plugin :kithe_derivatives
132
69
  end
133
70
  end
@@ -68,28 +68,30 @@ module Kithe
68
68
  # The settings need to live here not in Kithe::Indexable, to avoid terrible
69
69
  # Rails dev-mode class-reloading weirdnesses. This module is not reloaded.
70
70
  class << self
71
- attr_accessor :indexable_settings
71
+ attr_writer :indexable_settings
72
72
  end
73
- self.indexable_settings = IndexableSettings.new(
74
- solr_url: "http://localhost:8983/solr/default",
75
- model_name_solr_field: "model_name_ssi",
76
- solr_id_value_attribute: "id",
77
- writer_class_name: "Traject::SolrJsonWriter",
78
- writer_settings: {
79
- # as default we tell the solrjsonwriter to use no threads,
80
- # no batching. softCommit on every update. Least surprising
81
- # default configuration.
82
- "solr_writer.thread_pool" => 0,
83
- "solr_writer.batch_size" => 1,
84
- "solr_writer.solr_update_args" => { softCommit: true },
85
- "solr_writer.http_timeout" => 3,
86
- "logger" => Rails.logger,
73
+ def self.indexable_settings
74
+ @indexable_settings ||= IndexableSettings.new(
75
+ solr_url: "http://localhost:8983/solr/default",
76
+ model_name_solr_field: "model_name_ssi",
77
+ solr_id_value_attribute: "id",
78
+ writer_class_name: "Traject::SolrJsonWriter",
79
+ writer_settings: {
80
+ # as default we tell the solrjsonwriter to use no threads,
81
+ # no batching. softCommit on every update. Least surprising
82
+ # default configuration.
83
+ "solr_writer.thread_pool" => 0,
84
+ "solr_writer.batch_size" => 1,
85
+ "solr_writer.solr_update_args" => { softCommit: true },
86
+ "solr_writer.http_timeout" => 3,
87
+ "logger" => Rails.logger,
87
88
 
88
- # MAYBE? no skippable exceptions please
89
- # "solr_writer.skippable_exceptions" => []
90
- },
91
- disable_callbacks: false
92
- )
89
+ # MAYBE? no skippable exceptions please
90
+ # "solr_writer.skippable_exceptions" => []
91
+ },
92
+ disable_callbacks: false
93
+ )
94
+ end
93
95
 
94
96
  class << self
95
97
  # Currently used by Kithe::AssetUploader, a bit of a hacky
@@ -9,7 +9,7 @@ module Kithe
9
9
  #
10
10
  # You may also want to consider [railsconfig](https://github.com/railsconfig/config)
11
11
  #
12
- # Kithe::Config:
12
+ # Kithe::ConfigBase:
13
13
  #
14
14
  # * uses an explicit declared list of allowable config keys, no silent typos
15
15
  # * can read from a local YAML file or ENV, by default letting ENV override local YAML file values.
@@ -97,6 +97,11 @@ module Kithe
97
97
  # This doesn't use any locking for concurrent initial loads, which is technically not
98
98
  # great, but probably shouldn't be a problem in practice, especially in MRI. Trying to
99
99
  # do proper locking with lazy load was too hard for me right now.
100
+ #
101
+ # ## Auto-loading
102
+ #
103
+ # This is intentionally NOT in an auto-loaded directory, so it can be used more
104
+ # easily in Rails initialization without problems. https://github.com/rails/rails/issues/40904
100
105
  class ConfigBase
101
106
  include Singleton
102
107
 
@@ -1,5 +1,19 @@
1
1
  require 'shrine'
2
2
 
3
+ # Gem "F(x)" or `fx` gem will get schema.rb to include locally-defined custom postgres functions
4
+ # and triggers, like we use. So apps can keep using schema.rb instead of structure.sql,
5
+ # and still have our custom functions preserved. We need to require it explicitly
6
+ # since it'll be an indirect dependency of the end app.
7
+ #
8
+ # But we need to patch it to create functions first so we can use them as default values
9
+ # https://github.com/teoljungberg/fx/issues/33
10
+ # https://github.com/teoljungberg/fx/pull/53
11
+ require 'fx'
12
+ require 'kithe/patch_fx'
13
+
14
+ # not auto-loaded, let's just load it for backwards compat though
15
+ require "kithe/config_base"
16
+
3
17
  module Kithe
4
18
  class Engine < ::Rails::Engine
5
19
  config.generators do |g|
@@ -8,8 +22,5 @@ module Kithe
8
22
  g.assets false
9
23
  g.helper false
10
24
  end
11
-
12
- # should only affect kithe development
13
- config.active_record.schema_format = :sql
14
25
  end
15
26
  end
@@ -8,7 +8,7 @@ module Kithe
8
8
  @writer_class_name = writer_class_name
9
9
  @writer_settings = writer_settings
10
10
  @model_name_solr_field = model_name_solr_field
11
- @solr_id_value_attribute = solr_id_value_attribute
11
+ @solr_id_value_attribute = solr_id_value_attribute || 'id'
12
12
  end
13
13
 
14
14
  # Use configured solr_url, and merge together with configured
@@ -0,0 +1,39 @@
1
+ # fx is a gem that lets Rails schema.rb capture postgres functions and triggers
2
+ #
3
+ # For it to work for our use case, we need it to define functions BEFORE tables when
4
+ # doing a `rake db:schema:load`, so we can refer to functions as default values in our
5
+ # tables.
6
+ #
7
+ # This is a known issue in fx, with a PR, but isn't yet merged/released, so we hack
8
+ # in a patch to force it. Better than forking.
9
+ #
10
+ # Based on: https://github.com/teoljungberg/fx/pull/53/
11
+ #
12
+ # We try to write future-compat code assuming that will be merged eventually....
13
+
14
+ require 'fx'
15
+
16
+ if Fx.configuration.respond_to?(:dump_functions_at_beginning_of_schema)
17
+ # we have the feature!
18
+
19
+ Fx.configure do |config|
20
+ config.dump_functions_at_beginning_of_schema = true
21
+ end
22
+
23
+ else
24
+ # Fx does not have the feature, we have to patch it in
25
+
26
+ require 'fx/schema_dumper/function'
27
+
28
+ module Fx
29
+ module SchemaDumper
30
+ module Function
31
+ def tables(stream)
32
+ functions(stream)
33
+ super
34
+ end
35
+ end
36
+ end
37
+ end
38
+
39
+ end