curation_concerns-models 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/Rakefile +1 -1
  4. data/app/actors/concerns/curation_concerns/manages_embargoes_actor.rb +11 -19
  5. data/app/actors/curation_concerns/base_actor.rb +41 -45
  6. data/app/actors/curation_concerns/embargo_actor.rb +19 -0
  7. data/app/actors/curation_concerns/file_set_actor.rb +200 -0
  8. data/app/actors/curation_concerns/lease_actor.rb +19 -0
  9. data/app/actors/curation_concerns/work_actor_behavior.rb +55 -58
  10. data/app/indexers/curation_concerns/collection_indexer.rb +10 -0
  11. data/app/indexers/curation_concerns/file_set_indexing_service.rb +24 -0
  12. data/app/{services/curation_concerns/generic_work_indexing_service.rb → indexers/curation_concerns/work_indexing_service.rb} +6 -6
  13. data/app/jobs/active_fedora_id_based_job.rb +5 -12
  14. data/app/jobs/audit_job.rb +11 -17
  15. data/app/jobs/characterize_job.rb +8 -7
  16. data/app/jobs/create_derivatives_job.rb +8 -11
  17. data/app/jobs/import_url_job.rb +12 -25
  18. data/app/jobs/ingest_file_job.rb +16 -0
  19. data/app/jobs/ingest_local_file_job.rb +14 -35
  20. data/app/jobs/resolrize_job.rb +3 -5
  21. data/app/jobs/upload_set_update_job.rb +68 -0
  22. data/app/models/checksum_audit_log.rb +2 -3
  23. data/app/models/concerns/curation_concerns/ability.rb +18 -10
  24. data/app/models/concerns/curation_concerns/basic_metadata.rb +1 -3
  25. data/app/models/concerns/curation_concerns/collection_behavior.rb +13 -14
  26. data/app/models/concerns/curation_concerns/file_set/belongs_to_upload_sets.rb +15 -0
  27. data/app/models/concerns/curation_concerns/{generic_file → file_set}/belongs_to_works.rb +8 -14
  28. data/app/models/concerns/curation_concerns/file_set/derivatives.rb +54 -0
  29. data/app/models/concerns/curation_concerns/{generic_file → file_set}/full_text_indexing.rb +1 -2
  30. data/app/models/concerns/curation_concerns/{generic_file → file_set}/indexing.rb +2 -2
  31. data/app/models/concerns/curation_concerns/{generic_file → file_set}/versions.rb +2 -3
  32. data/app/models/concerns/curation_concerns/file_set_behavior.rb +36 -0
  33. data/app/models/concerns/curation_concerns/generic_file.rb +1 -1
  34. data/app/models/concerns/curation_concerns/has_representative.rb +6 -7
  35. data/app/models/concerns/curation_concerns/human_readable_type.rb +5 -7
  36. data/app/models/concerns/curation_concerns/permissions.rb +2 -2
  37. data/app/models/concerns/curation_concerns/permissions/readable.rb +0 -1
  38. data/app/models/concerns/curation_concerns/permissions/writable.rb +10 -51
  39. data/app/models/concerns/curation_concerns/serializers.rb +3 -5
  40. data/app/models/concerns/curation_concerns/solr_document_behavior.rb +37 -40
  41. data/app/models/concerns/curation_concerns/upload_set_behavior.rb +38 -0
  42. data/app/models/concerns/curation_concerns/user.rb +4 -51
  43. data/app/models/concerns/curation_concerns/with_file_sets.rb +28 -0
  44. data/app/models/concerns/curation_concerns/{generic_work_behavior.rb → work_behavior.rb} +12 -6
  45. data/app/models/curation_concerns/classify_concern.rb +7 -7
  46. data/app/models/curation_concerns/quick_classification_query.rb +6 -7
  47. data/app/models/single_use_link.rb +34 -0
  48. data/app/models/upload_set.rb +3 -0
  49. data/app/services/curation_concerns/derivative_path.rb +32 -0
  50. data/app/services/curation_concerns/{generic_file_audit_service.rb → file_set_audit_service.rb} +17 -18
  51. data/app/services/curation_concerns/indexes_thumbnails.rb +14 -0
  52. data/app/services/curation_concerns/local_file_service.rb +10 -0
  53. data/app/services/curation_concerns/lock_manager.rb +40 -0
  54. data/app/services/curation_concerns/noid.rb +1 -1
  55. data/app/services/curation_concerns/persist_derivatives.rb +33 -0
  56. data/app/services/curation_concerns/persist_directly_contained_output_file_service.rb +26 -0
  57. data/app/services/curation_concerns/repository_audit_service.rb +1 -3
  58. data/app/services/curation_concerns/thumbnail_path_service.rb +46 -0
  59. data/app/services/curation_concerns/time_service.rb +7 -0
  60. data/app/services/curation_concerns/versioning_service.rb +11 -12
  61. data/curation_concerns-models.gemspec +6 -6
  62. data/lib/curation_concerns/configuration.rb +154 -0
  63. data/lib/curation_concerns/messages.rb +26 -26
  64. data/lib/curation_concerns/models.rb +5 -14
  65. data/lib/curation_concerns/models/engine.rb +0 -30
  66. data/lib/curation_concerns/models/utils.rb +4 -4
  67. data/lib/curation_concerns/models/version.rb +1 -1
  68. data/lib/generators/curation_concerns/models/abstract_migration_generator.rb +8 -7
  69. data/lib/generators/curation_concerns/models/clamav_generator.rb +3 -3
  70. data/lib/generators/curation_concerns/models/install_generator.rb +13 -20
  71. data/lib/generators/curation_concerns/models/templates/app/models/file_set.rb +4 -0
  72. data/lib/generators/curation_concerns/models/templates/config/clamav.rb +1 -1
  73. data/lib/generators/curation_concerns/models/templates/config/curation_concerns.rb +52 -65
  74. data/lib/generators/curation_concerns/models/templates/config/redis_config.rb +13 -17
  75. data/lib/generators/curation_concerns/models/templates/config/resque_config.rb +2 -1
  76. data/lib/generators/curation_concerns/models/templates/migrations/create_checksum_audit_logs.rb +3 -3
  77. data/lib/generators/curation_concerns/models/templates/migrations/create_single_use_links.rb +12 -0
  78. data/lib/tasks/curation_concerns-models_tasks.rake +4 -62
  79. data/lib/tasks/migrate.rake +1 -1
  80. data/lib/tasks/resque.rake +1 -0
  81. data/lib/tasks/solr_reindex.rake +1 -1
  82. metadata +59 -52
  83. data/app/actors/curation_concerns/generic_file_actor.rb +0 -150
  84. data/app/jobs/active_fedora_pid_based_job.rb +0 -6
  85. data/app/jobs/copy_permissions_job.rb +0 -24
  86. data/app/models/concerns/curation_concerns/generic_file/characterization.rb +0 -89
  87. data/app/models/concerns/curation_concerns/generic_file/content.rb +0 -8
  88. data/app/models/concerns/curation_concerns/generic_file/export.rb +0 -343
  89. data/app/models/concerns/curation_concerns/generic_file_behavior.rb +0 -44
  90. data/app/models/concerns/curation_concerns/with_basic_metadata.rb +0 -98
  91. data/app/models/concerns/curation_concerns/with_generic_files.rb +0 -29
  92. data/app/models/datastreams/fits_datastream.rb +0 -148
  93. data/app/services/curation_concerns/characterization_service.rb +0 -71
  94. data/app/services/curation_concerns/full_text_extraction_service.rb +0 -38
  95. data/app/services/curation_concerns/generic_file_indexing_service.rb +0 -14
  96. data/lib/curation_concerns/models/resque.rb +0 -36
  97. data/lib/generators/curation_concerns/models/fulltext_generator.rb +0 -28
  98. data/lib/generators/curation_concerns/models/templates/app/models/generic_file.rb +0 -4
  99. data/lib/generators/curation_concerns/models/templates/config/resque_admin.rb +0 -10
@@ -1,9 +1,7 @@
1
- class ResolrizeJob
2
- def queue_name
3
- :resolrize
4
- end
1
+ class ResolrizeJob < ActiveJob::Base
2
+ queue_as :resolrize
5
3
 
6
- def run
4
+ def perform
7
5
  ActiveFedora::Base.reindex_everything
8
6
  end
9
7
  end
@@ -0,0 +1,68 @@
# Background job that applies one set of metadata to every file set in an
# upload set, mirrors it onto the first work each file set belongs to, marks
# the upload set complete and then fires the configured success or failure
# callback.
class UploadSetUpdateJob < ActiveJob::Base
  include Hydra::PermissionsQuery
  include CurationConcerns::Messages

  queue_as :upload_set_update

  attr_accessor :login, :title, :file_attributes, :upload_set_id, :visibility, :saved, :denied, :work_attributes

  # @param login           user key handed to User.find_by_user_key
  # @param upload_set_id   id handed to UploadSet.find_or_create
  # @param title           per-file titles looked up by file id; nil is treated as {}
  # @param file_attributes attributes applied to every file set (merged with visibility)
  # @param visibility      visibility value merged into the attributes
  # @return [true]  when no file was denied and at least one was saved
  # @return [false] when at least one file was denied
  # @return [nil]   when nothing was denied and nothing was saved
  def perform(login, upload_set_id, title, file_attributes, visibility)
    @login = login
    @title = title || {}
    @file_attributes = file_attributes
    @visibility = visibility
    @work_attributes = file_attributes.merge(visibility: visibility)
    @upload_set_id = upload_set_id
    @saved = []
    @denied = []

    upload_set = UploadSet.find_or_create(self.upload_set_id)
    user = User.find_by_user_key(self.login)

    upload_set.file_sets.each do |file|
      update_file(file, user)
    end

    upload_set.update(status: ["Complete"])

    if denied.empty?
      # Nothing denied and nothing saved: no callback, nil result.
      return if saved.empty?
      run_callback(:after_upload_set_update_success, upload_set)
      true
    else
      run_callback(:after_upload_set_update_failure, upload_set)
      false
    end
  end

  # Updates a single file set (and its first parent work, if any) on behalf
  # of +user+. Files the user may not edit are logged, recorded in +denied+
  # and left untouched; successfully processed files are recorded in +saved+.
  def update_file(file, user)
    unless user.can? :edit, file
      ActiveFedora::Base.logger.error "User #{user.user_key} DENIED access to #{file.id}!"
      denied << file
      return
    end
    # update the file using the actor after setting the title
    file.title = title[file.id] if title[file.id]
    CurationConcerns::FileSetActor.new(file, user).update_metadata(file_attributes.merge(visibility: visibility))

    # update the work to the same metadata as the file.
    # NOTE: For the moment we are assuming copied metadata. This is likely to change.
    # NOTE2: TODO: stop assuming that files only belong to one work
    work = file.in_works.first
    unless work.nil?
      work.title = title[file.id] if title[file.id]
      CurationConcerns::GenericWorkActor.new(work, user, work_attributes).update
    end

    saved << file
  end

  private

    # Runs the named callback, if one is registered, passing the upload set's
    # depositor (looked up by user key), the upload set and a timestamp.
    def run_callback(name, upload_set)
      callbacks = CurationConcerns.config.callback
      return unless callbacks.set?(name)
      depositor = User.find_by_user_key(upload_set.depositor)
      # NOTE(review): `log` is not defined in this class; presumably it is
      # expected from an included module -- confirm, otherwise this raises
      # NameError whenever a callback is registered.
      callbacks.run(name, depositor, upload_set, log.created_at)
    end
end
@@ -1,7 +1,6 @@
1
1
  class ChecksumAuditLog < ActiveRecord::Base
2
-
3
2
  def self.get_audit_log(id, path, version_uri)
4
- ChecksumAuditLog.find_or_create_by(generic_file_id: id, file_id: path, version: version_uri)
3
+ ChecksumAuditLog.find_or_create_by(file_set_id: id, file_id: path, version: version_uri)
5
4
  end
6
5
 
7
6
  # Check to see if there are previous passing logs that we can delete
@@ -16,6 +15,6 @@ class ChecksumAuditLog < ActiveRecord::Base
16
15
  end
17
16
 
18
17
  def self.logs_for(id, path)
19
- ChecksumAuditLog.where(generic_file_id: id, file_id: path).order('created_at desc, id desc')
18
+ ChecksumAuditLog.where(file_set_id: id, file_id: path).order('created_at desc, id desc')
20
19
  end
21
20
  end
@@ -6,29 +6,37 @@ module CurationConcerns
6
6
  end
7
7
 
8
8
  def curation_concerns_permissions
9
-
10
9
  unless current_user.new_record?
11
10
  can :create, CurationConcerns::ClassifyConcern
12
- # TODO: Shouldn't this be in `everyone_can_create_curation_concerns` ?
13
- can :create, ::GenericFile
14
11
  end
15
12
 
16
- if user_groups.include? 'admin'
17
- can [:create, :discover, :show, :read, :edit, :update, :destroy], :all
13
+ # user can version if they can edit
14
+ alias_action :versions, to: :update
15
+
16
+ if admin?
17
+ admin_permissions
18
+ else
19
+ cannot :index, Hydra::AccessControls::Embargo
20
+ cannot :index, Hydra::AccessControls::Lease
18
21
  end
19
22
 
20
23
  can :collect, :all
24
+ end
25
+
26
+ def admin_permissions
27
+ can [:create, :discover, :show, :read, :edit, :update, :destroy], :all
28
+ end
21
29
 
30
+ def admin?
31
+ user_groups.include? 'admin'
22
32
  end
23
33
 
24
34
  # Add this to your ability_logic if you want all logged in users to be able
25
35
  # to submit content
26
36
  def everyone_can_create_curation_concerns
27
- unless current_user.new_record?
28
- can :create, [ CurationConcerns.configuration.curation_concerns ]
29
- can :create, ::Collection
30
- end
37
+ return if current_user.new_record?
38
+ can :create, [::FileSet, ::Collection]
39
+ can :create, [CurationConcerns.config.curation_concerns]
31
40
  end
32
-
33
41
  end
34
42
  end
@@ -3,10 +3,9 @@ module CurationConcerns
3
3
  extend ActiveSupport::Concern
4
4
 
5
5
  included do
6
-
7
6
  property :label, predicate: ActiveFedora::RDF::Fcrepo::Model.downloadFilename, multiple: false
8
7
 
9
- property :depositor, predicate: ::RDF::URI.new("http://id.loc.gov/vocabulary/relators/dpt"), multiple: false do |index|
8
+ property :depositor, predicate: ::RDF::URI.new('http://id.loc.gov/vocabulary/relators/dpt'), multiple: false do |index|
10
9
  index.as :symbol, :stored_searchable
11
10
  end
12
11
 
@@ -82,6 +81,5 @@ module CurationConcerns
82
81
  index.as :stored_searchable
83
82
  end
84
83
  end
85
-
86
84
  end
87
85
  end
@@ -14,34 +14,33 @@ module CurationConcerns
14
14
  end
15
15
 
16
16
  def add_member(collectible)
17
- if can_add_to_members?(collectible)
18
- self.members << collectible
19
- save
20
- end
17
+ return unless can_add_to_members?(collectible)
18
+ members << collectible
19
+ save
21
20
  end
22
21
 
23
22
  def to_s
24
- title.present? ? title : "No Title"
23
+ title.present? ? title : 'No Title'
25
24
  end
26
25
 
27
26
  def bytes
28
27
  members.reduce(0) { |sum, gf| sum + gf.content.size.to_i }
29
28
  end
30
29
 
31
- def to_solr(solr_doc={})
32
- super(solr_doc).tap do |solr_doc|
33
- Solrizer.set_field(solr_doc, 'generic_type', human_readable_type, :facetable)
34
- end
30
+ def can_be_member_of_collection?(collection)
31
+ collection != self
35
32
  end
36
33
 
37
- def can_be_member_of_collection?(collection)
38
- collection == self ? false : true
34
+ module ClassMethods
35
+ def indexer
36
+ CurationConcerns::CollectionIndexer
37
+ end
39
38
  end
40
39
 
41
40
  private
42
41
 
43
- def can_add_to_members?(collectible)
44
- collectible.try(:can_be_member_of_collection?, self)
45
- end
42
+ def can_add_to_members?(collectible)
43
+ collectible.try(:can_be_member_of_collection?, self)
44
+ end
46
45
  end
47
46
  end
@@ -0,0 +1,15 @@
module CurationConcerns
  module FileSet
    # Associates the including model with an upload set through the
    # isPartOf predicate and exposes that upload set's processing state.
    module BelongsToUploadSets
      extend ActiveSupport::Concern

      included do
        belongs_to :upload_set, predicate: ActiveFedora::RDF::Fcrepo::RelsExt.isPartOf
      end

      # Is this file in the middle of being processed by an UploadSet?
      #
      # Both lookups go through +try+, so a missing upload set or a missing
      # status yields false rather than raising.
      def processing?
        current_status = try(:upload_set).try(:status)
        current_status == ['processing'.freeze]
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module CurationConcerns
2
- module GenericFile
2
+ module FileSet
3
3
  module BelongsToWorks
4
4
  extend ActiveSupport::Concern
5
5
 
@@ -8,31 +8,30 @@ module CurationConcerns
8
8
  end
9
9
 
10
10
  def generic_works
11
- self.parent_objects # parent_objects is provided by Hydra::PCDM::ObjectBehavior
11
+ in_objects # in_objects is provided by Hydra::PCDM::ObjectBehavior
12
12
  end
13
13
 
14
+ # OPTIMIZE: We can load this from Solr much faster than loading the objects
14
15
  def generic_work_ids
15
- generic_works.map { |work| work.id }
16
+ generic_works.map(&:id)
16
17
  end
17
18
 
18
19
  # Returns the first parent object
19
20
  # This is a hack to handle things like GenericFiles inheriting access controls from their parent. (see CurationConcerns::ParentContainer in app/controllers/concerns/curation_concers/parent_container.rb)
20
21
  def parent
21
- self.parent_objects.first
22
+ in_objects.first
22
23
  end
23
24
 
24
25
  # Returns the id of first parent object
25
26
  # This is a hack to handle things like GenericFiles inheriting access controls from their parent. (see CurationConcerns::ParentContainer in app/controllers/concerns/curation_concers/parent_container.rb)
26
- def parent_id
27
- parent.id
28
- end
27
+ delegate :id, to: :parent, prefix: true
29
28
 
30
29
  # Files with sibling relationships
31
30
  # Returns all GenericFiles aggregated by any of the GenericWorks that aggregate the current object
32
31
  def related_files
33
32
  generic_works = self.generic_works
34
33
  return [] if generic_works.empty?
35
- generic_works.flat_map {|work| work.generic_files.select {|generic_file| generic_file.id != self.id } }
34
+ generic_works.flat_map { |work| work.file_sets.select { |file_set| file_set.id != id } }
36
35
  end
37
36
 
38
37
  # If any parent works are pointing at this object as their representative, remove that pointer.
@@ -40,14 +39,9 @@ module CurationConcerns
40
39
  generic_works = self.generic_works
41
40
  return if generic_works.empty?
42
41
  generic_works.each do |work|
43
- if work.representative == self.id
44
- work.representative = nil
45
- work.save
46
- end
42
+ work.update(representative_id: nil) if work.representative_id == id
47
43
  end
48
44
  end
49
-
50
45
  end
51
46
  end
52
47
  end
53
-
@@ -0,0 +1,54 @@
module CurationConcerns
  module FileSet
    # Derivative generation for file sets, dispatched on the mime type.
    module Derivatives
      extend ActiveSupport::Concern

      included do
        Hydra::Derivatives.source_file_service = CurationConcerns::LocalFileService
        Hydra::Derivatives.output_file_service = CurationConcerns::PersistDerivatives
        Hydra::Derivatives::FullTextExtract.output_file_service = CurationConcerns::PersistDirectlyContainedOutputFileService
      end

      # This completely overrides the version in Hydra::Works so that we
      # read and write to a local file. It's important that characterization runs
      # before derivatives so that we have a credible mime_type field to work with.
      #
      # Mime types that match none of the class-level lists produce no
      # derivatives.
      def create_derivatives(filename)
        klass = self.class
        case mime_type
        when *klass.pdf_mime_types
          thumbnail = { label: :thumbnail, format: 'jpg', size: '338x493', url: derivative_url('thumbnail') }
          Hydra::Derivatives::PdfDerivatives.create(filename, outputs: [thumbnail])
          extracted = { url: uri, container: "extracted_text" }
          Hydra::Derivatives::FullTextExtract.create(filename, outputs: [extracted])
        when *klass.office_document_mime_types
          thumbnail = { label: :thumbnail, format: 'jpg', size: '200x150>', url: derivative_url('thumbnail') }
          Hydra::Derivatives::DocumentDerivatives.create(filename, outputs: [thumbnail])
          extracted = { url: uri, container: "extracted_text" }
          Hydra::Derivatives::FullTextExtract.create(filename, outputs: [extracted])
        when *klass.audio_mime_types
          mp3 = { label: 'mp3', format: 'mp3', url: derivative_url('mp3') }
          ogg = { label: 'ogg', format: 'ogg', url: derivative_url('ogg') }
          Hydra::Derivatives::AudioDerivatives.create(filename, outputs: [mp3, ogg])
        when *klass.video_mime_types
          thumbnail = { label: :thumbnail, format: 'jpg', url: derivative_url('thumbnail') }
          webm = { label: 'webm', format: 'webm', url: derivative_url('webm') }
          mp4 = { label: 'mp4', format: 'mp4', url: derivative_url('mp4') }
          Hydra::Derivatives::VideoDerivatives.create(filename, outputs: [thumbnail, webm, mp4])
        when *klass.image_mime_types
          thumbnail = { label: :thumbnail, format: 'jpg', size: '200x150>', url: derivative_url('thumbnail') }
          Hydra::Derivatives::ImageDerivatives.create(filename, outputs: [thumbnail])
        end
      end

      private

        # The destination_name parameter has to match up with the file parameter
        # passed to the DownloadsController
        def derivative_url(destination_name)
          local_path = DerivativePath.derivative_path_for_reference(self, destination_name)
          URI("file://#{local_path}").to_s
        end
    end
  end
end
@@ -1,12 +1,11 @@
1
1
  module CurationConcerns
2
- module GenericFile
2
+ module FileSet
3
3
  module FullTextIndexing
4
4
  extend ActiveSupport::Concern
5
5
 
6
6
  included do
7
7
  contains 'full_text'
8
8
  end
9
-
10
9
  end
11
10
  end
12
11
  end
@@ -1,12 +1,12 @@
1
1
  module CurationConcerns
2
- module GenericFile
2
+ module FileSet
3
3
  module Indexing
4
4
  extend ActiveSupport::Concern
5
5
 
6
6
  module ClassMethods
7
7
  # override the default indexing service
8
8
  def indexer
9
- CurationConcerns::GenericFileIndexingService
9
+ CurationConcerns::FileSetIndexingService
10
10
  end
11
11
  end
12
12
  end
@@ -1,16 +1,15 @@
1
1
  module CurationConcerns
2
- module GenericFile
2
+ module FileSet
3
3
  module Versions
4
4
  @@count = 0
5
5
  def record_version_committer(user)
6
- version = self.latest_version
6
+ version = latest_version
7
7
  # content datastream not (yet?) present
8
8
  return if version.nil?
9
9
  @@count += 1
10
10
  # raise "Recording #{@@count} #{version.uri} for #{user.user_key}" if @@count == 3
11
11
  VersionCommitter.create(version_id: version.uri, committer_login: user.user_key)
12
12
  end
13
-
14
13
  end
15
14
  end
16
15
  end
@@ -0,0 +1,36 @@
module CurationConcerns
  # Bundles the mixins that make up a file set model and adds a few small
  # helpers on top of them.
  module FileSetBehavior
    extend ActiveSupport::Concern
    # The include order below is kept exactly as-is.
    include Hydra::Works::FileSetBehavior
    include Hydra::Works::VirusCheck
    include Hydra::Works::Characterization
    include Hydra::WithDepositor
    include CurationConcerns::Serializers
    include CurationConcerns::Noid
    include CurationConcerns::FileSet::Derivatives
    include CurationConcerns::Permissions
    include CurationConcerns::BasicMetadata
    include CurationConcerns::FileSet::FullTextIndexing
    include CurationConcerns::FileSet::Indexing
    include CurationConcerns::FileSet::BelongsToWorks
    include CurationConcerns::FileSet::BelongsToUploadSets
    include CurationConcerns::HumanReadableType
    include Hydra::AccessControls::Embargoable

    included do
      attr_accessor :file
    end

    # Display name derived from the class name: namespace stripped, then
    # titleized.
    def human_readable_type
      class_name = self.class.to_s
      class_name.demodulize.titleize
    end

    # A file set acts as its own representative.
    def representative_id
      to_param
    end

    # A file set acts as its own thumbnail.
    def thumbnail_id
      to_param
    end
  end
end
@@ -2,4 +2,4 @@ module CurationConcerns
2
2
  # Base namespace for GenericFile Behavior modules
3
3
  module GenericFile
4
4
  end
5
- end
5
+ end