curation_concerns-models 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.md +177 -0
- data/README.md +42 -0
- data/Rakefile +1 -0
- data/app/actors/concerns/curation_concerns/manages_embargoes_actor.rb +99 -0
- data/app/actors/curation_concerns/base_actor.rb +90 -0
- data/app/actors/curation_concerns/generic_file_actor.rb +150 -0
- data/app/actors/curation_concerns/work_actor_behavior.rb +88 -0
- data/app/jobs/active_fedora_id_based_job.rb +22 -0
- data/app/jobs/active_fedora_pid_based_job.rb +6 -0
- data/app/jobs/audit_job.rb +58 -0
- data/app/jobs/characterize_job.rb +11 -0
- data/app/jobs/copy_permissions_job.rb +24 -0
- data/app/jobs/create_derivatives_job.rb +15 -0
- data/app/jobs/import_url_job.rb +56 -0
- data/app/jobs/ingest_local_file_job.rb +48 -0
- data/app/jobs/resolrize_job.rb +9 -0
- data/app/models/checksum_audit_log.rb +21 -0
- data/app/models/concerns/curation_concerns/ability.rb +34 -0
- data/app/models/concerns/curation_concerns/basic_metadata.rb +87 -0
- data/app/models/concerns/curation_concerns/collection_behavior.rb +47 -0
- data/app/models/concerns/curation_concerns/generic_file/belongs_to_works.rb +53 -0
- data/app/models/concerns/curation_concerns/generic_file/characterization.rb +89 -0
- data/app/models/concerns/curation_concerns/generic_file/content.rb +8 -0
- data/app/models/concerns/curation_concerns/generic_file/export.rb +343 -0
- data/app/models/concerns/curation_concerns/generic_file/full_text_indexing.rb +12 -0
- data/app/models/concerns/curation_concerns/generic_file/indexing.rb +14 -0
- data/app/models/concerns/curation_concerns/generic_file/versions.rb +16 -0
- data/app/models/concerns/curation_concerns/generic_file.rb +5 -0
- data/app/models/concerns/curation_concerns/generic_file_behavior.rb +44 -0
- data/app/models/concerns/curation_concerns/generic_work_behavior.rb +38 -0
- data/app/models/concerns/curation_concerns/has_representative.rb +14 -0
- data/app/models/concerns/curation_concerns/human_readable_type.rb +23 -0
- data/app/models/concerns/curation_concerns/permissions/readable.rb +19 -0
- data/app/models/concerns/curation_concerns/permissions/writable.rb +75 -0
- data/app/models/concerns/curation_concerns/permissions.rb +7 -0
- data/app/models/concerns/curation_concerns/serializers.rb +15 -0
- data/app/models/concerns/curation_concerns/solr_document_behavior.rb +135 -0
- data/app/models/concerns/curation_concerns/user.rb +65 -0
- data/app/models/concerns/curation_concerns/with_basic_metadata.rb +98 -0
- data/app/models/concerns/curation_concerns/with_generic_files.rb +29 -0
- data/app/models/curation_concerns/classify_concern.rb +47 -0
- data/app/models/curation_concerns/quick_classification_query.rb +31 -0
- data/app/models/datastreams/fits_datastream.rb +148 -0
- data/app/models/version_committer.rb +2 -0
- data/app/services/curation_concerns/characterization_service.rb +71 -0
- data/app/services/curation_concerns/full_text_extraction_service.rb +38 -0
- data/app/services/curation_concerns/generic_file_audit_service.rb +85 -0
- data/app/services/curation_concerns/generic_file_indexing_service.rb +14 -0
- data/app/services/curation_concerns/generic_work_indexing_service.rb +16 -0
- data/app/services/curation_concerns/noid.rb +23 -0
- data/app/services/curation_concerns/repository_audit_service.rb +9 -0
- data/app/services/curation_concerns/versioning_service.rb +27 -0
- data/config/locales/curation_concerns.en.yml +6 -0
- data/curation_concerns-models.gemspec +34 -0
- data/lib/curation_concerns/messages.rb +66 -0
- data/lib/curation_concerns/models/engine.rb +61 -0
- data/lib/curation_concerns/models/resque.rb +36 -0
- data/lib/curation_concerns/models/utils.rb +22 -0
- data/lib/curation_concerns/models/version.rb +5 -0
- data/lib/curation_concerns/models.rb +32 -0
- data/lib/generators/curation_concerns/models/abstract_migration_generator.rb +30 -0
- data/lib/generators/curation_concerns/models/clamav_generator.rb +19 -0
- data/lib/generators/curation_concerns/models/fulltext_generator.rb +28 -0
- data/lib/generators/curation_concerns/models/install_generator.rb +70 -0
- data/lib/generators/curation_concerns/models/templates/app/models/collection.rb +4 -0
- data/lib/generators/curation_concerns/models/templates/app/models/generic_file.rb +4 -0
- data/lib/generators/curation_concerns/models/templates/config/clamav.rb +1 -0
- data/lib/generators/curation_concerns/models/templates/config/curation_concerns.rb +123 -0
- data/lib/generators/curation_concerns/models/templates/config/mime_types.rb +6 -0
- data/lib/generators/curation_concerns/models/templates/config/redis.yml +9 -0
- data/lib/generators/curation_concerns/models/templates/config/redis_config.rb +32 -0
- data/lib/generators/curation_concerns/models/templates/config/resque-pool.yml +1 -0
- data/lib/generators/curation_concerns/models/templates/config/resque_admin.rb +10 -0
- data/lib/generators/curation_concerns/models/templates/config/resque_config.rb +5 -0
- data/lib/generators/curation_concerns/models/templates/migrations/create_checksum_audit_logs.rb +19 -0
- data/lib/generators/curation_concerns/models/templates/migrations/create_version_committers.rb +15 -0
- data/lib/tasks/curation_concerns-models_tasks.rake +75 -0
- data/lib/tasks/migrate.rake +13 -0
- data/lib/tasks/resque.rake +13 -0
- data/lib/tasks/solr_reindex.rake +8 -0
- metadata +282 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
module CurationConcerns
  # Convenience readers over the Solr fields of a document.
  #
  # The methods below call `self[]`, `get` and `id`, so the including class
  # must provide them.
  module SolrDocumentBehavior
    # @return [String, nil] the title, or the label when there is no title
    def title_or_label
      title || label
    end

    ##
    # Give our SolrDocument an ActiveModel::Naming appropriate route_key
    # (the downcased last ':'-separated segment of the has_model field).
    def route_key
      get(Solrizer.solr_name('has_model', :symbol)).split(':').last.downcase
    end

    def to_param
      id
    end

    def to_s
      title_or_label
    end

    ##
    # Offer the source (ActiveFedora-based) model to Rails for some of the
    # Rails methods (e.g. link_to).
    # @example
    #   link_to '...', SolrDocument.new(:id => 'bXXXXXX5') => <a href="/dams_object/bXXXXXX5">...</a>
    def to_model
      @model ||= begin
        m = ActiveFedora::Base.load_instance_from_solr(id, self)
        # A bare ActiveFedora::Base carries no model-specific naming, so the
        # document stands in for itself in that case.
        m.class == ActiveFedora::Base ? self : m
      end
    end

    def collection?
      hydra_model == 'Collection'
    end

    # Method to return the ActiveFedora model
    def hydra_model
      self[Solrizer.solr_name('active_fedora_model', Solrizer::Descriptor.new(:string, :stored, :indexed))]
    end

    def human_readable_type
      Array(self[Solrizer.solr_name('human_readable_type', :stored_searchable)]).first
    end

    def representative
      Array(self[Solrizer.solr_name('representative', :stored_searchable)]).first
    end

    # Formats the stored upload date with the :standard date format.
    #
    # @return [String, nil] the formatted date; nil when the field is blank
    #   or cannot be parsed (the failure is logged at info level)
    def date_uploaded
      field = self[Solrizer.solr_name('date_uploaded', :stored_sortable, type: :date)]
      return unless field.present?

      Date.parse(field).to_formatted_s(:standard)
    rescue ArgumentError, TypeError
      # Date.parse raises ArgumentError for unparseable text and TypeError for
      # non-string input. Log the whole offending value and return nil rather
      # than the logger's return value.
      Rails.logger.info "Unable to parse date: #{field.inspect} for #{self['id']}"
      nil
    end

    # @param default [String] value returned when no depositor is recorded
    def depositor(default = '')
      val = Array(self[Solrizer.solr_name('depositor')]).first
      val.present? ? val : default
    end

    def title
      Array(self[Solrizer.solr_name('title')]).first
    end

    def description
      Array(self[Solrizer.solr_name('description')]).first
    end

    def label
      Array(self[Solrizer.solr_name('label')]).first
    end

    def file_format
      Array(self[Solrizer.solr_name('file_format')]).first
    end

    def creator
      Array(self[Solrizer.solr_name('creator')]).first
    end

    # @return [Array] every value of the tag field (empty when unset)
    def tags
      Array(self[Solrizer.solr_name('tag')])
    end

    # @return [Array] every value of the resource_type field (empty when unset)
    def resource_type
      Array(self[Solrizer.solr_name('resource_type')])
    end

    def mime_type
      Array(self[Solrizer.solr_name('mime_type')]).first
    end

    def read_groups
      Array(self[::Ability.read_group_field])
    end

    def edit_groups
      Array(self[::Ability.edit_group_field])
    end

    def edit_people
      Array(self[::Ability.edit_user_field])
    end

    def public?
      read_groups.include?('public')
    end

    def registered?
      read_groups.include?('registered')
    end

    def pdf?
      %w[application/pdf].include?(mime_type)
    end

    def image?
      %w[image/png image/jpeg image/jpg image/jp2 image/bmp image/gif image/tiff].include?(mime_type)
    end

    def video?
      %w[
        video/mpeg video/mp4 video/webm video/x-msvideo video/avi video/quicktime application/mxf
      ].include?(mime_type)
    end

    def audio?
      # audio/x-wave is the mime type that fits 0.6.0 returns for a wav file.
      # audio/mpeg is the mime type that fits 0.6.0 returns for an mp3 file.
      %w[audio/mp3 audio/mpeg audio/x-wave audio/x-wav audio/ogg].include?(mime_type)
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Behaviour mixed into the application's User model.
module CurationConcerns::User
  extend ActiveSupport::Concern

  # Copied piecemeal from the pcdm branch of sufia-models. More may yet be necessary.

  included do
    # Connects this user object to Blacklight's Bookmarks and Folders.
    include Blacklight::User
    include Hydra::User

    delegate :can?, :cannot?, to: :ability

    attr_accessor :update_directory
  end

  # Format the json for select2 which requires just an id and a field called text.
  # If we need an alternate format we should probably look at a json template gem
  #
  # @param _opts ignored; accepted so the method can be called like the
  #   standard as_json
  # @return [Hash] { id:, text: }
  def as_json(_opts = nil)
    text = display_name ? "#{display_name} (#{user_key})" : user_key
    { id: user_key, text: text }
  end

  # Populate user instance with attributes from remote system (e.g., LDAP)
  # There is no default implementation -- override this in your application
  # def populate_attributes
  # end

  def email_address
    email
  end

  # Human-readable name for the user.
  #
  # @return [String] the titleized display name, or the user key when the
  #   model has no display_name or it is blank
  def name
    # Check explicitly instead of raising and rescuing: titleize never returns
    # nil, so a blank display name used to be returned as-is, and the bare
    # rescue also hid unrelated errors.
    shown = display_name if respond_to?(:display_name)
    shown.present? ? shown.titleize : user_key
  end

  # Redefine this for more intuitive keys in Redis
  def to_param
    # hack because rails doesn't like periods in urls.
    user_key.gsub('.', '-dot-')
  end

  # The basic groups method, override or will fallback to Sufia::Ldap::User
  # def groups
  #   @groups ||= self.group_list ? self.group_list.split(";?;") : []
  # end

  # @return [::Ability] memoized ability object built for this user
  def ability
    @ability ||= ::Ability.new(self)
  end

  module ClassMethods
    # Reads the user stored under :user on the current thread.
    def current
      Thread.current[:user]
    end

    # Stores the given user under :user on the current thread.
    def current=(user)
      Thread.current[:user] = user
    end

    # def from_url_component(component)
    #   User.find_by_user_key(component.gsub(/-dot-/, '.'))
    # end
  end
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
module CurationConcerns
  # Direct copy of Sufia::GenericFile::Metadata, except that title and
  # description are single-valued here instead of multi-valued.
  module DefaultMetadata
    extend ActiveSupport::Concern

    included do
      # NOTE(review): :label and :title (below) are both mapped to
      # ::RDF::DC.title -- confirm that sharing one predicate is intended.
      property :label, predicate: ::RDF::DC.title, multiple: false

      property :depositor,
               predicate: ::RDF::URI.new("http://id.loc.gov/vocabulary/relators/dpt"),
               multiple: false do |index|
        index.as :symbol, :stored_searchable
      end

      property :relative_path,
               predicate: ::RDF::URI.new('http://scholarsphere.psu.edu/ns#relativePath'),
               multiple: false

      property :import_url,
               predicate: ::RDF::URI.new('http://scholarsphere.psu.edu/ns#importUrl'),
               multiple: false do |index|
        index.as :symbol
      end

      property :part_of, predicate: ::RDF::DC.isPartOf

      property :resource_type, predicate: ::RDF::DC.type do |index|
        index.as :stored_searchable, :facetable
      end

      property :title, predicate: ::RDF::DC.title, multiple: false do |index|
        index.as :stored_searchable, :facetable
      end

      property :creator, predicate: ::RDF::DC.creator do |index|
        index.as :stored_searchable, :facetable
      end

      property :contributor, predicate: ::RDF::DC.contributor do |index|
        index.as :stored_searchable, :facetable
      end

      property :description, predicate: ::RDF::DC.description, multiple: false do |index|
        index.type :text
        index.as :stored_searchable
      end

      property :tag, predicate: ::RDF::DC.relation do |index|
        index.as :stored_searchable, :facetable
      end

      property :rights, predicate: ::RDF::DC.rights do |index|
        index.as :stored_searchable
      end

      property :publisher, predicate: ::RDF::DC.publisher do |index|
        index.as :stored_searchable, :facetable
      end

      property :date_created, predicate: ::RDF::DC.created do |index|
        index.as :stored_searchable
      end

      property :date_uploaded, predicate: ::RDF::DC.dateSubmitted, multiple: false do |index|
        index.type :date
        index.as :stored_sortable
      end

      property :date_modified, predicate: ::RDF::DC.modified, multiple: false do |index|
        index.type :date
        index.as :stored_sortable
      end

      property :subject, predicate: ::RDF::DC.subject do |index|
        index.as :stored_searchable, :facetable
      end

      property :language, predicate: ::RDF::DC.language do |index|
        index.as :stored_searchable, :facetable
      end

      property :identifier, predicate: ::RDF::DC.identifier do |index|
        index.as :stored_searchable
      end

      property :based_near, predicate: ::RDF::FOAF.based_near do |index|
        index.as :stored_searchable, :facetable
      end

      property :related_url, predicate: ::RDF::RDFS.seeAlso do |index|
        index.as :stored_searchable
      end

      property :bibliographic_citation, predicate: ::RDF::DC.bibliographicCitation do |index|
        index.as :stored_searchable
      end

      property :source, predicate: ::RDF::DC.source do |index|
        index.as :stored_searchable
      end

      # TODO: Move this somewhere more appropriate
      # Any StandardError raised while registering is swallowed and reported
      # on stdout only.
      begin
        LocalAuthority.register_vocabulary(self, "subject", "lc_subjects")
        LocalAuthority.register_vocabulary(self, "language", "lexvo_languages")
        LocalAuthority.register_vocabulary(self, "tag", "lc_genres")
      rescue StandardError
        puts "tables for vocabularies missing"
      end
    end

    # Add a schema.org itemtype
    #
    # @return the configured schema type for the first non-empty resource
    #   type, or 'http://schema.org/CreativeWork' when there is no mapping or
    #   the lookup raises
    def itemtype
      first_type = resource_type.to_a.reject(&:empty?).first
      CurationConcerns.config.resource_types_to_schema[first_type] || 'http://schema.org/CreativeWork'
    rescue StandardError
      'http://schema.org/CreativeWork'
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Copied from Curate
|
|
2
|
+
module CurationConcerns
  # Behaviour for objects that own generic files: destroys the files along
  # with the owner and can push the owner's visibility down to them.
  module WithGenericFiles
    extend ActiveSupport::Concern

    included do
      # The generic_files association and its accessor methods comes from Hydra::Works::AggregatesGenericFiles
      before_destroy :before_destroy_cleanup_generic_files
    end

    # Stopgap until ActiveFedora ContainerAssociation includes an *_ids accessor.
    # At the moment, this is no more efficient than calling generic_files, but hopefully that will change in the future.
    #
    # @return [Array] the id of every attached generic file
    def generic_file_ids
      generic_files.map(&:id)
    end

    # before_destroy callback: destroys every attached generic file.
    def before_destroy_cleanup_generic_files
      generic_files.each { |generic_file| generic_file.destroy }
    end

    # Assigns this object's visibility to each attached generic file and
    # saves it with save!.
    def copy_visibility_to_files
      generic_files.each do |generic_file|
        generic_file.visibility = visibility
        generic_file.save!
      end
    end
  end
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require 'active_attr'
|
|
2
|
+
module CurationConcerns
  # Form object that validates a requested curation concern type against the
  # registered types and resolves it to its class.
  class ClassifyConcern
    include ActiveAttr::Model
    attribute :curation_concern_type

    validates(
      :curation_concern_type,
      presence: true,
      inclusion: { in: ->(record) { record.registered_curation_concern_types } }
    )

    # @return [Array<Class>] classes of all registered types, sorted by type name
    def all_curation_concern_classes
      registered_curation_concern_types.sort.map { |type| self.class.to_class(type) }
    end

    # @return the type names registered in the CurationConcerns configuration
    def registered_curation_concern_types
      CurationConcerns.configuration.registered_curation_concern_types
    end

    # @return [Array<Array>] [human readable type, type name] pairs
    def possible_curation_concern_types
      registered_curation_concern_types.map do |concern|
        [self.class.to_class(concern).human_readable_type, concern]
      end
    end

    # Resolves the selected type to its class.
    #
    # @return [Class]
    # @raise [RuntimeError] when curation_concern_type is not a registered type
    def curation_concern_class
      # Test membership on the registered names directly. Going through
      # possible_curation_concern_types would constantize every registered
      # type, so one broken registration would break lookups of valid types.
      unless registered_curation_concern_types.include?(curation_concern_type)
        raise RuntimeError, "Invalid :curation_concern_type"
      end

      self.class.to_class(curation_concern_type)
    end

    # @param type [String] a registered type name
    # @return [Class] the constant named by the camelized type
    def self.to_class(type)
      # TODO: we may want to allow a different (or nil) namespace
      type.camelize.constantize
      # begin
      #   "::#{type.camelize}".constantize
      # rescue NameError
      #   "CurationConcerns::#{type}".constantize
      # end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module CurationConcerns
  # Finds the curation concern classes a given user is allowed to create.
  class QuickClassificationQuery
    # Builds a query from the given arguments and yields each creatable class.
    def self.each_for_context(*args, &block)
      new(*args).all.each(&block)
    end

    attr_reader :user

    # @param user an object responding to can?(action, klass)
    # @param options [Hash]
    # @option options :concern_name_normalizer callable that turns a
    #   registered name into a class (defaults to ClassifyConcern.to_class)
    # @option options :registered_curation_concern_names names to consider
    #   (defaults to the configured registered curation concern types)
    def initialize(user, options = {})
      @user = user
      # Block form of fetch: the defaults are only computed when the option is
      # absent, so supplying both options never touches ClassifyConcern or the
      # global configuration.
      @concern_name_normalizer = options.fetch(:concern_name_normalizer) do
        ClassifyConcern.method(:to_class)
      end
      @registered_curation_concern_names = options.fetch(:registered_curation_concern_names) do
        CurationConcerns.configuration.registered_curation_concern_types
      end
    end

    # @return [Array] the normalized concerns for which user.can?(:create, concern) is true
    def all
      candidates = normalized_curation_concern_names
      logger = ActiveFedora::Base.logger
      # Block form so the messages, including the extra can? call, are only
      # built when the logger evaluates the block.
      logger.debug { "User is #{user}" }
      logger.debug { "try is #{candidates.first}" }
      logger.debug { "can is #{user.can?(:create, candidates.first)}" }
      candidates.select { |klass| user.can?(:create, klass) }
    end

    private

    attr_reader :concern_name_normalizer, :registered_curation_concern_names

    def normalized_curation_concern_names
      registered_curation_concern_names.map { |name| concern_name_normalizer.call(name) }
    end
  end
end
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# OM datastream describing FITS output: a nested terminology for the XML
# structure plus top-level proxy terms for direct access.
class FitsDatastream < ActiveFedora::OmDatastream
  include OM::XML::Document

  set_terminology do |t|
    t.root(path: "fits",
           xmlns: "http://hul.harvard.edu/ois/xml/ns/fits/fits_output",
           schema: "http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd")

    t.identification do
      t.identity do
        t.format_label(path: { attribute: "format" })
        t.mime_type(path: { attribute: "mimetype" })
      end
    end

    t.fileinfo do
      t.file_size(path: "size")
      t.last_modified(path: "lastmodified")
      t.filename(path: "filename")
      t.original_checksum(path: "md5checksum")
      t.rights_basis(path: "rightsBasis")
      t.copyright_basis(path: "copyrightBasis")
      t.copyright_note(path: "copyrightNote")
    end

    t.filestatus do
      t.well_formed(path: "well-formed")
      t.valid(path: "valid")
      t.status_message(path: "message")
    end

    t.metadata do
      t.document do
        t.file_title(path: "title")
        t.file_author(path: "author")
        t.file_language(path: "language")
        t.page_count(path: "pageCount")
        t.word_count(path: "wordCount")
        t.character_count(path: "characterCount")
        t.paragraph_count(path: "paragraphCount")
        t.line_count(path: "lineCount")
        t.table_count(path: "tableCount")
        t.graphics_count(path: "graphicsCount")
      end

      t.image do
        t.byte_order(path: "byteOrder")
        t.compression(path: "compressionScheme")
        t.width(path: "imageWidth")
        t.height(path: "imageHeight")
        t.color_space(path: "colorSpace")
        t.profile_name(path: "iccProfileName")
        t.profile_version(path: "iccProfileVersion")
        t.orientation(path: "orientation")
        t.color_map(path: "colorMap")
        t.image_producer(path: "imageProducer")
        t.capture_device(path: "captureDevice")
        t.scanning_software(path: "scanningSoftwareName")
        t.exif_version(path: "exifVersion")
        t.gps_timestamp(path: "gpsTimeStamp")
        t.latitude(path: "gpsDestLatitude")
        t.longitude(path: "gpsDestLongitude")
      end

      t.text do
        t.character_set(path: "charset")
        t.markup_basis(path: "markupBasis")
        t.markup_language(path: "markupLanguage")
      end

      t.audio do
        t.duration(path: "duration")
        t.bit_depth(path: "bitDepth")
        t.sample_rate(path: "sampleRate")
        t.channels(path: "channels")
        t.data_format(path: "dataFormatType")
        t.offset(path: "offset")
      end

      t.video do
        t.width(path: "imageWidth")
        t.height(path: "imageHeight")
        t.duration(path: "duration")
        t.sample_rate(path: "sampleRate")
        t.frame_rate(path: "frameRate")
      end
    end

    # Top-level proxies. Where image/audio and video terms share a name, the
    # unprefixed proxy points at image/audio and the video_* proxy at video.
    t.format_label(proxy: [:identification, :identity, :format_label])
    t.mime_type(proxy: [:identification, :identity, :mime_type])
    t.file_size(proxy: [:fileinfo, :file_size])
    t.last_modified(proxy: [:fileinfo, :last_modified])
    t.filename(proxy: [:fileinfo, :filename])
    t.original_checksum(proxy: [:fileinfo, :original_checksum])
    t.rights_basis(proxy: [:fileinfo, :rights_basis])
    t.copyright_basis(proxy: [:fileinfo, :copyright_basis])
    t.copyright_note(proxy: [:fileinfo, :copyright_note])
    t.well_formed(proxy: [:filestatus, :well_formed])
    t.valid(proxy: [:filestatus, :valid])
    t.status_message(proxy: [:filestatus, :status_message])
    t.file_title(proxy: [:metadata, :document, :file_title])
    t.file_author(proxy: [:metadata, :document, :file_author])
    t.page_count(proxy: [:metadata, :document, :page_count])
    t.file_language(proxy: [:metadata, :document, :file_language])
    t.word_count(proxy: [:metadata, :document, :word_count])
    t.character_count(proxy: [:metadata, :document, :character_count])
    t.paragraph_count(proxy: [:metadata, :document, :paragraph_count])
    t.line_count(proxy: [:metadata, :document, :line_count])
    t.table_count(proxy: [:metadata, :document, :table_count])
    t.graphics_count(proxy: [:metadata, :document, :graphics_count])
    t.byte_order(proxy: [:metadata, :image, :byte_order])
    t.compression(proxy: [:metadata, :image, :compression])
    t.width(proxy: [:metadata, :image, :width])
    t.video_width(proxy: [:metadata, :video, :width])
    t.height(proxy: [:metadata, :image, :height])
    t.video_height(proxy: [:metadata, :video, :height])
    t.color_space(proxy: [:metadata, :image, :color_space])
    t.profile_name(proxy: [:metadata, :image, :profile_name])
    t.profile_version(proxy: [:metadata, :image, :profile_version])
    t.orientation(proxy: [:metadata, :image, :orientation])
    t.color_map(proxy: [:metadata, :image, :color_map])
    t.image_producer(proxy: [:metadata, :image, :image_producer])
    t.capture_device(proxy: [:metadata, :image, :capture_device])
    t.scanning_software(proxy: [:metadata, :image, :scanning_software])
    t.exif_version(proxy: [:metadata, :image, :exif_version])
    t.gps_timestamp(proxy: [:metadata, :image, :gps_timestamp])
    t.latitude(proxy: [:metadata, :image, :latitude])
    t.longitude(proxy: [:metadata, :image, :longitude])
    t.character_set(proxy: [:metadata, :text, :character_set])
    t.markup_basis(proxy: [:metadata, :text, :markup_basis])
    t.markup_language(proxy: [:metadata, :text, :markup_language])
    t.duration(proxy: [:metadata, :audio, :duration])
    t.video_duration(proxy: [:metadata, :video, :duration])
    t.bit_depth(proxy: [:metadata, :audio, :bit_depth])
    t.sample_rate(proxy: [:metadata, :audio, :sample_rate])
    t.video_sample_rate(proxy: [:metadata, :video, :sample_rate])
    t.channels(proxy: [:metadata, :audio, :channels])
    t.data_format(proxy: [:metadata, :audio, :data_format])
    t.offset(proxy: [:metadata, :audio, :offset])
    t.frame_rate(proxy: [:metadata, :video, :frame_rate])
  end

  # Builds the empty FITS document used as the datastream's starting XML.
  #
  # @return the document produced by the Nokogiri builder
  def self.xml_template
    # Namespace URI and schema URL, separated by a newline.
    schema_location = "http://hul.harvard.edu/ois/xml/ns/fits/fits_output\n" \
                      "http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd"
    root_attributes = {
      xmlns: 'http://hul.harvard.edu/ois/xml/ns/fits/fits_output',
      'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
      'xsi:schemaLocation' => schema_location,
      version: "0.6.0",
      timestamp: "1/25/12 11:04 AM"
    }

    template = Nokogiri::XML::Builder.new do |xml|
      xml.fits(root_attributes) do
        xml.identification { xml.identity(toolname: 'FITS') }
      end
    end
    template.doc
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
module CurationConcerns
  # Run FITS to gather technical metadata about the content and the full text.
  # Store this extracted metadata in the characterization datastream.
  class CharacterizationService
    include Hydra::Derivatives::ExtractMetadata

    delegate :mime_type, :uri, to: :@generic_file
    attr_reader :generic_file

    # Convenience entry point: characterizes the given generic file.
    def self.run(generic_file)
      new(generic_file).characterize
    end

    def initialize(generic_file)
      @generic_file = generic_file
    end

    ## Extract the metadata from the content datastream and record it in the characterization datastream
    # Also stores any extracted full text and sets the file's filename from
    # the original file's original_name.
    def characterize
      store_metadata(extract_metadata)
      store_fulltext(extract_fulltext)
      generic_file.filename = [generic_file.original_file.original_name]
    end

    protected

    # Builds the extracted-text file and fills it, unless the text is blank.
    def store_fulltext(extracted_text)
      return unless extracted_text.present?

      text_file = generic_file.build_extracted_text
      text_file.content = extracted_text
    end

    def extract_fulltext
      FullTextExtractionService.run(generic_file)
    end

    # Saves the metadata XML (when present), then copies the mapped terms
    # onto the generic file.
    def store_metadata(metadata)
      generic_file.characterization.ng_xml = metadata if metadata.present?
      append_metadata
    end

    # Runs the :fits characterizer over the original file's content.
    # Returns nil when the original file has no content.
    def extract_metadata
      source_file = generic_file.original_file
      return unless source_file.has_content?

      Hydra::FileCharacterization.characterize(
        generic_file.original_file.content, filename_for_characterization.join, :fits
      ) do |config|
        config[:fits] = Hydra::Derivatives.fits_path
      end
    end

    # Populate GenericFile's properties with fields from FITS (e.g. Author from pdfs)
    def append_metadata
      terms = generic_file.characterization_terms
      CurationConcerns.config.fits_to_desc_mapping.each_pair do |fits_term, property|
        next unless terms.key?(fits_term)

        # coerce to array so single and multiple values share one code path
        terms[fits_term] = [terms[fits_term]] unless terms[fits_term].is_a?(Array)
        terms[fits_term].each do |value|
          current = generic_file.send(property)
          if current.kind_of?(Array)
            current << value unless current.include?(value)
          else
            # these are single-valued terms which cannot be appended to
            generic_file.send("#{property}=", value)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module CurationConcerns
  # Extract the full text from the content using Solr's extract handler
  class FullTextExtractionService
    # Convenience entry point: extracts the text of the given generic file.
    def self.run(generic_file)
      new(generic_file).extract
    end

    delegate :original_file, :logger, :mime_type, :id, to: :@generic_file

    def initialize(generic_file)
      @generic_file = generic_file
    end

    # Posts the original file's content to "<connection_url>/update/extract".
    #
    # @return [String, nil] the value under the "" key of the JSON response
    #   with trailing whitespace stripped, or nil when anything goes wrong
    #   (the error is logged)
    def extract
      content = original_file.content # fetched once and reused below
      uri = URI("#{connection_url}/update/extract?extractOnly=true&wt=json&extractFormat=text")
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == 'https') # plain HTTP was assumed before
      # Content-Length counts bytes; String#size counts characters.
      length = content.respond_to?(:bytesize) ? content.bytesize : content.size
      resp = http.post(uri.request_uri, content,
                       'Content-type' => "#{mime_type};charset=utf-8",
                       'Content-Length' => length.to_s)
      raise "URL '#{uri}' returned code #{resp.code}" unless resp.code == "200"

      JSON.parse(resp.body)[''].rstrip
    rescue => e
      logger.error("Error extracting content from #{id}: #{e.inspect}")
      nil
    ensure
      # Rewind on failure too, so a failed extraction does not leave the
      # content consumed for later readers.
      content.rewind if content.respond_to?(:rewind)
    end

    # Picks the Solr URL from Blacklight's connection config: :url, then
    # "url", then the :fulltext entry's "url", otherwise the :default entry's.
    def connection_url
      config = Blacklight.connection_config
      return config[:url] if config[:url]
      return config["url"] if config["url"]
      return config[:fulltext]["url"] if config[:fulltext]

      config[:default]["url"]
    end
  end
end
|