curation_concerns-models 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +1 -1
- data/app/actors/concerns/curation_concerns/manages_embargoes_actor.rb +11 -19
- data/app/actors/curation_concerns/base_actor.rb +41 -45
- data/app/actors/curation_concerns/embargo_actor.rb +19 -0
- data/app/actors/curation_concerns/file_set_actor.rb +200 -0
- data/app/actors/curation_concerns/lease_actor.rb +19 -0
- data/app/actors/curation_concerns/work_actor_behavior.rb +55 -58
- data/app/indexers/curation_concerns/collection_indexer.rb +10 -0
- data/app/indexers/curation_concerns/file_set_indexing_service.rb +24 -0
- data/app/{services/curation_concerns/generic_work_indexing_service.rb → indexers/curation_concerns/work_indexing_service.rb} +6 -6
- data/app/jobs/active_fedora_id_based_job.rb +5 -12
- data/app/jobs/audit_job.rb +11 -17
- data/app/jobs/characterize_job.rb +8 -7
- data/app/jobs/create_derivatives_job.rb +8 -11
- data/app/jobs/import_url_job.rb +12 -25
- data/app/jobs/ingest_file_job.rb +16 -0
- data/app/jobs/ingest_local_file_job.rb +14 -35
- data/app/jobs/resolrize_job.rb +3 -5
- data/app/jobs/upload_set_update_job.rb +68 -0
- data/app/models/checksum_audit_log.rb +2 -3
- data/app/models/concerns/curation_concerns/ability.rb +18 -10
- data/app/models/concerns/curation_concerns/basic_metadata.rb +1 -3
- data/app/models/concerns/curation_concerns/collection_behavior.rb +13 -14
- data/app/models/concerns/curation_concerns/file_set/belongs_to_upload_sets.rb +15 -0
- data/app/models/concerns/curation_concerns/{generic_file → file_set}/belongs_to_works.rb +8 -14
- data/app/models/concerns/curation_concerns/file_set/derivatives.rb +54 -0
- data/app/models/concerns/curation_concerns/{generic_file → file_set}/full_text_indexing.rb +1 -2
- data/app/models/concerns/curation_concerns/{generic_file → file_set}/indexing.rb +2 -2
- data/app/models/concerns/curation_concerns/{generic_file → file_set}/versions.rb +2 -3
- data/app/models/concerns/curation_concerns/file_set_behavior.rb +36 -0
- data/app/models/concerns/curation_concerns/generic_file.rb +1 -1
- data/app/models/concerns/curation_concerns/has_representative.rb +6 -7
- data/app/models/concerns/curation_concerns/human_readable_type.rb +5 -7
- data/app/models/concerns/curation_concerns/permissions.rb +2 -2
- data/app/models/concerns/curation_concerns/permissions/readable.rb +0 -1
- data/app/models/concerns/curation_concerns/permissions/writable.rb +10 -51
- data/app/models/concerns/curation_concerns/serializers.rb +3 -5
- data/app/models/concerns/curation_concerns/solr_document_behavior.rb +37 -40
- data/app/models/concerns/curation_concerns/upload_set_behavior.rb +38 -0
- data/app/models/concerns/curation_concerns/user.rb +4 -51
- data/app/models/concerns/curation_concerns/with_file_sets.rb +28 -0
- data/app/models/concerns/curation_concerns/{generic_work_behavior.rb → work_behavior.rb} +12 -6
- data/app/models/curation_concerns/classify_concern.rb +7 -7
- data/app/models/curation_concerns/quick_classification_query.rb +6 -7
- data/app/models/single_use_link.rb +34 -0
- data/app/models/upload_set.rb +3 -0
- data/app/services/curation_concerns/derivative_path.rb +32 -0
- data/app/services/curation_concerns/{generic_file_audit_service.rb → file_set_audit_service.rb} +17 -18
- data/app/services/curation_concerns/indexes_thumbnails.rb +14 -0
- data/app/services/curation_concerns/local_file_service.rb +10 -0
- data/app/services/curation_concerns/lock_manager.rb +40 -0
- data/app/services/curation_concerns/noid.rb +1 -1
- data/app/services/curation_concerns/persist_derivatives.rb +33 -0
- data/app/services/curation_concerns/persist_directly_contained_output_file_service.rb +26 -0
- data/app/services/curation_concerns/repository_audit_service.rb +1 -3
- data/app/services/curation_concerns/thumbnail_path_service.rb +46 -0
- data/app/services/curation_concerns/time_service.rb +7 -0
- data/app/services/curation_concerns/versioning_service.rb +11 -12
- data/curation_concerns-models.gemspec +6 -6
- data/lib/curation_concerns/configuration.rb +154 -0
- data/lib/curation_concerns/messages.rb +26 -26
- data/lib/curation_concerns/models.rb +5 -14
- data/lib/curation_concerns/models/engine.rb +0 -30
- data/lib/curation_concerns/models/utils.rb +4 -4
- data/lib/curation_concerns/models/version.rb +1 -1
- data/lib/generators/curation_concerns/models/abstract_migration_generator.rb +8 -7
- data/lib/generators/curation_concerns/models/clamav_generator.rb +3 -3
- data/lib/generators/curation_concerns/models/install_generator.rb +13 -20
- data/lib/generators/curation_concerns/models/templates/app/models/file_set.rb +4 -0
- data/lib/generators/curation_concerns/models/templates/config/clamav.rb +1 -1
- data/lib/generators/curation_concerns/models/templates/config/curation_concerns.rb +52 -65
- data/lib/generators/curation_concerns/models/templates/config/redis_config.rb +13 -17
- data/lib/generators/curation_concerns/models/templates/config/resque_config.rb +2 -1
- data/lib/generators/curation_concerns/models/templates/migrations/create_checksum_audit_logs.rb +3 -3
- data/lib/generators/curation_concerns/models/templates/migrations/create_single_use_links.rb +12 -0
- data/lib/tasks/curation_concerns-models_tasks.rake +4 -62
- data/lib/tasks/migrate.rake +1 -1
- data/lib/tasks/resque.rake +1 -0
- data/lib/tasks/solr_reindex.rake +1 -1
- metadata +59 -52
- data/app/actors/curation_concerns/generic_file_actor.rb +0 -150
- data/app/jobs/active_fedora_pid_based_job.rb +0 -6
- data/app/jobs/copy_permissions_job.rb +0 -24
- data/app/models/concerns/curation_concerns/generic_file/characterization.rb +0 -89
- data/app/models/concerns/curation_concerns/generic_file/content.rb +0 -8
- data/app/models/concerns/curation_concerns/generic_file/export.rb +0 -343
- data/app/models/concerns/curation_concerns/generic_file_behavior.rb +0 -44
- data/app/models/concerns/curation_concerns/with_basic_metadata.rb +0 -98
- data/app/models/concerns/curation_concerns/with_generic_files.rb +0 -29
- data/app/models/datastreams/fits_datastream.rb +0 -148
- data/app/services/curation_concerns/characterization_service.rb +0 -71
- data/app/services/curation_concerns/full_text_extraction_service.rb +0 -38
- data/app/services/curation_concerns/generic_file_indexing_service.rb +0 -14
- data/lib/curation_concerns/models/resque.rb +0 -36
- data/lib/generators/curation_concerns/models/fulltext_generator.rb +0 -28
- data/lib/generators/curation_concerns/models/templates/app/models/generic_file.rb +0 -4
- data/lib/generators/curation_concerns/models/templates/config/resque_admin.rb +0 -10
@@ -1,44 +0,0 @@
|
|
1
|
-
module CurationConcerns
|
2
|
-
module GenericFileBehavior
|
3
|
-
extend ActiveSupport::Concern
|
4
|
-
include Hydra::Works::GenericFileBehavior
|
5
|
-
include Hydra::Works::GenericFile::VirusCheck
|
6
|
-
include Hydra::WithDepositor
|
7
|
-
include CurationConcerns::Serializers
|
8
|
-
include CurationConcerns::Noid
|
9
|
-
include CurationConcerns::Permissions
|
10
|
-
include CurationConcerns::GenericFile::Export
|
11
|
-
include CurationConcerns::GenericFile::Characterization
|
12
|
-
include CurationConcerns::BasicMetadata
|
13
|
-
include CurationConcerns::GenericFile::Content
|
14
|
-
include CurationConcerns::GenericFile::FullTextIndexing
|
15
|
-
include CurationConcerns::GenericFile::Indexing
|
16
|
-
include CurationConcerns::GenericFile::BelongsToWorks
|
17
|
-
include Hydra::AccessControls::Embargoable
|
18
|
-
|
19
|
-
included do
|
20
|
-
attr_accessor :file
|
21
|
-
|
22
|
-
# make filename single-value (CurationConcerns::GenericFile::Characterization makes it multivalue)
|
23
|
-
def filename
|
24
|
-
self[:filename].first
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def human_readable_type
|
29
|
-
self.class.to_s.demodulize.titleize
|
30
|
-
end
|
31
|
-
|
32
|
-
def representative
|
33
|
-
to_param
|
34
|
-
end
|
35
|
-
|
36
|
-
def to_solr(solr_doc = {})
|
37
|
-
super(solr_doc).tap do |solr_doc|
|
38
|
-
# Enables Riiif to not have to recalculate this each time.
|
39
|
-
solr_doc['height_isi'] = Integer(height.first) if height.present?
|
40
|
-
solr_doc['width_isi'] = Integer(width.first) if width.present?
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
@@ -1,98 +0,0 @@
|
|
1
|
-
module CurationConcerns
|
2
|
-
# This is a direct copy of Sufia::GenericFile::Metadata with a few modifications:
|
3
|
-
# * title & description are single-value instead of multivalue
|
4
|
-
module DefaultMetadata
|
5
|
-
extend ActiveSupport::Concern
|
6
|
-
|
7
|
-
included do
|
8
|
-
|
9
|
-
property :label, predicate: ::RDF::DC.title, multiple: false
|
10
|
-
|
11
|
-
property :depositor, predicate: ::RDF::URI.new("http://id.loc.gov/vocabulary/relators/dpt"), multiple: false do |index|
|
12
|
-
index.as :symbol, :stored_searchable
|
13
|
-
end
|
14
|
-
|
15
|
-
property :relative_path, predicate: ::RDF::URI.new('http://scholarsphere.psu.edu/ns#relativePath'), multiple: false
|
16
|
-
|
17
|
-
property :import_url, predicate: ::RDF::URI.new('http://scholarsphere.psu.edu/ns#importUrl'), multiple: false do |index|
|
18
|
-
index.as :symbol
|
19
|
-
end
|
20
|
-
|
21
|
-
property :part_of, predicate: ::RDF::DC.isPartOf
|
22
|
-
property :resource_type, predicate: ::RDF::DC.type do |index|
|
23
|
-
index.as :stored_searchable, :facetable
|
24
|
-
end
|
25
|
-
property :title, predicate: ::RDF::DC.title, multiple:false do |index|
|
26
|
-
index.as :stored_searchable, :facetable
|
27
|
-
end
|
28
|
-
property :creator, predicate: ::RDF::DC.creator do |index|
|
29
|
-
index.as :stored_searchable, :facetable
|
30
|
-
end
|
31
|
-
property :contributor, predicate: ::RDF::DC.contributor do |index|
|
32
|
-
index.as :stored_searchable, :facetable
|
33
|
-
end
|
34
|
-
property :description, predicate: ::RDF::DC.description, multiple: false do |index|
|
35
|
-
index.type :text
|
36
|
-
index.as :stored_searchable
|
37
|
-
end
|
38
|
-
property :tag, predicate: ::RDF::DC.relation do |index|
|
39
|
-
index.as :stored_searchable, :facetable
|
40
|
-
end
|
41
|
-
property :rights, predicate: ::RDF::DC.rights do |index|
|
42
|
-
index.as :stored_searchable
|
43
|
-
end
|
44
|
-
property :publisher, predicate: ::RDF::DC.publisher do |index|
|
45
|
-
index.as :stored_searchable, :facetable
|
46
|
-
end
|
47
|
-
property :date_created, predicate: ::RDF::DC.created do |index|
|
48
|
-
index.as :stored_searchable
|
49
|
-
end
|
50
|
-
property :date_uploaded, predicate: ::RDF::DC.dateSubmitted, multiple: false do |index|
|
51
|
-
index.type :date
|
52
|
-
index.as :stored_sortable
|
53
|
-
end
|
54
|
-
property :date_modified, predicate: ::RDF::DC.modified, multiple: false do |index|
|
55
|
-
index.type :date
|
56
|
-
index.as :stored_sortable
|
57
|
-
end
|
58
|
-
property :subject, predicate: ::RDF::DC.subject do |index|
|
59
|
-
index.as :stored_searchable, :facetable
|
60
|
-
end
|
61
|
-
property :language, predicate: ::RDF::DC.language do |index|
|
62
|
-
index.as :stored_searchable, :facetable
|
63
|
-
end
|
64
|
-
property :identifier, predicate: ::RDF::DC.identifier do |index|
|
65
|
-
index.as :stored_searchable
|
66
|
-
end
|
67
|
-
property :based_near, predicate: ::RDF::FOAF.based_near do |index|
|
68
|
-
index.as :stored_searchable, :facetable
|
69
|
-
end
|
70
|
-
property :related_url, predicate: ::RDF::RDFS.seeAlso do |index|
|
71
|
-
index.as :stored_searchable
|
72
|
-
end
|
73
|
-
property :bibliographic_citation, predicate: ::RDF::DC.bibliographicCitation do |index|
|
74
|
-
index.as :stored_searchable
|
75
|
-
end
|
76
|
-
property :source, predicate: ::RDF::DC.source do |index|
|
77
|
-
index.as :stored_searchable
|
78
|
-
end
|
79
|
-
|
80
|
-
# TODO: Move this somewhere more appropriate
|
81
|
-
begin
|
82
|
-
LocalAuthority.register_vocabulary(self, "subject", "lc_subjects")
|
83
|
-
LocalAuthority.register_vocabulary(self, "language", "lexvo_languages")
|
84
|
-
LocalAuthority.register_vocabulary(self, "tag", "lc_genres")
|
85
|
-
rescue
|
86
|
-
puts "tables for vocabularies missing"
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
# Add a schema.org itemtype
|
91
|
-
def itemtype
|
92
|
-
# Look up the first non-empty resource type value in a hash from the config
|
93
|
-
CurationConcerns.config.resource_types_to_schema[resource_type.to_a.reject { |type| type.empty? }.first] || 'http://schema.org/CreativeWork'
|
94
|
-
rescue
|
95
|
-
'http://schema.org/CreativeWork'
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
# Copied from Curate
|
2
|
-
module CurationConcerns
|
3
|
-
module WithGenericFiles
|
4
|
-
extend ActiveSupport::Concern
|
5
|
-
|
6
|
-
included do
|
7
|
-
# The generic_files association and its accessor methods comes from Hydra::Works::AggregatesGenericFiles
|
8
|
-
before_destroy :before_destroy_cleanup_generic_files
|
9
|
-
end
|
10
|
-
|
11
|
-
# Stopgap unil ActiveFedora ContainerAssociation includes an *_ids accessor.
|
12
|
-
# At the moment, this is no more efficient than calling generic_files, but hopefully that will change in the future.
|
13
|
-
def generic_file_ids
|
14
|
-
generic_files.map { |generic_file| generic_file.id }
|
15
|
-
end
|
16
|
-
|
17
|
-
def before_destroy_cleanup_generic_files
|
18
|
-
generic_files.each(&:destroy)
|
19
|
-
end
|
20
|
-
|
21
|
-
def copy_visibility_to_files
|
22
|
-
generic_files.each do |gf|
|
23
|
-
gf.visibility = visibility
|
24
|
-
gf.save!
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
end
|
@@ -1,148 +0,0 @@
|
|
1
|
-
class FitsDatastream < ActiveFedora::OmDatastream
|
2
|
-
include OM::XML::Document
|
3
|
-
|
4
|
-
set_terminology do |t|
|
5
|
-
t.root(path: "fits",
|
6
|
-
xmlns: "http://hul.harvard.edu/ois/xml/ns/fits/fits_output",
|
7
|
-
schema: "http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd")
|
8
|
-
t.identification {
|
9
|
-
t.identity {
|
10
|
-
t.format_label(path: {attribute: "format"})
|
11
|
-
t.mime_type(path: {attribute: "mimetype"})
|
12
|
-
}
|
13
|
-
}
|
14
|
-
t.fileinfo {
|
15
|
-
t.file_size(path: "size")
|
16
|
-
t.last_modified(path: "lastmodified")
|
17
|
-
t.filename(path: "filename")
|
18
|
-
t.original_checksum(path: "md5checksum")
|
19
|
-
t.rights_basis(path: "rightsBasis")
|
20
|
-
t.copyright_basis(path: "copyrightBasis")
|
21
|
-
t.copyright_note(path: "copyrightNote")
|
22
|
-
}
|
23
|
-
t.filestatus {
|
24
|
-
t.well_formed(path: "well-formed")
|
25
|
-
t.valid(path: "valid")
|
26
|
-
t.status_message(path: "message")
|
27
|
-
}
|
28
|
-
t.metadata {
|
29
|
-
t.document {
|
30
|
-
t.file_title(path: "title")
|
31
|
-
t.file_author(path: "author")
|
32
|
-
t.file_language(path: "language")
|
33
|
-
t.page_count(path: "pageCount")
|
34
|
-
t.word_count(path: "wordCount")
|
35
|
-
t.character_count(path: "characterCount")
|
36
|
-
t.paragraph_count(path: "paragraphCount")
|
37
|
-
t.line_count(path: "lineCount")
|
38
|
-
t.table_count(path: "tableCount")
|
39
|
-
t.graphics_count(path: "graphicsCount")
|
40
|
-
}
|
41
|
-
t.image {
|
42
|
-
t.byte_order(path: "byteOrder")
|
43
|
-
t.compression(path: "compressionScheme")
|
44
|
-
t.width(path: "imageWidth")
|
45
|
-
t.height(path: "imageHeight")
|
46
|
-
t.color_space(path: "colorSpace")
|
47
|
-
t.profile_name(path: "iccProfileName")
|
48
|
-
t.profile_version(path: "iccProfileVersion")
|
49
|
-
t.orientation(path: "orientation")
|
50
|
-
t.color_map(path: "colorMap")
|
51
|
-
t.image_producer(path: "imageProducer")
|
52
|
-
t.capture_device(path: "captureDevice")
|
53
|
-
t.scanning_software(path: "scanningSoftwareName")
|
54
|
-
t.exif_version(path: "exifVersion")
|
55
|
-
t.gps_timestamp(path: "gpsTimeStamp")
|
56
|
-
t.latitude(path: "gpsDestLatitude")
|
57
|
-
t.longitude(path: "gpsDestLongitude")
|
58
|
-
}
|
59
|
-
t.text {
|
60
|
-
t.character_set(path: "charset")
|
61
|
-
t.markup_basis(path: "markupBasis")
|
62
|
-
t.markup_language(path: "markupLanguage")
|
63
|
-
}
|
64
|
-
t.audio {
|
65
|
-
t.duration(path: "duration")
|
66
|
-
t.bit_depth(path: "bitDepth")
|
67
|
-
t.sample_rate(path: "sampleRate")
|
68
|
-
t.channels(path: "channels")
|
69
|
-
t.data_format(path: "dataFormatType")
|
70
|
-
t.offset(path: "offset")
|
71
|
-
}
|
72
|
-
t.video {
|
73
|
-
t.width(path: "imageWidth")
|
74
|
-
t.height(path: "imageHeight")
|
75
|
-
t.duration(path: "duration")
|
76
|
-
t.sample_rate(path: "sampleRate")
|
77
|
-
t.frame_rate(path: "frameRate")
|
78
|
-
}
|
79
|
-
}
|
80
|
-
t.format_label(proxy: [:identification, :identity, :format_label])
|
81
|
-
t.mime_type(proxy: [:identification, :identity, :mime_type])
|
82
|
-
t.file_size(proxy: [:fileinfo, :file_size])
|
83
|
-
t.last_modified(proxy: [:fileinfo, :last_modified])
|
84
|
-
t.filename(proxy: [:fileinfo, :filename])
|
85
|
-
t.original_checksum(proxy: [:fileinfo, :original_checksum])
|
86
|
-
t.rights_basis(proxy: [:fileinfo, :rights_basis])
|
87
|
-
t.copyright_basis(proxy: [:fileinfo, :copyright_basis])
|
88
|
-
t.copyright_note(proxy: [:fileinfo, :copyright_note])
|
89
|
-
t.well_formed(proxy: [:filestatus, :well_formed])
|
90
|
-
t.valid(proxy: [:filestatus, :valid])
|
91
|
-
t.status_message(proxy: [:filestatus, :status_message])
|
92
|
-
t.file_title(proxy: [:metadata, :document, :file_title])
|
93
|
-
t.file_author(proxy: [:metadata, :document, :file_author])
|
94
|
-
t.page_count(proxy: [:metadata, :document, :page_count])
|
95
|
-
t.file_language(proxy: [:metadata, :document, :file_language])
|
96
|
-
t.word_count(proxy: [:metadata, :document, :word_count])
|
97
|
-
t.character_count(proxy: [:metadata, :document, :character_count])
|
98
|
-
t.paragraph_count(proxy: [:metadata, :document, :paragraph_count])
|
99
|
-
t.line_count(proxy: [:metadata, :document, :line_count])
|
100
|
-
t.table_count(proxy: [:metadata, :document, :table_count])
|
101
|
-
t.graphics_count(proxy: [:metadata, :document, :graphics_count])
|
102
|
-
t.byte_order(proxy: [:metadata, :image, :byte_order])
|
103
|
-
t.compression(proxy: [:metadata, :image, :compression])
|
104
|
-
t.width(proxy: [:metadata, :image, :width])
|
105
|
-
t.video_width( proxy: [:metadata, :video, :width])
|
106
|
-
t.height(proxy: [:metadata, :image, :height])
|
107
|
-
t.video_height(proxy: [:metadata, :video, :height])
|
108
|
-
t.color_space(proxy: [:metadata, :image, :color_space])
|
109
|
-
t.profile_name(proxy: [:metadata, :image, :profile_name])
|
110
|
-
t.profile_version(proxy: [:metadata, :image, :profile_version])
|
111
|
-
t.orientation(proxy: [:metadata, :image, :orientation])
|
112
|
-
t.color_map(proxy: [:metadata, :image, :color_map])
|
113
|
-
t.image_producer(proxy: [:metadata, :image, :image_producer])
|
114
|
-
t.capture_device(proxy: [:metadata, :image, :capture_device])
|
115
|
-
t.scanning_software(proxy: [:metadata, :image, :scanning_software])
|
116
|
-
t.exif_version(proxy: [:metadata, :image, :exif_version])
|
117
|
-
t.gps_timestamp(proxy: [:metadata, :image, :gps_timestamp])
|
118
|
-
t.latitude(proxy: [:metadata, :image, :latitude])
|
119
|
-
t.longitude(proxy: [:metadata, :image, :longitude])
|
120
|
-
t.character_set(proxy: [:metadata, :text, :character_set])
|
121
|
-
t.markup_basis(proxy: [:metadata, :text, :markup_basis])
|
122
|
-
t.markup_language(proxy: [:metadata, :text, :markup_language])
|
123
|
-
t.duration(proxy: [:metadata, :audio, :duration])
|
124
|
-
t.video_duration(proxy: [:metadata, :video, :duration])
|
125
|
-
t.bit_depth(proxy: [:metadata, :audio, :bit_depth])
|
126
|
-
t.sample_rate(proxy: [:metadata, :audio, :sample_rate])
|
127
|
-
t.video_sample_rate(proxy: [:metadata, :video, :sample_rate])
|
128
|
-
t.channels(proxy: [:metadata, :audio, :channels])
|
129
|
-
t.data_format(proxy: [:metadata, :audio, :data_format])
|
130
|
-
t.offset(proxy: [:metadata, :audio, :offset])
|
131
|
-
t.frame_rate(proxy: [:metadata, :video, :frame_rate])
|
132
|
-
end
|
133
|
-
|
134
|
-
def self.xml_template
|
135
|
-
builder = Nokogiri::XML::Builder.new do |xml|
|
136
|
-
xml.fits(xmlns: 'http://hul.harvard.edu/ois/xml/ns/fits/fits_output',
|
137
|
-
'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
|
138
|
-
'xsi:schemaLocation' =>
|
139
|
-
"http://hul.harvard.edu/ois/xml/ns/fits/fits_output
|
140
|
-
http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd",
|
141
|
-
version: "0.6.0",
|
142
|
-
timestamp: "1/25/12 11:04 AM") {
|
143
|
-
xml.identification { xml.identity(toolname: 'FITS') }
|
144
|
-
}
|
145
|
-
end
|
146
|
-
builder.doc
|
147
|
-
end
|
148
|
-
end
|
@@ -1,71 +0,0 @@
|
|
1
|
-
module CurationConcerns
|
2
|
-
# Run FITS to gather technical metadata about the content and the full text.
|
3
|
-
# Store this extracted metadata in the characterization datastream.
|
4
|
-
class CharacterizationService
|
5
|
-
include Hydra::Derivatives::ExtractMetadata
|
6
|
-
|
7
|
-
delegate :mime_type, :uri, to: :@generic_file
|
8
|
-
attr_reader :generic_file
|
9
|
-
|
10
|
-
def self.run(generic_file)
|
11
|
-
new(generic_file).characterize
|
12
|
-
end
|
13
|
-
|
14
|
-
def initialize(generic_file)
|
15
|
-
@generic_file = generic_file
|
16
|
-
end
|
17
|
-
|
18
|
-
## Extract the metadata from the content datastream and record it in the characterization datastream
|
19
|
-
def characterize
|
20
|
-
store_metadata(extract_metadata)
|
21
|
-
store_fulltext(extract_fulltext)
|
22
|
-
generic_file.filename = [generic_file.original_file.original_name]
|
23
|
-
end
|
24
|
-
|
25
|
-
protected
|
26
|
-
|
27
|
-
def store_fulltext(extracted_text)
|
28
|
-
if extracted_text.present?
|
29
|
-
extracted_text_file = generic_file.build_extracted_text
|
30
|
-
extracted_text_file.content = extracted_text
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def extract_fulltext
|
35
|
-
FullTextExtractionService.run(generic_file)
|
36
|
-
end
|
37
|
-
|
38
|
-
def store_metadata(metadata)
|
39
|
-
generic_file.characterization.ng_xml = metadata if metadata.present?
|
40
|
-
append_metadata
|
41
|
-
end
|
42
|
-
|
43
|
-
def extract_metadata
|
44
|
-
return unless generic_file.original_file.has_content?
|
45
|
-
Hydra::FileCharacterization.characterize(generic_file.original_file.content, filename_for_characterization.join, :fits) do |config|
|
46
|
-
config[:fits] = Hydra::Derivatives.fits_path
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
# Populate GenericFile's properties with fields from FITS (e.g. Author from pdfs)
|
51
|
-
def append_metadata
|
52
|
-
terms = generic_file.characterization_terms
|
53
|
-
CurationConcerns.config.fits_to_desc_mapping.each_pair do |k, v|
|
54
|
-
if terms.has_key?(k)
|
55
|
-
# coerce to array to remove a conditional
|
56
|
-
terms[k] = [terms[k]] unless terms[k].is_a? Array
|
57
|
-
terms[k].each do |term_value|
|
58
|
-
proxy_term = generic_file.send(v)
|
59
|
-
if proxy_term.kind_of?(Array)
|
60
|
-
proxy_term << term_value unless proxy_term.include?(term_value)
|
61
|
-
else
|
62
|
-
# these are single-valued terms which cannot be appended to
|
63
|
-
generic_file.send("#{v}=", term_value)
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|
@@ -1,38 +0,0 @@
|
|
1
|
-
module CurationConcerns
|
2
|
-
# Extract the full text from the content using Solr's extract handler
|
3
|
-
class FullTextExtractionService
|
4
|
-
def self.run(generic_file)
|
5
|
-
new(generic_file).extract
|
6
|
-
end
|
7
|
-
|
8
|
-
delegate :original_file, :logger, :mime_type, :id, to: :@generic_file
|
9
|
-
|
10
|
-
def initialize(generic_file)
|
11
|
-
@generic_file = generic_file
|
12
|
-
end
|
13
|
-
|
14
|
-
def extract
|
15
|
-
uri = URI("#{connection_url}/update/extract?extractOnly=true&wt=json&extractFormat=text")
|
16
|
-
req = Net::HTTP.new(uri.host, uri.port)
|
17
|
-
resp = req.post(uri.to_s, original_file.content, {
|
18
|
-
'Content-type' => "#{mime_type};charset=utf-8",
|
19
|
-
'Content-Length' => original_file.content.size.to_s
|
20
|
-
})
|
21
|
-
raise "URL '#{uri}' returned code #{resp.code}" unless resp.code == "200"
|
22
|
-
original_file.content.rewind if original_file.content.respond_to?(:rewind)
|
23
|
-
JSON.parse(resp.body)[''].rstrip
|
24
|
-
rescue => e
|
25
|
-
logger.error("Error extracting content from #{id}: #{e.inspect}")
|
26
|
-
return nil
|
27
|
-
end
|
28
|
-
|
29
|
-
def connection_url
|
30
|
-
case
|
31
|
-
when Blacklight.connection_config[:url] then Blacklight.connection_config[:url]
|
32
|
-
when Blacklight.connection_config["url"] then Blacklight.connection_config["url"]
|
33
|
-
when Blacklight.connection_config[:fulltext] then Blacklight.connection_config[:fulltext]["url"]
|
34
|
-
else Blacklight.connection_config[:default]["url"]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
@@ -1,14 +0,0 @@
|
|
1
|
-
module CurationConcerns
|
2
|
-
class GenericFileIndexingService < ActiveFedora::IndexingService
|
3
|
-
def generate_solr_document
|
4
|
-
super.tap do |solr_doc|
|
5
|
-
solr_doc[Solrizer.solr_name('label')] = object.label
|
6
|
-
solr_doc[Solrizer.solr_name('file_format')] = object.file_format
|
7
|
-
solr_doc[Solrizer.solr_name('file_format', :facetable)] = object.file_format
|
8
|
-
solr_doc[Solrizer.solr_name(:file_size, :symbol)] = object.file_size[0]
|
9
|
-
solr_doc['all_text_timv'] = object.full_text.content
|
10
|
-
solr_doc[Solrizer.solr_name('generic_work_ids', :symbol)] = object.generic_work_ids unless object.generic_work_ids.empty?
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|