iiif_print 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +98 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
- data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
- data/app/models/concerns/iiif_print/solr/document.rb +14 -0
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +10 -9
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +2 -1
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
- data/lib/iiif_print/catalog_search_builder.rb +5 -1
- data/lib/iiif_print/configuration.rb +145 -8
- data/lib/iiif_print/data/fileset_helper.rb +1 -1
- data/lib/iiif_print/data/work_derivatives.rb +3 -3
- data/lib/iiif_print/engine.rb +7 -13
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
- data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
- data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/lib/iiif_print/lineage_service.rb +29 -8
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +167 -12
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/spec/factories/newspaper_page_solr_document.rb +9 -1
- data/spec/fixtures/authorities/licenses.yml +4 -0
- data/spec/fixtures/authorities/rights_statements.yml +4 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
- data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
- data/spec/iiif_print/configuration_spec.rb +141 -15
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
- data/spec/iiif_print/lineage_service_spec.rb +1 -1
- data/spec/iiif_print/metadata_spec.rb +157 -23
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
- data/spec/iiif_print_spec.rb +125 -5
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
- data/spec/samvera/derivatives/configuration_spec.rb +41 -0
- data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
- data/spec/samvera/derivatives_spec.rb +54 -0
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +123 -35
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
@@ -3,8 +3,128 @@ module IiifPrint
|
|
3
3
|
module IiifManifestPresenterBehavior
|
4
4
|
extend ActiveSupport::Concern
|
5
5
|
|
6
|
+
# Extend the presenter to expose the base url, which includes the protocol.
|
7
|
+
# We need the base url to render the facet links and normalize the interface.
|
8
|
+
attr_accessor :base_url
|
9
|
+
|
10
|
+
# Metadata for the IIIF manifest, built once per presenter and then reused.
#
# The work handed to IiifPrint.manifest_metadata_from is model.solr_document (the original
# note here reads "ensure we are using a SolrDocument").
def manifest_metadata
  return @manifest_metadata if @manifest_metadata

  @manifest_metadata = IiifPrint.manifest_metadata_from(work: model.solr_document, presenter: self)
end
|
14
|
+
|
6
15
|
# Returns the `solr_document_iiif_search` route URL for this record's id, using #hostname as host.
def search_service
  url_helpers = Rails.application.routes.url_helpers
  url_helpers.solr_document_iiif_search_url(id, host: hostname)
end
|
18
|
+
|
19
|
+
# OVERRIDE: Hyrax 3x, avoid nil returning to IIIF Manifest gem
# @see https://github.com/samvera/iiif_manifest/blob/c408f90eba11bef908796c7236ba6bcf8d687acc/lib/iiif_manifest/v3/manifest_builder/record_property_builder.rb#L28
##
# Builds one rendering entry per id in #rendering_ids (an empty list when the presenter does not
# respond to it). An id without a matching file set presenter is skipped, so the result never
# contains nil and a single missing file set no longer discards the renderings that were found.
#
# @return [Array<Hash{String => String}>]
def sequence_rendering
  Array(try(:rendering_ids)).map do |file_set_id|
    rendering = file_set_presenters.find { |p| p.id == file_set_id }
    # `next` skips just this id; a `return` here would abandon the whole method.
    next unless rendering

    { '@id' => Hyrax::Engine.routes.url_helpers.download_url(rendering.id, host: hostname),
      'format' => rendering.mime_type.presence || I18n.t("hyrax.manifest.unknown_mime_text"),
      'label' => I18n.t("hyrax.manifest.download_text") + (rendering.label || '') }
  end.compact
end
|
33
|
+
|
34
|
+
# OVERRIDE: Hyrax v3.x
|
35
|
+
module DisplayImagePresenterBehavior
|
36
|
+
# Extend the presenter to expose the base url, which includes the protocol.
|
37
|
+
# We need the base url to render the facet links and normalize the interface.
|
38
|
+
attr_accessor :base_url
|
39
|
+
|
40
|
+
# There is an #ability= writer but no #ability reader, so one is supplied here. The definition
# mirrors Hyrax::IiifManifestPresenter#ability: fall back to a NullAbility when none was assigned.
def ability
  return @ability if @ability

  @ability = NullAbility.new
end
|
45
|
+
|
46
|
+
# Builds the display image, which is only produced when the configured default manifest version is 2.
#
# @return [IIIFManifest::DisplayImage, nil] nil when there is no latest file id, the model is not
#   an image, or IiifPrint.config.default_iiif_manifest_version is not 2
def display_image
  return nil unless latest_file_id && model.image? && IiifPrint.config.default_iiif_manifest_version == 2

  image_url = display_image_url(hostname)
  image_options = {
    format: image_format(alpha_channels),
    width: width,
    height: height,
    iiif_endpoint: iiif_endpoint(latest_file_id, base_url: hostname)
  }
  IIIFManifest::DisplayImage.new(image_url, **image_options)
end
|
58
|
+
|
59
|
+
# OVERRIDE: IIIF Hyrax AV v0.2 #display_content for prez 3 manifests
#
# @return [IIIFManifest::V3::DisplayContent, nil] nil without a latest file id; for models that
#   are not images, whatever super returns
def display_content
  return nil unless latest_file_id
  return super unless model.image?

  content_url = display_image_url(hostname)
  content_options = {
    format: image_format(alpha_channels),
    width: width,
    height: height,
    type: 'Image',
    iiif_endpoint: iiif_endpoint(latest_file_id, base_url: hostname)
  }
  IIIFManifest::V3::DisplayContent.new(content_url, **content_options)
end
|
72
|
+
|
73
|
+
# With EXTERNAL_IIIF_URL set, the url comes from #iiif_image_url_builder; otherwise super decides.
def display_image_url(base_url)
  return super unless ENV['EXTERNAL_IIIF_URL'].present?

  # At the moment we are only concerned about Hyrax's default image url builder
  iiif_image_url_builder(url_builder: Hyrax.config.iiif_image_url_builder)
end
|
81
|
+
|
82
|
+
# With EXTERNAL_IIIF_URL set, the endpoint is that url joined with the file id, using Hyrax's
# configured compliance level uri as profile; otherwise super decides.
def iiif_endpoint(file_id, base_url: request.base_url)
  external_url = ENV['EXTERNAL_IIIF_URL']
  return super unless external_url.present?

  IIIFManifest::IIIFEndpoint.new(
    File.join(external_url, file_id),
    profile: Hyrax.config.iiif_image_compliance_level_uri
  )
end
|
92
|
+
|
93
|
+
# @return [String] the value of @hostname, or 'localhost' when it is unset
def hostname
  return @hostname if @hostname

  'localhost'
end
|
96
|
+
|
97
|
+
##
|
98
|
+
# @return [Boolean] always false
|
99
|
+
def work?
|
100
|
+
false
|
101
|
+
end
|
102
|
+
|
103
|
+
private
|
104
|
+
|
105
|
+
# With EXTERNAL_IIIF_URL set, the id is #external_latest_file_id; otherwise super decides.
def latest_file_id
  return super unless ENV['EXTERNAL_IIIF_URL'].present?

  external_latest_file_id
end
|
112
|
+
|
113
|
+
# Memoizes digest_sha1 in @latest_file_id and returns it.
def external_latest_file_id
  return @latest_file_id if @latest_file_id

  @latest_file_id = digest_sha1
end
|
116
|
+
|
117
|
+
# Calls the given builder with the latest file id, EXTERNAL_IIIF_URL and Hyrax's default image
# size, then removes every "images/" segment from the resulting url.
#
# @param url_builder [#call, #arity]
def iiif_image_url_builder(url_builder:)
  builder_args = [latest_file_id, ENV['EXTERNAL_IIIF_URL'], Hyrax.config.iiif_image_size_default]
  # In Hyrax 3, Hyrax.config.iiif_image_url_builder takes an additional argument
  builder_args.push(image_format(alpha_channels)) if url_builder.arity == 4

  image_url = url_builder.call(*builder_args)
  image_url.gsub(%r{images/}, '')
end
|
128
|
+
end
|
9
129
|
end
|
10
130
|
end
|
@@ -14,7 +14,7 @@ module IiifPrint
|
|
14
14
|
presenter_class.for(solr_doc)
|
15
15
|
elsif Hyrax.config.curation_concerns.include?(solr_doc.hydra_model)
|
16
16
|
# look up file set ids and loop through those
|
17
|
-
file_set_docs = load_file_set_docs(solr_doc.file_set_ids)
|
17
|
+
file_set_docs = load_file_set_docs(solr_doc.try(:file_set_ids) || solr_doc.try(:[], 'file_set_ids_ssim'))
|
18
18
|
file_set_docs.map { |doc| presenter_class.for(doc) } if file_set_docs.length
|
19
19
|
end
|
20
20
|
end.flatten.compact
|
@@ -4,26 +4,35 @@ module IiifPrint
|
|
4
4
|
module WorkShowPresenterDecorator
|
5
5
|
delegate :file_set_ids, to: :solr_document
|
6
6
|
|
7
|
-
# OVERRIDE Hyrax 2.9.6 to remove check for representative_presenter.image?
|
8
|
-
# a fallback to check for images on the child works
|
7
|
+
# OVERRIDE Hyrax 2.9.6 to remove check for representative_presenter.image?
|
9
8
|
# @return [Boolean] render a IIIF viewer
|
10
9
|
def iiif_viewer?
|
11
|
-
|
10
|
+
Hyrax.config.iiif_image_server? &&
|
11
|
+
representative_id.present? &&
|
12
|
+
representative_presenter.present? &&
|
13
|
+
members_include_viewable_image?
|
12
14
|
end
|
13
15
|
|
14
16
|
alias universal_viewer? iiif_viewer?
|
15
17
|
|
16
18
|
private
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
20
|
+
# Overrides Hyrax so that file sets of both the work and its child works are considered (the file
# set ids include both). Ids are checked one at a time and the scan stops at the first viewable
# image, so no more member presenters are loaded than needed.
#
# @return [Boolean]
def members_include_viewable_image?
  candidate_ids = solr_document.try(:file_set_ids) || solr_document.try(:[], 'file_set_ids_ssim')
  Array.wrap(candidate_ids).any? do |file_set_id|
    file_type_and_permissions_valid?(member_presenters_for([file_set_id]).first)
  end
end
|
24
30
|
|
25
|
-
|
26
|
-
|
31
|
+
# This method allows for overriding to add additional file types to mix in with IiifAv
# TODO: add configuration setting for file types to loop through so an override is unneeded.
#
# @param presenter [#id, nil] a member presenter; nil is accepted because the caller passes
#   `member_presenters_for([id]).first`, which has no element when no presenter was built
#   (presumably for ids without an indexed document — confirm)
# @return [Boolean, nil] truthy when the current ability can read the presenter's id and the
#   presenter (or its solr_document) reports image?
def file_type_and_permissions_valid?(presenter)
  # Without a presenter there is nothing viewable; answer false instead of raising on nil.id.
  return false if presenter.nil?

  current_ability.can?(:read, presenter.id) &&
    (presenter.try(:image?) || presenter.try(:solr_document).try(:image?))
end
|
28
37
|
end
|
29
38
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IiifPrint
|
4
|
+
module AllinsonFlexFields
|
5
|
+
# Appends "<name>_tesim" for every AllinsonFlex::ProfileProperty found in
# IiifPrint.allinson_flex_fields to the space separated solr_parameters[:qf], dropping duplicates.
# Does nothing when AllinsonFlex is not defined.
#
# @param solr_parameters [Hash] modified in place; its :qf entry is split on spaces
# @return [String, nil] the new :qf value, or nil when AllinsonFlex is not defined
def include_allinson_flex_fields(solr_parameters)
  return unless defined?(AllinsonFlex)

  existing_fields = solr_parameters[:qf].split(' ')
  flex_fields = IiifPrint.allinson_flex_fields
                         .select { |field| field.is_a?(AllinsonFlex::ProfileProperty) }
                         .map { |field| field.name + '_tesim' }
  solr_parameters[:qf] = (existing_fields + flex_fields).uniq.join(' ')
end
|
14
|
+
end
|
15
|
+
end
|
@@ -6,9 +6,10 @@ module IiifPrint
|
|
6
6
|
def highlight_search_params(solr_parameters = {})
|
7
7
|
return unless solr_parameters[:q] || solr_parameters[:all_fields]
|
8
8
|
solr_parameters[:hl] = true
|
9
|
-
solr_parameters[:'hl.fl'] = '
|
9
|
+
solr_parameters[:'hl.fl'] = '*'
|
10
10
|
solr_parameters[:'hl.fragsize'] = 100
|
11
11
|
solr_parameters[:'hl.snippets'] = 5
|
12
|
+
solr_parameters[:'hl.requiredFieldMatch'] = true
|
12
13
|
end
|
13
14
|
end
|
14
15
|
end
|
@@ -0,0 +1,382 @@
|
|
1
|
+
module IiifPrint
|
2
|
+
##
|
3
|
+
# This class implements the interface of a Hyrax::DerivativeService.
|
4
|
+
#
|
5
|
+
# That means three important methods are:
|
6
|
+
#
|
7
|
+
# - {#valid?}
|
8
|
+
# - {#create_derivatives}
|
9
|
+
# - {#cleanup_derivatives}
|
10
|
+
#
|
11
|
+
# And the object initializes with a FileSet.
|
12
|
+
#
|
13
|
+
# It is a companion to {IiifPrint::PluggableDerivativeService}.
|
14
|
+
#
|
15
|
+
# @see https://github.com/samvera/hyrax/blob/main/app/services/hyrax/derivative_service.rb Hyrax::DerivativesService
|
16
|
+
# rubocop:disable Metrics/ClassLength
|
17
|
+
class DerivativeRodeoService
|
18
|
+
##
|
19
|
+
# @!group Class Attributes
|
20
|
+
#
|
21
|
+
# @!attribute parent_work_identifier_property_name [r|w]
|
22
|
+
# @return [String] the property we use to identify the unique identifier of the parent work as
|
23
|
+
# it went through the SpaceStone pre-process.
|
24
|
+
#
|
25
|
+
# @todo The default of :aark_id is a quick hack for adventist. By exposing a configuration
|
26
|
+
# value, my hope is that this becomes easier to configure.
|
27
|
+
# @api public
|
28
|
+
class_attribute :parent_work_identifier_property_name, default: 'aark_id'
|
29
|
+
|
30
|
+
##
|
31
|
+
# @!attribute preprocessed_location_adapter_name [r|w]
|
32
|
+
# @return [String] The name of a derivative rodeo storage location; this must be
|
33
|
+
# registered with the DerivativeRodeo::StorageLocations::BaseLocation.
|
34
|
+
# @api public
|
35
|
+
class_attribute :preprocessed_location_adapter_name, default: 's3'
|
36
|
+
|
37
|
+
##
|
38
|
+
# @!attribute named_derivatives_and_generators_by_type [r|w]
|
39
|
+
# @return [Hash<Symbol, #constantize>] the named derivative and its associated generator.
|
40
|
+
# The "name" is important for Hyrax or IIIF Print implementations. The generator is
|
41
|
+
# one that exists in the DerivativeRodeo.
|
42
|
+
#
|
43
|
+
# @example
|
44
|
+
# # In this case there are two changes:
|
45
|
+
# # 1. Do not use the DerivativeRodeo to process PDFs; instead fallback to another
|
46
|
+
# # applicable service.
|
47
|
+
# # 2. For Images, we will use the DerivativeRodeo but will only generate the thumbnail.
|
48
|
+
# # We will skip the JSON, XML, and TXT for an image.
|
49
|
+
# #
|
50
|
+
# # NOTE: Changing the behavior in this way may create broken assumptions in Hyrax.
|
51
|
+
# IiifPrint::DerivativeRodeoService.named_derivatives_and_generators_by_type =
|
52
|
+
# { image: { thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator" } }
|
53
|
+
#
|
54
|
+
# @todo Could be nice to have a registry for the DerivativeRodeo::Generators; but that's a
|
55
|
+
# tomorrow wish.
|
56
|
+
# @api public
|
57
|
+
class_attribute(:named_derivatives_and_generators_by_type, default: {
|
58
|
+
pdf: {
|
59
|
+
thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator"
|
60
|
+
},
|
61
|
+
image: {
|
62
|
+
thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator",
|
63
|
+
json: "DerivativeRodeo::Generators::WordCoordinatesGenerator",
|
64
|
+
xml: "DerivativeRodeo::Generators::AltoGenerator",
|
65
|
+
txt: "DerivativeRodeo::Generators::PlainTextGenerator"
|
66
|
+
}
|
67
|
+
})
|
68
|
+
|
69
|
+
##
|
70
|
+
# @!attribute named_derivatives_and_generators_filter [r|w]
|
71
|
+
# @return [#call] with three named parameters: :file_set, :filename, :named_derivatives_and_generators
|
72
|
+
#
|
73
|
+
# - :file_set is a {FileSet}
|
74
|
+
# - :filename is a String
|
75
|
+
# - :named_derivatives_and_generators is an entry from
|
76
|
+
# {.named_derivatives_and_generators_by_type} as pulled from
|
77
|
+
# {#named_derivatives_and_generators}
|
78
|
+
#
|
79
|
+
# The lambda is responsible for filtering any named generators that should or should not
|
80
|
+
# be run. It should return a data structure similar to the provided
|
81
|
+
# :named_derivatives_and_generators
|
82
|
+
#
|
83
|
+
# @example
|
84
|
+
# # The following configured filter will skip thumbnail generation for any files that
|
85
|
+
# # end in '.tn.jpg'
|
86
|
+
# IiifPrint::DerivativeRodeoService.named_derivatives_and_generators_filter =
|
87
|
+
# ->(file_set:, filename:, named_derivatives_and_generators:) do
|
88
|
+
# named_derivatives_and_generators.reject do |named_derivative, generators|
|
89
|
+
# named_derivative == :thumbnail && filename.downcase.ends_with?('.tn.jpg')
|
90
|
+
# end
|
91
|
+
# end
|
92
|
+
#
|
93
|
+
# @see .named_derivatives_and_generators_by_type
|
94
|
+
# @see #named_derivatives_and_generators
|
95
|
+
# @api public
|
96
|
+
# rubocop:disable Lint/UnusedBlockArgument
|
97
|
+
class_attribute(:named_derivatives_and_generators_filter,
|
98
|
+
default: ->(file_set:, filename:, named_derivatives_and_generators:) { named_derivatives_and_generators })
|
99
|
+
|
100
|
+
# rubocop:enable Lint/UnusedBlockArgument
|
101
|
+
# @!endgroup Class Attributes
|
102
|
+
##
|
103
|
+
|
104
|
+
##
|
105
|
+
# @see .named_derivatives_and_generators_by_type
|
106
|
+
#
|
107
|
+
# @return [Hash<Symbol,String>] The named derivative types and their corresponding generators.
|
108
|
+
# @raise [IiifPrint::UnexpectedMimeTypeError] when the {#file_set}'s {#mime_type} is not one
|
109
|
+
# that is part of {.named_derivatives_and_generators_by_type}
|
110
|
+
def named_derivatives_and_generators
  return @named_derivatives_and_generators if @named_derivatives_and_generators

  # PDFs are checked before images; any other mime type is rejected.
  type_key =
    if file_set.class.pdf_mime_types.include?(mime_type)
      :pdf
    elsif file_set.class.image_mime_types.include?(mime_type)
      :image
    else
      raise UnexpectedMimeTypeError.new(file_set: file_set, mime_type: mime_type)
    end

  # The configured entry is deep_dup'd, presumably so the class-level configuration is never
  # handed out directly.
  @named_derivatives_and_generators = named_derivatives_and_generators_by_type.fetch(type_key).deep_dup
end
|
120
|
+
|
121
|
+
##
|
122
|
+
# This method encodes some existing assumptions about the URI based on implementations for
|
123
|
+
# Adventist. Those are reasonable assumptions but time will tell how reasonable.
|
124
|
+
#
|
125
|
+
# By convention, this method is returning output_location of the SpaceStone::Serverless
|
126
|
+
# processing. We might know the original location that SpaceStone::Serverless processed, but
|
127
|
+
# that seems to be a tenuous assumption.
|
128
|
+
#
|
129
|
+
# In other words, where would SpaceStone, by convention, have written the original file and by
|
130
|
+
# convention written that original file's derivatives.
|
131
|
+
#
|
132
|
+
# TODO: We also need to account for PDF splitting
|
133
|
+
#
|
134
|
+
# @param file_set [FileSet]
|
135
|
+
# @param filename [String]
|
136
|
+
# @param extension [String]
|
137
|
+
# @param adapter_name [String] Added as a parameter to make testing just a bit easier. See
|
138
|
+
# {.preprocessed_location_adapter_name}
|
139
|
+
#
|
140
|
+
# @return [String] when we have a possible candidate.
|
141
|
+
# @return [NilClass] when we could not derive a candidate.
|
142
|
+
# rubocop:disable Metrics/MethodLength
|
143
|
+
def self.derivative_rodeo_uri(file_set:, filename: nil, extension: nil, adapter_name: preprocessed_location_adapter_name)
  # TODO: This is a hack that knows about the inner workings of Hydra::Works, but for
  # expediency, I'm using it. See
  # https://github.com/samvera/hydra-works/blob/c9b9dd0cf11de671920ba0a7161db68ccf9b7f6d/lib/hydra/works/services/add_file_to_file_set.rb#L49-L53
  filename ||= Hydra::Works::DetermineOriginalName.call(file_set.original_file)

  dirname = derivative_rodeo_preprocessed_directory_for(file_set: file_set, filename: filename)
  return nil unless dirname

  # The aforementioned filename and the following basename and extension are here to allow for
  # us to take an original file and see if we've pre-processed the derivative file. In the
  # pre-processed derivative case, that would mean we have a different extension than the
  # original.
  extension ||= File.extname(filename)
  # Only a non-empty extension gets the leading dot: File.extname returns "" for a filename
  # without an extension, and that must not become a dangling "." at the end of the URI.
  extension = ".#{extension}" unless extension.empty? || extension.start_with?(".")

  # We want to strip off the extension of the given filename.
  basename = File.basename(filename, File.extname(filename))

  # TODO: What kinds of exceptions might we raise if the location is not configured? Do we need
  # to "validate" it in another step.
  location = DerivativeRodeo::StorageLocations::BaseLocation.load_location(adapter_name)

  File.join(location.adapter_prefix, dirname, "#{basename}#{extension}")
end
|
168
|
+
# rubocop:enable Metrics/MethodLength
|
169
|
+
|
170
|
+
##
|
171
|
+
# @api public
|
172
|
+
#
|
173
|
+
# Figure out the ancestor type and ancestor
|
174
|
+
def self.get_ancestor(filename: nil, file_set:)
  # In the case of a page split from a PDF, we need to know the grandparent's identifier to
  # find the file(s) in the DerivativeRodeo.
  split_page = DerivativeRodeo::Generators::PdfSplitGenerator.filename_for_a_derived_page_from_a_pdf?(filename: filename)
  return [IiifPrint.grandparent_for(file_set), :grandparent] if split_page

  [IiifPrint.parent_for(file_set), :parent]
end
|
183
|
+
|
184
|
+
##
|
185
|
+
# @api public
|
186
|
+
#
|
187
|
+
# @note You may find yourself wanting to override this method. Please do if you find a better
|
188
|
+
# way to do this.
|
189
|
+
#
|
190
|
+
# By convention, we're putting the files of a work in a "directory" that is based on some
|
191
|
+
# identifying value (e.g. an object's AARK ID) of the work.
|
192
|
+
#
|
193
|
+
# Because we split PDFs (see {IiifPrint::SplitPdfs::DerivativeRodeoSplitter}) we need to consider
|
194
|
+
# that we may be working on the PDF (and that FileSet is directly associated with the work) or
|
195
|
+
# we are working on one of the pages ripped from the PDF (and the FileSet's work is a to be
|
196
|
+
# related child work of the original work).
|
197
|
+
#
|
198
|
+
# @param file_set [FileSet]
|
199
|
+
# @param filename [String]
|
200
|
+
# @return [String] the dirname (without any "/" we hope)
|
201
|
+
# @return [NilClass] when we cannot infer a URI from the object.
|
202
|
+
# rubocop:disable Metrics/MethodLength
|
203
|
+
def self.derivative_rodeo_preprocessed_directory_for(file_set:, filename:)
  ancestor, ancestor_type = get_ancestor(filename: filename, file_set: file_set)

  # Why might we not have an ancestor? In the case of grandparent_for, we may not yet have run
  # the create relationships job. We could sneak a peek in the table to maybe glean some insight.
  # However, read further the `else` clause to see the novel approach.
  # rubocop:disable Style/GuardClause
  if ancestor
    # Inside a class method `self` is the service itself; `self.class` would only print "Class".
    message = "#{self}.#{__method__} #{file_set.class} ID=#{file_set.id} and filename: #{filename.inspect} " \
              "has #{ancestor_type} of #{ancestor.class} ID=#{ancestor.id}"
    Rails.logger.info(message)
    parent_work_identifier = ancestor.public_send(parent_work_identifier_property_name)
    return parent_work_identifier if parent_work_identifier.present?
    Rails.logger.warn("Expected #{ancestor.class} ID=#{ancestor.id} (#{ancestor_type} of #{file_set.class} ID=#{file_set.id}) " \
                      "to have a present #{parent_work_identifier_property_name.inspect}")
    nil
  else
    # HACK: This makes critical assumptions about how we're creating the title for the file_set;
    # but we don't have much to fall-back on. Consider making this a configurable function. Or
    # perhaps this entire method should be more configurable.
    # TODO: Revisit this implementation.
    #
    # A missing or empty title yields nil (no candidate) rather than raising on nil.split.
    Array(file_set.title).first.to_s.split(".").first.presence
  end
  # rubocop:enable Style/GuardClause
end
|
230
|
+
# rubocop:enable Metrics/MethodLength
|
231
|
+
|
232
|
+
# @param file_set [FileSet] kept in @file_set and exposed through the #file_set reader
def initialize(file_set)
|
233
|
+
@file_set = file_set
|
234
|
+
end
|
235
|
+
|
236
|
+
attr_reader :file_set
|
237
|
+
delegate :uri, :mime_type, to: :file_set
|
238
|
+
|
239
|
+
##
# Logs whether the DerivativeRodeo is used or skipped for the file set and answers accordingly.
#
# @return [Boolean] true when {#in_the_rodeo?} is truthy, false otherwise
# @see https://github.com/samvera/hyrax/blob/426575a9065a5dd3b30f458f5589a0a705ad7be2/app/services/hyrax/file_set_derivatives_service.rb#L18-L20 Hyrax::FileSetDerivativesService#valid?
def valid?
  unless in_the_rodeo?
    Rails.logger.info("Skipping the DerivativeRodeo for FileSet ID=#{file_set.id} with mime_type of #{mime_type}")
    return false
  end

  Rails.logger.info("Using the DerivativeRodeo for FileSet ID=#{file_set.id} with mime_type of #{mime_type}")
  true
end
|
251
|
+
|
252
|
+
##
|
253
|
+
# @api public
|
254
|
+
#
|
255
|
+
# The file_set.class.*_mime_types are carried over from Hyrax.
|
256
|
+
#
|
257
|
+
# @note We write derivatives to the {#absolute_derivative_path_for} and should likewise clean
|
258
|
+
# them up when deleted.
|
259
|
+
# @see #cleanup_derivatives
|
260
|
+
#
|
261
|
+
# @param filename [String]
|
262
|
+
#
|
263
|
+
# @see .named_derivatives_and_generators_filter
|
264
|
+
# @see #named_derivatives_and_generators
|
265
|
+
def create_derivatives(filename)
  # Let the configured filter decide which named derivatives apply to this file.
  applicable = named_derivatives_and_generators_filter.call(
    file_set: file_set,
    filename: filename,
    named_derivatives_and_generators: named_derivatives_and_generators
  )

  applicable.flat_map do |named_derivative, generator_name|
    lasso_up_some_derivatives(named_derivative: named_derivative, generator_name: generator_name, filename: filename)
  end
end
|
276
|
+
|
277
|
+
# We need to clean up the derivatives that we created.
|
278
|
+
#
|
279
|
+
# @see #create_derivatives
|
280
|
+
#
|
281
|
+
# @note Due to the configurability and plasticity of the named derivatives, it is possible that
|
282
|
+
# when we created the derivatives, we had a different configuration (e.g. were we to
|
283
|
+
# create derivatives again, we might get a set of different files). So we must ask
|
284
|
+
# ourselves, is it important to clean up all derivatives (even ones that may not be in
|
285
|
+
# scope for this service) or to clean up only those presently in scope? I am favoring
|
286
|
+
# removing all of them. In part because of the nature of the valid derivative service.
|
287
|
+
def cleanup_derivatives
  ## Were we to only delete the derivatives that this service presently creates, this would be
  ## that code:
  #
  # named_derivatives_and_generators.keys.each do |named_derivative|
  #   path = absolute_derivative_path_for(named_derivative)
  #   FileUtils.rm_f(path) if File.exist?(path)
  # end

  ## Instead, let's clean it all up.
  Hyrax::DerivativePath.derivatives_for_reference(file_set).each do |derivative_path|
    next unless File.exist?(derivative_path)

    FileUtils.rm_f(derivative_path)
  end
end
|
301
|
+
|
302
|
+
private
|
303
|
+
|
304
|
+
# Asks Hyrax::DerivativePath for the path of the named derivative of this file set.
#
# @param named_derivative [#to_s]
def absolute_derivative_path_for(named_derivative:)
  derivative_name = named_derivative.to_s
  Hyrax::DerivativePath.derivative_path_for_reference(file_set, derivative_name)
end
|
307
|
+
|
308
|
+
# rubocop:disable Metrics/MethodLength
|
309
|
+
def lasso_up_some_derivatives(filename:, named_derivative:, generator_name:)
  # TODO: Can we use the filename instead of the antics of the original_file on the file_set?
  # We have the filename in create_derivatives.

  # This is the location that Hyrax expects us to put files that will be added to Fedora.
  output_location_template = "file://#{absolute_derivative_path_for(named_derivative: named_derivative)}"

  # The generator knows the output extensions.
  generator = generator_name.constantize

  # This is the location where we hope the derivative rodeo will have generated the derived
  # file (e.g. a PDF page's txt file or an image's thumbnail).
  preprocessed_location_template = self.class.derivative_rodeo_uri(
    file_set: file_set,
    filename: filename,
    extension: generator.output_extension
  )

  begin
    rodeo_generator = generator.new(
      input_uris: [input_uri],
      preprocessed_location_template: preprocessed_location_template,
      output_location_template: output_location_template
    )
    rodeo_generator.generated_files.first.file_path
  rescue => e
    message = "#{generator}#generated_files encountered `#{e.class}' “#{e}” for " \
              "input_uri: #{input_uri.inspect}, " \
              "output_location_template: #{output_location_template.inspect}, and " \
              "preprocessed_location_template: #{preprocessed_location_template.inspect}."
    # The failure is re-raised as a RuntimeError that carries the context above and the
    # original backtrace.
    wrapped = RuntimeError.new(message)
    wrapped.set_backtrace(e.backtrace)
    # Why this additional logging? Because you may splice in a different logger for the
    # Rodeo, and having this information might be helpful as you try to debug a very woolly
    # operation.
    DerivativeRodeo.logger.error(message)
    raise wrapped
  end
end
|
343
|
+
# rubocop:enable Metrics/MethodLength
|
344
|
+
|
345
|
+
# Collects, for every configured type key (e.g. :pdf, :image), the mime types the file set's
# class reports through its "<type>_mime_types" method.
def supported_mime_types
  file_set_class = file_set.class
  named_derivatives_and_generators_by_type.keys.flat_map do |type|
    file_set_class.public_send("#{type}_mime_types")
  end
end
|
349
|
+
|
350
|
+
# Where can we find the "original" file that we want to operate on?
|
351
|
+
#
|
352
|
+
# @return [String]
|
353
|
+
def input_uri
  return @input_uri if defined?(@input_uri)

  # TODO: I've built up logic to use the derivative_rodeo_uri, however what if we don't need to
  # look at that location? If not there, then we need to look to the file associated with the
  # file set.
  # QUESTION: Should we skip using the derivative rodeo uri as a candidate for the input_uri?
  #
  # .derivative_rodeo_uri is documented to return nil when no candidate can be derived, so the
  # storage location is only consulted when there is a URI to look up.
  candidate_uri = self.class.derivative_rodeo_uri(file_set: file_set)
  @input_uri =
    if candidate_uri && DerivativeRodeo::StorageLocations::BaseLocation.from_uri(candidate_uri).exist?
      candidate_uri
    elsif file_set.import_url.present?
      file_set.import_url
    else
      # TODO: This is the fedora URL representing the file we uploaded; is that adequate? Will we
      # have access to this file?
      file_set.original_file.uri.to_s
    end
end
|
372
|
+
|
373
|
+
# @return [Boolean] whether #supported_mime_types includes the file set's mime type
def in_the_rodeo?
|
374
|
+
# We can assume that we are not going to have pre-processed an unsupported mime type. We
|
375
|
+
# could check if the original file is in the rodeo, but the way it's designed the rodeo is
|
376
|
+
# capable of generating all of the enumerated derivatives (see
|
377
|
+
# .named_derivatives_and_generators_by_type) for the supported mime type.
|
378
|
+
supported_mime_types.include?(mime_type)
|
379
|
+
end
|
380
|
+
end
|
381
|
+
# rubocop:enable Metrics/ClassLength
|
382
|
+
end
|