iiif_print 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +102 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
- data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
- data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
- data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
- data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/app/listeners/iiif_print/listener.rb +31 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
- data/app/models/concerns/iiif_print/solr/document.rb +19 -3
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
- data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
- data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
- data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
- data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/initializers/simple_schema_loader.rb +1 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +11 -10
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +14 -2
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
- data/lib/iiif_print/catalog_search_builder.rb +7 -3
- data/lib/iiif_print/configuration.rb +205 -8
- data/lib/iiif_print/data/fileset_helper.rb +3 -3
- data/lib/iiif_print/data/work_derivatives.rb +4 -4
- data/lib/iiif_print/engine.rb +53 -15
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jp2_derivative_service.rb +4 -1
- data/lib/iiif_print/lineage_service.rb +47 -13
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/pdf_derivative_service.rb +3 -1
- data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
- data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
- data/lib/iiif_print/persistence_layer.rb +118 -0
- data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
- data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
- data/lib/iiif_print/tiff_derivative_service.rb +3 -1
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +210 -20
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +111 -196
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
- data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
- data/bin/rails +0 -13
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
- data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/.keep.txt +0 -1
- data/spec/factories/ability.rb +0 -6
- data/spec/factories/newspaper_issue.rb +0 -7
- data/spec/factories/newspaper_page.rb +0 -7
- data/spec/factories/newspaper_page_solr_document.rb +0 -12
- data/spec/factories/newspaper_title.rb +0 -8
- data/spec/factories/uploaded_pdf_file.rb +0 -9
- data/spec/factories/uploaded_txt_file.rb +0 -9
- data/spec/factories/user.rb +0 -13
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +0 -7
- data/spec/fixtures/files/alto-2-0.xsd +0 -714
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +0 -16
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +0 -31
- data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
- data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
- data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +0 -202
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
- data/spec/helpers/iiif_print_helper_spec.rb +0 -43
- data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
- data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
- data/spec/iiif_print/configuration_spec.rb +0 -67
- data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
- data/spec/iiif_print/data/work_file_spec.rb +0 -99
- data/spec/iiif_print/data/work_files_spec.rb +0 -237
- data/spec/iiif_print/image_tool_spec.rb +0 -109
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
- data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
- data/spec/iiif_print/lineage_service_spec.rb +0 -13
- data/spec/iiif_print/metadata_spec.rb +0 -115
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
- data/spec/iiif_print_spec.rb +0 -51
- data/spec/misc_shared.rb +0 -111
- data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
- data/spec/models/solr_document_spec.rb +0 -14
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
- data/spec/spec_helper.rb +0 -181
- data/spec/support/controller_level_helpers.rb +0 -28
- data/spec/support/iiif_print_models.rb +0 -127
- data/spec/test_app_templates/blacklight.yml +0 -9
- data/spec/test_app_templates/fedora.yml +0 -15
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
- data/spec/test_app_templates/redis.yml +0 -9
- data/spec/test_app_templates/solr/conf/schema.xml +0 -362
- data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
- data/spec/test_app_templates/solr.yml +0 -7
@@ -1,7 +1,38 @@
|
|
1
1
|
module IiifPrint
|
2
|
+
# rubocop:disable Metrics/ClassLength
|
2
3
|
class Configuration
|
3
4
|
attr_writer :after_create_fileset_handler
|
4
5
|
|
6
|
+
attr_writer :ingest_queue_name
|
7
|
+
##
|
8
|
+
# @return [Symbol, Proc]
|
9
|
+
def ingest_queue_name
|
10
|
+
return @ingest_queue_name if @ingest_queue_name.present?
|
11
|
+
if defined?(Hyrax)
|
12
|
+
Hyrax.config.ingest_queue_name
|
13
|
+
elsif defined?(Bulkrax) && Bulkrax.config.respond_to?(:ingest_queue_name)
|
14
|
+
Bulkrax.config.ingest_queue_name
|
15
|
+
else
|
16
|
+
:ingest
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
attr_writer :persistence_adapter
|
21
|
+
def persistence_adapter
|
22
|
+
@persistence_adapter || default_persistence_adapter
|
23
|
+
end
|
24
|
+
|
25
|
+
def default_persistence_adapter
|
26
|
+
# There's probably some configuration of Hyrax we could use to better refine this; but it's
|
27
|
+
# likely a reasonable guess. The main goal is to not break existing implementations and
|
28
|
+
# maintain an upgrade path.
|
29
|
+
if Gem::Version.new(Hyrax::VERSION) >= Gem::Version.new('6.0.0')
|
30
|
+
IiifPrint::PersistenceLayer::ValkyrieAdapter
|
31
|
+
else
|
32
|
+
IiifPrint::PersistenceLayer::ActiveFedoraAdapter
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
5
36
|
# @param file_set [FileSet]
|
6
37
|
# @param user [User]
|
7
38
|
def handle_after_create_fileset(file_set, user)
|
@@ -12,6 +43,17 @@ module IiifPrint
|
|
12
43
|
end
|
13
44
|
end
|
14
45
|
|
46
|
+
attr_writer :ancestory_identifier_function
|
47
|
+
# The function, with arity 1, that receives a work and returns its identifier (as a string) for
|
48
|
+
# the purposes of object ancestry.
|
49
|
+
#
|
50
|
+
# @return [Proc]
|
51
|
+
def ancestory_identifier_function
|
52
|
+
# If the work.id is nil, keep it nil. Otherwise cast that id to a string; to deal with the
|
53
|
+
# `Valkyrie::ID`.
|
54
|
+
@ancestory_identifier_function ||= ->(work) { work.id&.to_s }
|
55
|
+
end
|
56
|
+
|
15
57
|
attr_writer :excluded_model_name_solr_field_values
|
16
58
|
# By default, this uses an array of human readable types
|
17
59
|
# ex: ['Generic Work', 'Image']
|
@@ -21,6 +63,45 @@ module IiifPrint
|
|
21
63
|
@excluded_model_name_solr_field_values = []
|
22
64
|
end
|
23
65
|
|
66
|
+
def skip_splitting_pdf_files_that_end_with_these_texts=(values)
|
67
|
+
@skip_splitting_pdf_files_that_end_with_these_texts = Array.wrap(values).map(&:downcase)
|
68
|
+
end
|
69
|
+
|
70
|
+
##
|
71
|
+
# @return [Array<String>] the file suffixes (e.g. [".reader.pdf"]) that we will skip. Per
|
72
|
+
# the implementation of {.split_for_path_suffix?}, these values are cast to
|
73
|
+
# downcase.
|
74
|
+
def skip_splitting_pdf_files_that_end_with_these_texts
|
75
|
+
@skip_splitting_pdf_files_that_end_with_these_texts || []
|
76
|
+
end
|
77
|
+
|
78
|
+
attr_writer :unique_child_title_generator_function
|
79
|
+
|
80
|
+
# The function, with keywords (though maybe you'll want to splat ignore a few), is responsible
|
81
|
+
# for generating the child work file title.
|
82
|
+
#
|
83
|
+
# The keyword parameters that will be passed to this function are:
|
84
|
+
#
|
85
|
+
# :original_pdf_path - The fully qualified pathname to the original PDF from which the images
|
86
|
+
# were split.
|
87
|
+
# :image_path - The fully qualified pathname for an image of the single page from the PDF.
|
88
|
+
# :parent_work - The object in which we're "attaching" the image.
|
89
|
+
# :page_number - The image is of the N-th page_number of the original PDF
|
90
|
+
# :page_padding - A helper number that indicates the number of significant digits of pages
|
91
|
+
# (e.g. 150 pages would have a padding of 3).
|
92
|
+
#
|
93
|
+
# @return [Proc]
|
94
|
+
# rubocop:disable Lint/UnusedBlockArgument
|
95
|
+
def unique_child_title_generator_function
|
96
|
+
@unique_child_title_generator_function ||= lambda { |original_pdf_path:, image_path:, parent_work:, page_number:, page_padding:|
|
97
|
+
identifier = parent_work.id
|
98
|
+
filename = File.basename(original_pdf_path)
|
99
|
+
page_suffix = "Page #{(page_number.to_i + 1).to_s.rjust(page_padding.to_i, '0')}"
|
100
|
+
"#{identifier} - #{filename} #{page_suffix}"
|
101
|
+
}
|
102
|
+
end
|
103
|
+
# rubocop:enable Lint/UnusedBlockArgument
|
104
|
+
|
24
105
|
# This method wraps Hyrax's configuration so we can sniff out the correct method to use. The
|
25
106
|
# {Hyrax::Configuration#whitelisted_ingest_dirs} is deprecated in favor of
|
26
107
|
# {Hyrax::Configuration#registered_ingest_dirs}.
|
@@ -44,7 +125,7 @@ module IiifPrint
|
|
44
125
|
|
45
126
|
attr_writer :default_iiif_manifest_version
|
46
127
|
def default_iiif_manifest_version
|
47
|
-
@default_iiif_manifest_version || 2
|
128
|
+
@default_iiif_manifest_version.presence || 2
|
48
129
|
end
|
49
130
|
|
50
131
|
attr_writer :metadata_fields
|
@@ -81,19 +162,135 @@ module IiifPrint
|
|
81
162
|
end
|
82
163
|
# rubocop:enable Metrics/MethodLength
|
83
164
|
|
165
|
+
attr_writer :additional_tesseract_options
|
166
|
+
##
|
167
|
+
# The additional options to pass to the Tesseract configuration
|
168
|
+
#
|
169
|
+
# @see https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html
|
170
|
+
# @return [String]
|
171
|
+
def additional_tesseract_options
|
172
|
+
@additional_tesseract_options || ""
|
173
|
+
end
|
174
|
+
|
175
|
+
attr_writer :uv_config_path
|
176
|
+
##
|
177
|
+
# According to https://github.com/samvera/hyrax/wiki/Hyrax-Management-Guide#universal-viewer-config
|
178
|
+
# the name of the UV config file should be /uv/uv_config.json (with an _)
|
179
|
+
# However, in most applications, it is /uv/uv-config.json (with a -)
|
180
|
+
def uv_config_path
|
181
|
+
@uv_config_path || "/uv/uv-config.json"
|
182
|
+
end
|
183
|
+
|
184
|
+
attr_writer :uv_base_path
|
185
|
+
##
|
186
|
+
# While we're at it, we're going to go ahead and make the base path configurable as well
|
187
|
+
def uv_base_path
|
188
|
+
@uv_base_path || "/uv/uv.html"
|
189
|
+
end
|
190
|
+
|
191
|
+
attr_writer :child_work_attributes_function
|
192
|
+
##
|
193
|
+
# Here we allow for customization of the child work attributes
|
194
|
+
# rubocop:disable Metrics/MethodLength, Metrics/BlockLength
|
195
|
+
def child_work_attributes_function
|
196
|
+
@child_work_attributes_function ||= lambda do |parent_work:, admin_set_id:|
|
197
|
+
embargo = parent_work.embargo
|
198
|
+
lease = parent_work.lease
|
199
|
+
embargo_params = {}
|
200
|
+
lease_params = {}
|
201
|
+
visibility_params = {}
|
202
|
+
|
203
|
+
if embargo
|
204
|
+
embargo_params = {
|
205
|
+
visibility: 'embargo',
|
206
|
+
visibility_after_embargo: embargo.visibility_after_embargo,
|
207
|
+
visibility_during_embargo: embargo.visibility_during_embargo,
|
208
|
+
embargo_release_date: embargo.embargo_release_date
|
209
|
+
}
|
210
|
+
elsif lease
|
211
|
+
lease_params = {
|
212
|
+
visibility: 'lease',
|
213
|
+
visibility_after_lease: lease.visibility_after_lease,
|
214
|
+
visibility_during_lease: lease.visibility_during_lease,
|
215
|
+
lease_release_date: lease.lease_release_date
|
216
|
+
}
|
217
|
+
else
|
218
|
+
visibility_params = { visibility: parent_work.visibility.to_s }
|
219
|
+
end
|
220
|
+
|
221
|
+
params = {
|
222
|
+
admin_set_id: admin_set_id.to_s,
|
223
|
+
creator: parent_work.creator.to_a,
|
224
|
+
rights_statement: parent_work.rights_statement.to_a,
|
225
|
+
is_child: true
|
226
|
+
}
|
227
|
+
|
228
|
+
params.merge!(embargo_params).merge!(lease_params).merge!(visibility_params)
|
229
|
+
end
|
230
|
+
end
|
231
|
+
# rubocop:enable Metrics/MethodLength, Metrics/BlockLength
|
232
|
+
|
84
233
|
attr_writer :sort_iiif_manifest_canvases_by
|
234
|
+
##
|
235
|
+
# Normally, the canvases are sorted by the `ordered_members` association.
|
236
|
+
# However, if you want it to be sorted by another property, you can set this
|
237
|
+
# configuration. Change `nil` to something like `:title` or `:identifier`.
|
238
|
+
#
|
239
|
+
# Should you want to sort by the filename of the image, you
|
240
|
+
# set `nil` to `:label`. This looks at the canvas label, which is typically set
|
241
|
+
# to the filename of the image.
|
85
242
|
def sort_iiif_manifest_canvases_by
|
86
|
-
@sort_iiif_manifest_canvases_by ||
|
243
|
+
@sort_iiif_manifest_canvases_by || nil
|
87
244
|
end
|
88
245
|
|
89
|
-
attr_writer :
|
246
|
+
attr_writer :ocr_coords_from_json_function
|
90
247
|
##
|
91
|
-
#
|
248
|
+
# This is used to determine where to pull the OCR coordinates from. By default, it will
|
249
|
+
# pull from the JSON file that is generated by the OCR engine. However, if you have a
|
250
|
+
# different source, you can set this configuration. Current implementation has access to
|
251
|
+
# the `file_set_id` and the `document` [SolrDocument].
|
92
252
|
#
|
93
|
-
# @see
|
94
|
-
|
95
|
-
|
96
|
-
|
253
|
+
# @see IiifPrint::BlacklightIiifSearch::AnnotationDecorator#fetch_and_parse_coords
|
254
|
+
def ocr_coords_from_json_function
|
255
|
+
@ocr_coords_from_json_function ||= lambda do |file_set_id:, **|
|
256
|
+
IiifPrint::Data::WorkDerivatives.data(from: file_set_id, of_type: 'json')
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
attr_writer :all_text_generator_function
|
261
|
+
##
|
262
|
+
# This configuration determines where to pull the full text from. By default, it will
|
263
|
+
# pull from the TXT file that is generated by the OCR engine. However, if your
|
264
|
+
# application has its own implementation of generating the full text, then you can
|
265
|
+
# set your own configuration here.
|
266
|
+
def all_text_generator_function
|
267
|
+
@all_text_generator_function ||= lambda do |object:|
|
268
|
+
IiifPrint::Data::WorkDerivatives.data(from: object, of_type: 'txt')
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
attr_writer :iiif_metadata_field_presentation_order
|
273
|
+
##
|
274
|
+
# This is the default sorter for the metadata. It will sort by the order of the keys specified.
|
275
|
+
# By default, this is turned off as it returns nil. If you want to turn it on, you can set this
|
276
|
+
# to an array of symbols naming the properties on the work.
|
277
|
+
#
|
278
|
+
# @example [:title, :description, :date_created]
|
279
|
+
# @return [Array<Symbol>]
|
280
|
+
def iiif_metadata_field_presentation_order
|
281
|
+
@iiif_metadata_field_presentation_order || nil
|
282
|
+
end
|
283
|
+
|
284
|
+
def questioning_authority_fields=(fields)
|
285
|
+
@questioning_authority_fields = Array.wrap(fields).map(&:to_s)
|
286
|
+
end
|
287
|
+
|
288
|
+
##
|
289
|
+
# This is used to explicitly set which fields should be rendered as a Questioning Authority in the UV.
|
290
|
+
# By default, we render `rights_statement` and `license` as QA fields.
|
291
|
+
def questioning_authority_fields
|
292
|
+
@questioning_authority_fields ||= ['rights_statement', 'license']
|
97
293
|
end
|
98
294
|
end
|
295
|
+
# rubocop:enable Metrics/ClassLength
|
99
296
|
end
|
@@ -7,7 +7,7 @@ module IiifPrint
|
|
7
7
|
# if context is itself a string, presume it is a file set id
|
8
8
|
return @work if @work.is_a? String
|
9
9
|
# if context is not a String, presume a work or fileset context:
|
10
|
-
fileset
|
10
|
+
fileset&.id
|
11
11
|
end
|
12
12
|
|
13
13
|
def first_fileset
|
@@ -15,9 +15,9 @@ module IiifPrint
|
|
15
15
|
# get the fileset from that id
|
16
16
|
return FileSet.find(@work) if @work.is_a?(String)
|
17
17
|
# if "work" context is a FileSet, not actual work, return it
|
18
|
-
return @work if @work.is_a? FileSet
|
18
|
+
return @work if @work.is_a?(Hyrax::FileSet) || @work.is_a?(FileSet)
|
19
19
|
# in most cases, get from work's members:
|
20
|
-
filesets = @work.members.select { |m| m.is_a? FileSet }
|
20
|
+
filesets = @work.members.select { |m| m.is_a?(Hyrax::FileSet) || m.is_a?(FileSet) }
|
21
21
|
filesets.empty? ? nil : filesets[0]
|
22
22
|
end
|
23
23
|
end
|
@@ -42,16 +42,16 @@ module IiifPrint
|
|
42
42
|
#
|
43
43
|
# @return [String]
|
44
44
|
def self.data(from:, of_type:)
|
45
|
-
new(from).data(of_type)
|
45
|
+
new(work: from).data(of_type)
|
46
46
|
end
|
47
47
|
|
48
48
|
# alternate constructor spelling:
|
49
49
|
def self.of(work, fileset = nil, parent = nil)
|
50
|
-
new(work, fileset, parent)
|
50
|
+
new(work: work, fileset: fileset, parent: parent)
|
51
51
|
end
|
52
52
|
|
53
53
|
# Adapt work and either specific or first fileset
|
54
|
-
def initialize(work, fileset
|
54
|
+
def initialize(work: nil, fileset: nil, parent: nil)
|
55
55
|
# adapted context usually work, may be string id of FileSet
|
56
56
|
@work = work
|
57
57
|
@fileset = fileset.nil? ? first_fileset : fileset
|
@@ -239,7 +239,7 @@ module IiifPrint
|
|
239
239
|
# of the first assigned file path for single-file work.
|
240
240
|
work_file = parent
|
241
241
|
return if work_file.nil?
|
242
|
-
work_files = work_file
|
242
|
+
work_files = IiifPrint.parent_for(work_file)
|
243
243
|
return if work_files.nil?
|
244
244
|
work_files.assigned[0]
|
245
245
|
else
|
data/lib/iiif_print/engine.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'active_fedora'
|
2
2
|
require 'hyrax'
|
3
3
|
require 'blacklight_iiif_search'
|
4
|
+
require 'derivative_rodeo'
|
4
5
|
|
5
6
|
module IiifPrint
|
6
7
|
# module constants:
|
@@ -10,8 +11,18 @@ module IiifPrint
|
|
10
11
|
class Engine < ::Rails::Engine
|
11
12
|
isolate_namespace IiifPrint
|
12
13
|
|
14
|
+
config.eager_load_paths += %W[#{config.root}/app/transactions]
|
15
|
+
|
16
|
+
initializer 'requires' do
|
17
|
+
require 'hyrax/transactions/iiif_print_container_decorator'
|
18
|
+
require 'iiif_print/persistence_layer'
|
19
|
+
require 'iiif_print/persistence_layer/active_fedora_adapter' if defined?(ActiveFedora)
|
20
|
+
require 'iiif_print/persistence_layer/valkyrie_adapter' if defined?(Valkyrie)
|
21
|
+
end
|
22
|
+
|
13
23
|
# rubocop:disable Metrics/BlockLength
|
14
24
|
config.to_prepare do
|
25
|
+
require "iiif_print/jobs/create_relationships_job"
|
15
26
|
# We don't have a hard requirement of Bulkrax but in our experience, lingering on earlier
|
16
27
|
# versions can introduce bugs of both Bulkrax and some of the assumptions that we've resolved.
|
17
28
|
# Very early versions of Bulkrax do not have VERSION defined
|
@@ -35,32 +46,57 @@ module IiifPrint
|
|
35
46
|
IiifPrint::PluggableDerivativeService
|
36
47
|
)
|
37
48
|
|
49
|
+
Hyrax.publisher.subscribe(IiifPrint::Listener.new) if Hyrax.respond_to?(:publisher)
|
50
|
+
|
38
51
|
Hyrax::IiifManifestPresenter.prepend(IiifPrint::IiifManifestPresenterBehavior)
|
39
52
|
Hyrax::IiifManifestPresenter::Factory.prepend(IiifPrint::IiifManifestPresenterFactoryBehavior)
|
40
53
|
Hyrax::ManifestBuilderService.prepend(IiifPrint::ManifestBuilderServiceBehavior)
|
41
54
|
Hyrax::Renderers::FacetedAttributeRenderer.prepend(Hyrax::Renderers::FacetedAttributeRendererDecorator)
|
42
55
|
Hyrax::WorksControllerBehavior.prepend(IiifPrint::WorksControllerBehaviorDecorator)
|
56
|
+
"Hyrax::Transactions::Steps::DeleteAllFileSets".safe_constantize&.prepend(Hyrax::Transactions::Steps::DeleteAllFileSetsDecorator)
|
57
|
+
# Hyku::WorksControllerBehavior was introduced in Hyku v6.0.0+. Yes we don't depend on Hyku,
|
58
|
+
# but this allows us to do minimal Hyku antics with IiifPrint.
|
59
|
+
'Hyku::WorksControllerBehavior'.safe_constantize&.prepend(IiifPrint::WorksControllerBehaviorDecorator)
|
60
|
+
|
61
|
+
Hyrax::FileSetPresenter.prepend(IiifPrint::FileSetPresenterDecorator)
|
43
62
|
Hyrax::WorkShowPresenter.prepend(IiifPrint::WorkShowPresenterDecorator)
|
63
|
+
Hyrax::IiifHelper.prepend(IiifPrint::IiifHelperDecorator)
|
44
64
|
|
45
|
-
|
46
|
-
|
65
|
+
if ActiveModel::Type::Boolean.new.cast(ENV.fetch('HYRAX_VALKYRIE', false))
|
66
|
+
# Newer versions of Hyrax favor `Hyrax::Indexers::FileSetIndexer` and deprecate
|
67
|
+
# `Hyrax::ValkyrieFileSetIndexer`.
|
68
|
+
'Hyrax::Indexers::FileSetIndexer'.safe_constantize&.prepend(IiifPrint::FileSetIndexer)
|
47
69
|
|
48
|
-
|
49
|
-
|
50
|
-
|
70
|
+
# Versions 3.0+ of Hyrax have `Hyrax::ValkyrieFileSetIndexer` so we want to decorate that as
|
71
|
+
# well. We want to use the elsif construct because later on Hyrax::ValkyrieFileSetIndexer
|
72
|
+
# inherits from Hyrax::Indexers::FileSetIndexer and only implements:
|
73
|
+
# `def initialize(*args); super; end`
|
74
|
+
'Hyrax::ValkyrieFileSetIndexer'.safe_constantize&.prepend(IiifPrint::FileSetIndexer)
|
51
75
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
# Extending this class because there is an #ability= but not #ability and this definition
|
57
|
-
# mirrors the Hyrax::IiifManifestPresenter#ability.
|
58
|
-
module Hyrax::IiifManifestPresenter::DisplayImagePresenterDecorator
|
59
|
-
def ability
|
60
|
-
@ability ||= NullAbility.new
|
76
|
+
# Newer versions of Hyrax favor `Hyrax::Indexers::PcdmObjectIndexer` and deprecate
|
77
|
+
# `Hyrax::ValkyrieWorkIndexer`
|
78
|
+
indexers = Hyrax.config.curation_concerns.map do |concern|
|
79
|
+
"#{concern}ResourceIndexer".safe_constantize
|
61
80
|
end
|
81
|
+
indexers.each { |indexer| indexer.prepend(IiifPrint::ChildWorkIndexer) }
|
82
|
+
|
83
|
+
# Versions 3.0+ of Hyrax have `Hyrax::ValkyrieWorkIndexer` so we want to decorate that as
|
84
|
+
# well. We want to use the elsif construct because later on Hyrax::ValkyrieWorkIndexer
|
85
|
+
# inherits from Hyrax::Indexers::PcdmObjectIndexer and only implements:
|
86
|
+
# `def initialize(*args); super; end`
|
87
|
+
'Hyrax::ValkyrieWorkIndexer'.safe_constantize&.prepend(IiifPrint::ChildWorkIndexer)
|
88
|
+
else
|
89
|
+
# The ActiveFedora::Base indexer for FileSets
|
90
|
+
Hyrax::FileSetIndexer.prepend(IiifPrint::FileSetIndexer)
|
91
|
+
# The ActiveFedora::Base indexer for Works
|
92
|
+
Hyrax::WorkIndexer.prepend(IiifPrint::ChildWorkIndexer)
|
62
93
|
end
|
63
|
-
|
94
|
+
|
95
|
+
::BlacklightIiifSearch::IiifSearchResponse.prepend(IiifPrint::IiifSearchResponseDecorator)
|
96
|
+
::BlacklightIiifSearch::IiifSearchAnnotation.prepend(IiifPrint::BlacklightIiifSearch::AnnotationDecorator)
|
97
|
+
::BlacklightIiifSearch::IiifSearch.prepend(IiifPrint::IiifSearchDecorator)
|
98
|
+
Hyrax::Actors::FileSetActor.prepend(IiifPrint::Actors::FileSetActorDecorator)
|
99
|
+
Hyrax::Actors::CleanupFileSetsActor.prepend(IiifPrint::Actors::CleanupFileSetsActorDecorator)
|
64
100
|
|
65
101
|
Hyrax.config do |config|
|
66
102
|
config.callback.set(:after_create_fileset) do |file_set, user|
|
@@ -71,6 +107,8 @@ module IiifPrint
|
|
71
107
|
|
72
108
|
config.after_initialize do
|
73
109
|
IiifPrint::Solr::Document.decorate(SolrDocument)
|
110
|
+
Hyrax::IiifManifestPresenter::DisplayImagePresenter
|
111
|
+
.prepend(IiifPrint::IiifManifestPresenterBehavior::DisplayImagePresenterBehavior)
|
74
112
|
end
|
75
113
|
# rubocop:enable Metrics/BlockLength
|
76
114
|
end
|
data/lib/iiif_print/errors.rb
CHANGED
@@ -6,4 +6,22 @@ module IiifPrint
|
|
6
6
|
# Data transformation or read-error:
|
7
7
|
class DataError < IiifPrintError
|
8
8
|
end
|
9
|
+
|
10
|
+
class MissingFileError < IiifPrintError
|
11
|
+
end
|
12
|
+
|
13
|
+
class WorkNotConfiguredToSplitFileSetError < IiifPrintError
|
14
|
+
def initialize(file_set:, work:)
|
15
|
+
message = "Expected that we would be splitting #{file_set.class} ID=#{file_set&.id} #to_param=#{file_set&.to_param} " \
|
16
|
+
"for work #{work.class} ID=#{work&.id} #to_param=#{work&.to_param}. " \
|
17
|
+
"However it was not configured for PDF splitting."
|
18
|
+
super(message)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class UnexpectedMimeTypeError < IiifPrintError
|
23
|
+
def initialize(file_set:, mime_type:)
|
24
|
+
super "Unexpected mime_type #{mime_type} for #{file_set.class} ID=#{file_set.id.inspect}"
|
25
|
+
end
|
26
|
+
end
|
9
27
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Overrides Hyrax to add show_parents_only to processor chain
|
4
|
+
module IiifPrint
|
5
|
+
class HomepageSearchBuilder < Hyrax::HomepageSearchBuilder
|
6
|
+
self.default_processor_chain += [:show_parents_only]
|
7
|
+
|
8
|
+
def show_parents_only(solr_parameters)
|
9
|
+
query = if blacklight_params["include_child_works"] == 'true'
|
10
|
+
IiifPrint.solr_construct_query(is_child_bsi: 'true')
|
11
|
+
else
|
12
|
+
IiifPrint.solr_construct_query(is_child_bsi: nil)
|
13
|
+
end
|
14
|
+
solr_parameters[:fq] += [query]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -3,11 +3,10 @@ require 'tmpdir'
|
|
3
3
|
|
4
4
|
module IiifPrint
|
5
5
|
class ImageTool
|
6
|
-
attr_accessor :path
|
6
|
+
attr_accessor :path
|
7
7
|
|
8
8
|
def initialize(path)
|
9
9
|
@path = path
|
10
|
-
@ftype = magic
|
11
10
|
@metadata = nil
|
12
11
|
end
|
13
12
|
|
@@ -60,7 +59,7 @@ module IiifPrint
|
|
60
59
|
end
|
61
60
|
|
62
61
|
def im_line_select(lines, key)
|
63
|
-
line = lines.find { |l| l.scrub.downcase.strip.start_with?(key) }
|
62
|
+
line = lines.find { |l| l.scrub.downcase.strip.start_with?(key.downcase) }
|
64
63
|
# Given "key: value" line, return the value as String stripped of
|
65
64
|
# leading and trailing whitespace
|
66
65
|
return line if line.nil?
|
@@ -75,20 +74,25 @@ module IiifPrint
|
|
75
74
|
|
76
75
|
# @return [Array<String>] lines of output from imagemagick `identify`
|
77
76
|
def im_identify
|
78
|
-
cmd = "identify -
|
77
|
+
cmd = "identify -format 'Geometry: %G\nDepth: %[bit-depth]\nColorspace: %[colorspace]\nAlpha: %A\nMIME type: %m\n' #{path}"
|
79
78
|
`#{cmd}`.lines
|
80
79
|
end
|
81
80
|
|
82
81
|
def im_mime(lines)
|
83
82
|
return 'application/pdf' if pdf? # workaround older imagemagick bug
|
84
|
-
|
83
|
+
|
84
|
+
format = im_line_select(lines, 'mime type')
|
85
|
+
return if format.blank?
|
86
|
+
|
87
|
+
# `identify -format` with the `%m` switch only gives the format, we are coercing it into an image mime type
|
88
|
+
Mime::Type.lookup_by_extension(format.downcase).to_s
|
85
89
|
end
|
86
90
|
|
87
91
|
def populate_im_color!(lines, result)
|
88
92
|
bpc = im_line_select(lines, 'depth').split('-')[0].to_i # '1-bit' -> 1
|
89
93
|
colorspace = im_line_select(lines, 'colorspace')
|
90
94
|
color = colorspace == 'Gray' ? 'gray' : 'color'
|
91
|
-
has_alpha = !im_line_select(lines, '
|
95
|
+
has_alpha = ![nil, 'Undefined'].include?(im_line_select(lines, 'alpha')) # `!x == 'Undefined'` parsed as `(!x) == 'Undefined'` (always false)
|
92
96
|
result[:num_components] = (color == 'gray' ? 1 : 3) + (has_alpha ? 1 : 0)
|
93
97
|
result[:color] = bpc == 1 ? 'monochrome' : color
|
94
98
|
result[:bits_per_component] = bpc
|
@@ -105,11 +109,11 @@ module IiifPrint
|
|
105
109
|
end
|
106
110
|
|
107
111
|
def magic
|
108
|
-
File.read(@path, 23, 0)
|
112
|
+
@magic ||= File.read(@path, 23, 0)
|
109
113
|
end
|
110
114
|
|
111
115
|
def jp2?
|
112
|
-
|
116
|
+
magic.end_with?('ftypjp2')
|
113
117
|
end
|
114
118
|
|
115
119
|
def pdf?
|
@@ -53,7 +53,10 @@ module IiifPrint
|
|
53
53
|
render_cmd = opj_command
|
54
54
|
|
55
55
|
# Run the generated command to make derivative file at @dest_path
|
56
|
-
`#{render_cmd}`
|
56
|
+
data = `#{render_cmd}`
|
57
|
+
|
58
|
+
# Create Hyrax::FileMetadata object for the derivatives (if Valkyrie)
|
59
|
+
IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
|
57
60
|
|
58
61
|
# Clean up any intermediate files or symlinks used during creation
|
59
62
|
cleanup_intermediate
|
@@ -2,7 +2,8 @@ module IiifPrint
|
|
2
2
|
# The purpose of this module is to encode lineage related services:
|
3
3
|
#
|
4
4
|
# - {.ancestor_ids_for}
|
5
|
-
# - {.
|
5
|
+
# - {.descendent_member_ids_for}
|
6
|
+
# - {.ancestry_identifier_for}
|
6
7
|
#
|
7
8
|
# The ancestor and descendent_file_sets are useful for ensuring we index together related items.
|
8
9
|
# For example, when I have a work that is a book, and one file set per page of that book, when I
|
@@ -15,27 +16,60 @@ module IiifPrint
|
|
15
16
|
#
|
16
17
|
# @param object [#in_works] An object that responds to #in_works
|
17
18
|
# @return [Array<String>]
|
19
|
+
#
|
20
|
+
# @note For those implementing their own lineage service, verify that you are not returning
|
21
|
+
# an array of
|
18
22
|
def self.ancestor_ids_for(object)
|
19
23
|
ancestor_ids ||= []
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
# Yes, we're fetching the works, then compressing those into identifiers. Because in the case
|
25
|
+
# of slugs, we need not the identifier, but the slug as the id.
|
26
|
+
IiifPrint.object_in_works(object).each do |work|
|
27
|
+
ancestor_ids << ancestry_identifier_for(work)
|
28
|
+
ancestor_ids += ancestor_ids_for(work) if work.respond_to?(:is_child) && work.is_child
|
23
29
|
end
|
24
|
-
|
30
|
+
# We must convert these to strings as Valkyrie's identifiers will be cast to hashes when we
|
31
|
+
# attempt to write the SolrDocument. Also, per documentation we return an Array of strings, not
|
32
|
+
# an Array that might include Valkyrie::ID objects.
|
33
|
+
ancestor_ids.flatten.compact.uniq.map(&:to_s)
|
34
|
+
end
|
35
|
+
|
36
|
+
##
|
37
|
+
# @api public
|
38
|
+
#
|
39
|
+
# Given the :work, return its identifier
|
40
|
+
#
|
41
|
+
# @param work [Object]
|
42
|
+
# @return [String]
|
43
|
+
def self.ancestry_identifier_for(work)
|
44
|
+
IiifPrint.config.ancestory_identifier_function.call(work)
|
25
45
|
end
|
26
46
|
|
27
47
|
##
|
28
48
|
# @param object [#ordered_works, #file_sets, #member_ids]
|
29
|
-
# @return [Array<String>] the ids of associated file sets
|
30
|
-
|
49
|
+
# @return [Array<String>] the ids of associated file sets and child works
|
50
|
+
#
|
51
|
+
# @see
|
52
|
+
# https://github.com/samvera/hyrax/blob/2b807fe101176d594129ef8a8fe466d3d03a372b/app/indexers/hyrax/work_indexer.rb#L15-L18
|
53
|
+
# for "clarification" of the comingling of file_set_ids and member_ids
|
54
|
+
def self.descendent_member_ids_for(object)
|
55
|
+
return unless object.respond_to?(:member_ids)
|
56
|
+
|
31
57
|
# enables us to return parents when searching for child OCR
|
32
|
-
|
33
|
-
|
34
|
-
|
58
|
+
#
|
59
|
+
# https://github.com/samvera/hydra-works/blob/c9b9dd0cf11de671920ba0a7161db68ccf9b7f6d/lib/hydra/works/models/concerns/work_behavior.rb#L90-L92
|
60
|
+
#
|
61
|
+
# The Hydra::Works implementation of file_set_ids is "members.select(&:file_set?).map(&:id)";
|
62
|
+
# so no sense doing `object.file_set_ids + object.member_ids`
|
63
|
+
file_set_ids = object.member_ids
|
64
|
+
IiifPrint.object_ordered_works(object)&.each do |child|
|
65
|
+
file_set_ids += Array.wrap(descendent_member_ids_for(child))
|
35
66
|
end
|
36
|
-
#
|
37
|
-
|
38
|
-
file_set_ids.flatten.uniq.compact
|
67
|
+
# We must convert these to strings as Valkyrie's identifiers will be cast to hashes when we
|
68
|
+
# attempt to write the SolrDocument.
|
69
|
+
file_set_ids.flatten.uniq.compact.map(&:to_s)
|
70
|
+
end
|
71
|
+
class << self
|
72
|
+
alias descendent_file_set_ids_for descendent_member_ids_for
|
39
73
|
end
|
40
74
|
end
|
41
75
|
end
|