iiif_print 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +98 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
- data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
- data/app/models/concerns/iiif_print/solr/document.rb +14 -0
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +10 -9
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +2 -1
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
- data/lib/iiif_print/catalog_search_builder.rb +5 -1
- data/lib/iiif_print/configuration.rb +145 -8
- data/lib/iiif_print/data/fileset_helper.rb +1 -1
- data/lib/iiif_print/data/work_derivatives.rb +3 -3
- data/lib/iiif_print/engine.rb +7 -13
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
- data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
- data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/lib/iiif_print/lineage_service.rb +29 -8
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +167 -12
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/spec/factories/newspaper_page_solr_document.rb +9 -1
- data/spec/fixtures/authorities/licenses.yml +4 -0
- data/spec/fixtures/authorities/rights_statements.yml +4 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
- data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
- data/spec/iiif_print/configuration_spec.rb +141 -15
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
- data/spec/iiif_print/lineage_service_spec.rb +1 -1
- data/spec/iiif_print/metadata_spec.rb +157 -23
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
- data/spec/iiif_print_spec.rb +125 -5
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
- data/spec/samvera/derivatives/configuration_spec.rb +41 -0
- data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
- data/spec/samvera/derivatives_spec.rb +54 -0
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +123 -35
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
data/README.md
CHANGED
@@ -35,9 +35,9 @@ IiifPrint supports:
|
|
35
35
|
* OCR keyword match highlighting
|
36
36
|
* viewer with page navigation and deep zooming
|
37
37
|
* splitting of PDFs to LZW compressed TIFFs for viewing
|
38
|
-
* configuring how the manifest canvases are sorted in the viewer
|
39
38
|
* adding metadata fields to the manifest with faceted search links and external links
|
40
39
|
* excluding specified work types to be found in the catalog search
|
40
|
+
* external IIIF image URLs that work with services such as serverless-iiif or Cantaloupe
|
41
41
|
|
42
42
|
A complete list of features can be found [here](https://github.com/scientist-softserv/iiif_print/wiki/Features-List).
|
43
43
|
|
@@ -86,15 +86,39 @@ IiifPrint easily integrates with your Hyrax 2.x applications.
|
|
86
86
|
* In `config/routes.rb`, it adds `concerns :iiif_search` in the `resources :solr_documents` block
|
87
87
|
* Adds `config/initializers/iiif_print.rb`
|
88
88
|
* Adds three migrations, `CreateIiifPrintDerivativeAttachments`, `CreateIiifPrintIngestFileRelations`, and `CreateIiifPrintPendingRelationships`
|
89
|
-
* In `solr/conf/schema.xml`, it adds Blacklight IIIF Search autocomplete config
|
90
|
-
* In `solr/conf/solrconfig.xml`, it adds Blacklight IIIF Search autocomplete config
|
91
|
-
* Adds `solr/lib/solr-tokenizing_suggester-7.x.jar`
|
92
89
|
|
93
90
|
(It may be helpful to run `git diff` after installation to see all the changes made by the installer.)
|
94
91
|
|
92
|
+
## Catalog to Universal Viewer search:
|
93
|
+
To enable a feature where the UV automatically picks up the search from the catalog, do the following:
|
94
|
+
* Add `highlight: urlDataProvider.get('q'),` into your uv.html in the `<script>` section.
|
95
|
+
```js
|
96
|
+
uv = createUV('#uv', {
|
97
|
+
root: '.',
|
98
|
+
iiifResourceUri: urlDataProvider.get('manifest'),
|
99
|
+
configUri: 'uv-config.json',
|
100
|
+
collectionIndex: Number(urlDataProvider.get('c', 0)),
|
101
|
+
manifestIndex: Number(urlDataProvider.get('m', 0)),
|
102
|
+
sequenceIndex: Number(urlDataProvider.get('s', 0)),
|
103
|
+
canvasIndex: Number(urlDataProvider.get('cv', 0)),
|
104
|
+
rangeId: urlDataProvider.get('rid', 0),
|
105
|
+
rotation: Number(urlDataProvider.get('r', 0)),
|
106
|
+
xywh: urlDataProvider.get('xywh', ''),
|
107
|
+
embedded: true,
|
108
|
+
highlight: urlDataProvider.get('q'), // <-- here's a good spot
|
109
|
+
locales: formattedLocales
|
110
|
+
}, urlDataProvider);
|
111
|
+
```
|
112
|
+
|
113
|
+
* Make sure to remove your application's `app/helpers/hyrax/iiif_helper.rb` and `app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb` (if they exist)
|
114
|
+
|
95
115
|
## Configuration to enable IiifPrint features
|
96
116
|
**NOTE: WorkTypes and models are used synonymously here.**
|
97
117
|
|
118
|
+
### IIIF URL configuration
|
119
|
+
|
120
|
+
If you set EXTERNAL_IIIF_URL in your environment, then IiifPrint will use that URL as the root for your IIIF URLs. It will also switch from using the file set ID to using the SHA1 of the file as the identifier. This enables using serverless-iiif or Cantaloupe (referred to as the service) by pointing the service to the same S3 bucket that FCREPO writes the uploaded files to. By setting it up that way, you do not need the service to connect to FCREPO or Hyrax at all; both natively support connecting to an S3 bucket to get their data.
|
121
|
+
|
98
122
|
### Model level configurations
|
99
123
|
|
100
124
|
In `app/models/{work_type}.rb` add `include IiifPrint.model_configuration` to any work types which require IiifPrint processing features (such as PDF splitting or OCR derivatives). See [lib/iiif_print.rb](./lib/iiif_print.rb) for details on configuration options.
|
@@ -126,10 +150,6 @@ IiifPrint.config do |config|
|
|
126
150
|
# Add configurable solr field key for searching, default key is: 'human_readable_type_sim' if
|
127
151
|
# another key is used, make sure to adjust the config.excluded_model_name_solr_field_values to match
|
128
152
|
config.excluded_model_name_solr_field_key = 'some_solr_field_key'
|
129
|
-
|
130
|
-
# Configure how the manifest sorts the canvases, by default it sorts by `:title`, but a different
|
131
|
-
# model property may be desired such as :date_published
|
132
|
-
config.sort_iiif_manifest_canvases_by = :date_published
|
133
153
|
end
|
134
154
|
```
|
135
155
|
|
@@ -146,7 +166,7 @@ TO ENABLE OCR Search (from the UV and catalog search)
|
|
146
166
|
}
|
147
167
|
end
|
148
168
|
```
|
149
|
-
* Set `config.search_builder_class = IiifPrint::CatalogSearchBuilder` to remove works from the catalog search results if `is_child_bsi: true`
|
169
|
+
* Set `config.search_builder_class = IiifPrint::CatalogSearchBuilder` to remove works from the catalog search results if `is_child_bsi: true`
|
150
170
|
* Ensure that all text search is configured in default_solr_params config block:
|
151
171
|
```rb
|
152
172
|
config.default_solr_params = {
|
@@ -156,6 +176,75 @@ TO ENABLE OCR Search (from the UV and catalog search)
|
|
156
176
|
}
|
157
177
|
```
|
158
178
|
|
179
|
+
To remove child works from the recent works on the homepage:
|
180
|
+
### homepage_controller.rb
|
181
|
+
* In the HomepageController, change the search_builder_class to remove works from recent_documents if `is_child_bsi: true`
|
182
|
+
```rb
|
183
|
+
require "iiif_print/homepage_search_builder"
|
184
|
+
|
185
|
+
def search_builder_class
|
186
|
+
IiifPrint::HomepageSearchBuilder
|
187
|
+
end
|
188
|
+
```
|
189
|
+
|
190
|
+
### Skipping Certain File Suffixes for PDF Splitting
|
191
|
+
|
192
|
+
By default, when a work is configured for splitting PDFs, we will split all PDFs. However, in some cases you may not want to split a PDF, based on the suffix of its file name. In that case, configure the suffixes to skip as follows:
|
193
|
+
|
194
|
+
```ruby
|
195
|
+
IiifPrint.config do |config|
|
196
|
+
config.skip_splitting_pdf_files_that_end_with_these_texts = ['.reader.pdf']
|
197
|
+
end
|
198
|
+
```
|
199
|
+
|
200
|
+
### Derivative Rodeo Configuration
|
201
|
+
|
202
|
+
The Derivative Rodeo is used in two ways:
|
203
|
+
|
204
|
+
- Configuring the `Hyrax::DerivativeService` by adding `IiifPrint::DerivativeRodeoService`
|
205
|
+
- Enabling the Derivative Rodeo PDF Splitting service via `IiifPrint.model_configuration`
|
206
|
+
|
207
|
+
#### Configuring Hyrax::Derivative
|
208
|
+
|
209
|
+
In the application initializer:
|
210
|
+
|
211
|
+
```ruby
|
212
|
+
Hyrax::DerivativeService.services = [
|
213
|
+
IiifPrint::DerivativeRodeoService,
|
214
|
+
Hyrax::FileSetDerivativesService]
|
215
|
+
```
|
216
|
+
|
217
|
+
#### Enabling Derivative Rodeo PDF Splitting
|
218
|
+
|
219
|
+
The [IiifPrint.model\_configuration method](./lib/iiif_print.rb) allows for specifying the `pdf_splitter_service` as below:
|
220
|
+
|
221
|
+
```ruby
|
222
|
+
class Book < ActiveFedora::Base
|
223
|
+
include IiifPrint.model_configuration(
|
224
|
+
pdf_splitter_service: IiifPrint::SplitPdfs::DerivativeRodeoSplitter
|
225
|
+
)
|
226
|
+
end
|
227
|
+
```
|
228
|
+
|
229
|
+
#### Pre-Process Location
|
230
|
+
|
231
|
+
The [DerivativeRodeo](https://github.com/scientist-softserv/derivative_rodeo) allows for specifying a location where you've done pre-processing (e.g. you ran splitting and derivative generation in AWS's Lambda).
|
232
|
+
|
233
|
+
By default, the preprocess location is S3, as that is where SoftServ has been running pre-processing. However, that default may not be adequate for local development.
|
234
|
+
|
235
|
+
#### Conditional Derivative Generation
|
236
|
+
|
237
|
+
The [IiifPrint::DerivativeRodeoService](./app/services/iiif_print/derivative_rodeo_service.rb) provides a means of specifying the derivatives to generate via two configuration points:
|
238
|
+
|
239
|
+
- `IiifPrint::DerivativeRodeoService.named_derivatives_and_generators_by_type`
|
240
|
+
- `IiifPrint::DerivativeRodeoService.named_derivatives_and_generators_filter`
|
241
|
+
|
242
|
+
In the case of `named_derivatives_and_generators_by_type`, we're saying all mime categories will generate these derivatives.
|
243
|
+
|
244
|
+
In the case of `named_derivatives_and_generators_filter`, we're providing a point where we can specify for each file_set and filename the specific derivatives to accept/reject/append to the named derivative generation.
|
245
|
+
|
246
|
+
See their examples for further configuration guidance.
|
247
|
+
|
159
248
|
# Ingesting Content
|
160
249
|
|
161
250
|
IiifPrint supports a range of different ingest workflows:
|
data/Rakefile
CHANGED
@@ -35,4 +35,10 @@ end
|
|
35
35
|
Dir.glob('tasks/*.rake').each { |r| import r }
|
36
36
|
Dir.glob('lib/tasks/*.rake').each { |r| import r }
|
37
37
|
|
38
|
+
# Adding the copy_authorities here so it runs the same in CI
|
39
|
+
desc "Generate the engine_cart, copy authorities, and run tests"
|
40
|
+
task prepare_and_run_tests: ['engine_cart:generate', 'engine_cart:copy_authorities'] do
|
41
|
+
puts "Running CI tests"
|
42
|
+
end
|
43
|
+
|
38
44
|
task default: :ci
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# override Hyrax to remove splitting upon work delete
|
4
|
+
module IiifPrint
|
5
|
+
module Actors
|
6
|
+
# Responsible for removing FileSets related to the given curation concern.
|
7
|
+
module CleanupFileSetsActorDecorator
|
8
|
+
# @param [Hyrax::Actors::Environment] env
|
9
|
+
# @return [Boolean] true if destroy was successful
|
10
|
+
def destroy(env)
|
11
|
+
file_sets = env.curation_concern.file_sets
|
12
|
+
file_sets.each do |file_set|
|
13
|
+
# we destroy the children before the file_set, because we need the parent relationship
|
14
|
+
IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
|
15
|
+
file_set: file_set,
|
16
|
+
work: env.curation_concern
|
17
|
+
)
|
18
|
+
end
|
19
|
+
# and now back to your regularly scheduled programming
|
20
|
+
super
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -1,6 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# override to add PDF splitting for file sets
|
3
|
+
# override to add PDF splitting for file sets and remove splitting upon fileset delete
|
4
|
+
|
5
|
+
# Depending on whether we have an uploaded file or a remote url, the sequence of calling
|
6
|
+
# attach_to_work and create_content will switch.
|
4
7
|
module IiifPrint
|
5
8
|
module Actors
|
6
9
|
module FileSetActorDecorator
|
@@ -9,48 +12,47 @@ module IiifPrint
|
|
9
12
|
super
|
10
13
|
|
11
14
|
if from_url
|
12
|
-
#
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
file_locations: [file.path],
|
19
|
-
user: @user,
|
20
|
-
admin_set_id: parent.admin_set_id
|
21
|
-
)
|
22
|
-
end
|
15
|
+
# in this case, the file that came in is a temp file, and we need to use the actual file.
|
16
|
+
# the file was attached to the file_set in Hyrax::ImportUrlJob so we can just access it.
|
17
|
+
args = { file_set: file_set, file: file_set.files.first, import_url: file_set.import_url, user: @user }
|
18
|
+
returned_value = service.conditionally_enqueue(**args)
|
19
|
+
Rails.logger.info("Result of #{returned_value} for conditional enqueueing of #{args.inspect}")
|
20
|
+
true
|
23
21
|
else
|
24
22
|
# we don't have the parent yet... save the paths for later use
|
25
|
-
@
|
23
|
+
@file = file
|
26
24
|
end
|
27
25
|
end
|
28
26
|
|
29
|
-
# Prior to Hyrax v3.1.0, this method did not exist
|
30
|
-
# @param file_set [FileSet]
|
31
|
-
# @return [ActiveFedora::Base]
|
32
|
-
def parent_for(file_set:)
|
33
|
-
file_set.parent
|
34
|
-
end
|
35
|
-
|
36
27
|
# Override to add PDF splitting
|
37
28
|
def attach_to_work(work, file_set_params = {})
|
38
29
|
# Locks to ensure that only one process is operating on the list at a time.
|
39
30
|
super
|
40
31
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
32
|
+
# when we are importing a remote_url, this method is called before the file is attached.
|
33
|
+
# We want to short-circuit the process and prevent unnecessarily confusing logging.
|
34
|
+
return unless @file
|
35
|
+
|
36
|
+
args = { file_set: file_set, work: work, file: @file, user: @user }
|
37
|
+
returned_value = service.conditionally_enqueue(**args)
|
38
|
+
Rails.logger.info("Result of #{returned_value} for conditional enqueueing of #{args.inspect}")
|
39
|
+
true
|
49
40
|
end
|
50
41
|
|
51
42
|
def service
|
52
43
|
IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService
|
53
44
|
end
|
45
|
+
|
46
|
+
# Clean up children when removing the fileset
|
47
|
+
def destroy
|
48
|
+
# we destroy the children before the file_set, because we need the parent relationship
|
49
|
+
IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
|
50
|
+
file_set: file_set,
|
51
|
+
work: file_set.parent
|
52
|
+
)
|
53
|
+
# and now back to your regularly scheduled programming
|
54
|
+
super
|
55
|
+
end
|
54
56
|
end
|
55
57
|
end
|
56
58
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module IiifPrint
|
2
|
+
# Responsible for coordinating the request to resplit a PDF.
|
3
|
+
class SplitPdfsController < ApplicationController
|
4
|
+
before_action :authenticate_user!
|
5
|
+
|
6
|
+
def create
|
7
|
+
@file_set = FileSet.where(id: params[:file_set_id]).first
|
8
|
+
authorize_create_split_request!(@file_set)
|
9
|
+
IiifPrint::Jobs::RequestSplitPdfJob.perform_later(file_set: @file_set, user: current_user)
|
10
|
+
respond_to do |wants|
|
11
|
+
wants.html { redirect_to polymorphic_path([main_app, @file_set]), notice: t("iiif_print.file_set.split_submitted", id: @file_set.id) }
|
12
|
+
wants.json { render json: { id: @file_set.id, to_param: @file_set.to_param }, status: :ok }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
##
|
19
|
+
# @param file_set [FileSet]
|
20
|
+
def authorize_create_split_request!(file_set)
|
21
|
+
# NOTE: Duplicates logic of Hyrax: https://github.com/samvera/hyrax/blob/b334e186e77691d7da8ed59ff27f091be1c2a700/app/controllers/hyrax/file_sets_controller.rb#L234-L241
|
22
|
+
#
|
23
|
+
# Namely if we don't have a file_set we need not proceed.
|
24
|
+
raise CanCan::AccessDenied unless file_set
|
25
|
+
|
26
|
+
##
|
27
|
+
# Rely on CanCan's authorize! method. We could add the :split_pdf action to the ability
|
28
|
+
# class. But we're pigging backing on the idea that you can do this if you can edit the work.
|
29
|
+
authorize!(:edit, file_set)
|
30
|
+
raise "Expected #{file_set.class} ID=#{file_set.id} #to_param=#{file_set.to_param} to be a PDF. Instead found mime_type of #{file_set.mime_type}." unless file_set.pdf?
|
31
|
+
|
32
|
+
work = IiifPrint.parent_for(file_set)
|
33
|
+
raise WorkNotConfiguredToSplitFileSetError.new(file_set: file_set, work: work) unless work&.iiif_print_config&.pdf_splitter_job&.presence
|
34
|
+
|
35
|
+
true
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# OVERRIDE Hyrax v2.9.6 add #uv_search_param
|
4
|
+
|
5
|
+
module IiifPrint
|
6
|
+
module IiifHelperDecorator
|
7
|
+
def iiif_viewer_display(work_presenter, locals = {})
|
8
|
+
render iiif_viewer_display_partial(work_presenter),
|
9
|
+
locals.merge(presenter: work_presenter)
|
10
|
+
end
|
11
|
+
|
12
|
+
def iiif_viewer_display_partial(work_presenter)
|
13
|
+
'hyrax/base/iiif_viewers/' + work_presenter.iiif_viewer.to_s
|
14
|
+
end
|
15
|
+
|
16
|
+
def universal_viewer_base_url
|
17
|
+
"#{request&.base_url}#{IiifPrint.config.uv_base_path}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def universal_viewer_config_url
|
21
|
+
"#{request&.base_url}#{IiifPrint.config.uv_config_path}"
|
22
|
+
end
|
23
|
+
|
24
|
+
# Extract query param from search
|
25
|
+
def uv_search_param
|
26
|
+
search_params = current_search_session.try(:query_params) || {}
|
27
|
+
q = search_params['q'].presence || ''
|
28
|
+
|
29
|
+
"&q=#{url_encode(q)}" if q.present?
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module IiifPrint::IiifPrintHelperBehavior
|
2
|
+
##
|
3
|
+
# print the ocr snippets. if more than one, separate with <br/>
|
4
|
+
#
|
5
|
+
# @param options [Hash] options hash provided by Blacklight
|
6
|
+
# @return [String] snippets HTML to be rendered
|
7
|
+
# rubocop:disable Rails/OutputSafety
|
8
|
+
def render_ocr_snippets(options = {})
|
9
|
+
snippets = options[:value]
|
10
|
+
return if snippets.blank?
|
11
|
+
|
12
|
+
snippets_content = [content_tag('div',
|
13
|
+
"... #{snippets.first} ...".html_safe,
|
14
|
+
class: 'ocr_snippet first_snippet')]
|
15
|
+
if snippets.length > 1
|
16
|
+
snippets_content << render(partial: 'catalog/snippets_more',
|
17
|
+
locals: { snippets: snippets.drop(1),
|
18
|
+
options: options })
|
19
|
+
end
|
20
|
+
snippets_content.join("\n").html_safe
|
21
|
+
end
|
22
|
+
# rubocop:enable Rails/OutputSafety
|
23
|
+
end
|
@@ -41,24 +41,4 @@ module IiifPrintHelper
|
|
41
41
|
end
|
42
42
|
hl_matches.uniq.sort.join(' ')
|
43
43
|
end
|
44
|
-
|
45
|
-
##
|
46
|
-
# print the ocr snippets. if more than one, separate with <br/>
|
47
|
-
#
|
48
|
-
# @param options [Hash] options hash provided by Blacklight
|
49
|
-
# @return [String] snippets HTML to be rendered
|
50
|
-
# rubocop:disable Rails/OutputSafety
|
51
|
-
def render_ocr_snippets(options = {})
|
52
|
-
snippets = options[:value]
|
53
|
-
snippets_content = [content_tag('div',
|
54
|
-
"... #{snippets.first} ...".html_safe,
|
55
|
-
class: 'ocr_snippet first_snippet')]
|
56
|
-
if snippets.length > 1
|
57
|
-
snippets_content << render(partial: 'catalog/snippets_more',
|
58
|
-
locals: { snippets: snippets.drop(1),
|
59
|
-
options: options })
|
60
|
-
end
|
61
|
-
snippets_content.join("\n").html_safe
|
62
|
-
end
|
63
|
-
# rubocop:enable Rails/OutputSafety
|
64
44
|
end
|
@@ -19,15 +19,21 @@ module IiifPrint
|
|
19
19
|
indexer.prepend(self)
|
20
20
|
indexer.class_attribute(:iiif_print_lineage_service, default: IiifPrint::LineageService)
|
21
21
|
end
|
22
|
-
work_type::GeneratedResourceSchema.send(:include, IiifPrint::SetChildFlag)
|
22
|
+
work_type::GeneratedResourceSchema.send(:include, IiifPrint::SetChildFlag) if work_type.const_defined?(:GeneratedResourceSchema)
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
26
|
def generate_solr_document
|
27
27
|
super.tap do |solr_doc|
|
28
|
-
solr_doc['is_child_bsi']
|
28
|
+
solr_doc['is_child_bsi'] ||= object.is_child
|
29
|
+
solr_doc['split_from_pdf_id_ssi'] ||= object.split_from_pdf_id
|
29
30
|
solr_doc['is_page_of_ssim'] = iiif_print_lineage_service.ancestor_ids_for(object)
|
30
|
-
|
31
|
+
|
32
|
+
# Due to a long-standing hack in Hyrax, the file_set_ids_ssim contains both file_set_ids and
|
33
|
+
# child work ids.
|
34
|
+
#
|
35
|
+
# See https://github.com/samvera/hyrax/blob/2b807fe101176d594129ef8a8fe466d3d03a372b/app/indexers/hyrax/work_indexer.rb#L15-L18
|
36
|
+
solr_doc['file_set_ids_ssim'] = iiif_print_lineage_service.descendent_member_ids_for(object)
|
31
37
|
end
|
32
38
|
end
|
33
39
|
end
|
@@ -19,11 +19,24 @@ module IiifPrint
|
|
19
19
|
# only UV viewable images should have is_page_of, it is only used for iiif search
|
20
20
|
solr_doc['is_page_of_ssim'] = iiif_print_lineage_service.ancestor_ids_for(object) if object.mime_type&.match(/image/)
|
21
21
|
# index for full text search
|
22
|
-
|
23
|
-
|
24
|
-
solr_doc['
|
25
|
-
solr_doc['all_text_tsimv'] = text
|
22
|
+
solr_doc['all_text_timv'] = all_text
|
23
|
+
solr_doc['all_text_tsimv'] = all_text
|
24
|
+
solr_doc['digest_ssim'] = digest_from_content
|
26
25
|
end
|
27
26
|
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def digest_from_content
|
31
|
+
return unless object.original_file
|
32
|
+
object.original_file.digest.first.to_s
|
33
|
+
end
|
34
|
+
|
35
|
+
def all_text
|
36
|
+
text = IiifPrint.config.all_text_generator_function.call(object: object) || ''
|
37
|
+
return text if text.empty?
|
38
|
+
|
39
|
+
text.tr("\n", ' ').squeeze(' ')
|
40
|
+
end
|
28
41
|
end
|
29
42
|
end
|
@@ -4,6 +4,10 @@ module RDF
|
|
4
4
|
class CustomIsChildTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/')
|
5
5
|
property 'is_child'
|
6
6
|
end
|
7
|
+
|
8
|
+
class FromPdfIdTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/')
|
9
|
+
property 'split_from_pdf_id'
|
10
|
+
end
|
7
11
|
end
|
8
12
|
|
9
13
|
module IiifPrint
|
@@ -18,6 +22,11 @@ module IiifPrint
|
|
18
22
|
multiple: false do |index|
|
19
23
|
index.as :stored_searchable
|
20
24
|
end
|
25
|
+
property :split_from_pdf_id,
|
26
|
+
predicate: ::RDF::FromPdfIdTerm.split_from_pdf_id,
|
27
|
+
multiple: false do |index|
|
28
|
+
index.as :stored_searchable
|
29
|
+
end
|
21
30
|
end
|
22
31
|
|
23
32
|
def set_children
|
@@ -19,6 +19,8 @@ module IiifPrint::Solr::Document
|
|
19
19
|
def self.decorate(base)
|
20
20
|
base.prepend(self)
|
21
21
|
base.send(:attribute, :is_child, Hyrax::SolrDocument::Metadata::Solr::String, 'is_child_bsi')
|
22
|
+
base.send(:attribute, :split_from_pdf_id, Hyrax::SolrDocument::Metadata::Solr::String, 'split_from_pdf_id_ssi')
|
23
|
+
base.send(:attribute, :digest, Hyrax::SolrDocument::Metadata::Solr::String, 'digest_ssim')
|
22
24
|
|
23
25
|
# @note These properties came from the newspaper_works gem. They are configurable.
|
24
26
|
base.class_attribute :iiif_print_solr_field_names, default: %w[alternative_title genre
|
@@ -31,6 +33,10 @@ module IiifPrint::Solr::Document
|
|
31
33
|
base
|
32
34
|
end
|
33
35
|
|
36
|
+
def digest_sha1
|
37
|
+
digest[/urn:sha1:([\w]+)/, 1]
|
38
|
+
end
|
39
|
+
|
34
40
|
def method_missing(method_name, *args, &block)
|
35
41
|
super unless iiif_print_solr_field_names.include? method_name.to_s
|
36
42
|
self[::ActiveFedora.index_field_mapper.solr_name(method_name.to_s)]
|
@@ -44,4 +50,12 @@ module IiifPrint::Solr::Document
|
|
44
50
|
def file_set_ids
|
45
51
|
self['file_set_ids_ssim']
|
46
52
|
end
|
53
|
+
|
54
|
+
def any_highlighting?
|
55
|
+
response&.[]('highlighting')&.[](id)&.present?
|
56
|
+
end
|
57
|
+
|
58
|
+
def solr_document
|
59
|
+
self
|
60
|
+
end
|
47
61
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# OVERRIDE: Blacklight IIIF Search v1.0.0
|
4
|
+
# IiifSearchDecorator module extends the functionality of the BlacklightIiifSearch::IiifSearch class
|
5
|
+
# by overriding the solr_params method to modify the search query to include the parent's metadata.
|
6
|
+
module IiifPrint
|
7
|
+
module IiifSearchDecorator
|
8
|
+
##
|
9
|
+
# Overrides the solr_params method from BlacklightIiifSearch::IiifSearch to modify the search query.
|
10
|
+
# The method adds an additional filter to the query to include either the object_relation_field OR the
|
11
|
+
# parent document's id and removes the :f parameter from the query.
|
12
|
+
# :object_relation_field refers to the CatalogController's configuration which is typically set to
|
13
|
+
# 'is_page_of_ssim' in the host application which only searches child works by default.
|
14
|
+
#
|
15
|
+
# config.iiif_search = {
|
16
|
+
# full_text_field: 'all_text_tsimv',
|
17
|
+
# object_relation_field: 'is_page_of_ssim',
|
18
|
+
# supported_params: %w[q page],
|
19
|
+
# autocomplete_handler: 'iiif_suggest',
|
20
|
+
# suggester_name: 'iiifSuggester'
|
21
|
+
# }
|
22
|
+
#
|
23
|
+
# @return [Hash] A hash containing the modified Solr search parameters
|
24
|
+
#
|
25
|
+
def solr_params
|
26
|
+
return { q: 'nil:nil' } unless q
|
27
|
+
|
28
|
+
{
|
29
|
+
q: "#{q} AND (#{iiif_config[:object_relation_field]}:\"#{parent_document.id}\" OR id:\"#{parent_document.id}\")",
|
30
|
+
rows: rows,
|
31
|
+
page: page
|
32
|
+
}
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -4,14 +4,37 @@ module IiifPrint
|
|
4
4
|
# @see https://github.com/scientist-softserv/louisville-hyku/commit/67467e5cf9fdb755f54419f17d3c24c87032d0af
|
5
5
|
def annotation_list
|
6
6
|
json_results = super
|
7
|
-
|
7
|
+
|
8
|
+
# Check and process invalid hit
|
9
|
+
if json_results&.[]('resources')
|
10
|
+
remove_invalid_hit(json_results)
|
11
|
+
add_metadata_match(json_results)
|
12
|
+
end
|
13
|
+
|
14
|
+
json_results
|
15
|
+
end
|
16
|
+
|
17
|
+
def remove_invalid_hit(json_results)
|
18
|
+
resources = json_results['resources']
|
19
|
+
invalid_hit = resources.detect { |resource| resource["on"].include?(IiifPrint::BlacklightIiifSearch::AnnotationDecorator::INVALID_MATCH_TEXT) }
|
20
|
+
return unless invalid_hit
|
21
|
+
|
22
|
+
# Delete invalid hit from resources, remove first hit (which is from the invalid hit), decrement total within
|
23
|
+
resources.delete(invalid_hit)
|
24
|
+
json_results['hits'].shift
|
25
|
+
json_results['within']['total'] -= 1
|
26
|
+
end
|
27
|
+
|
28
|
+
def add_metadata_match(json_results)
|
29
|
+
json_results['resources'].each do |result_hit|
|
8
30
|
next if result_hit['resource'].present?
|
31
|
+
|
32
|
+
# Add resource details if not present
|
9
33
|
result_hit['resource'] = {
|
10
34
|
"@type": "cnt:ContentAsText",
|
11
35
|
"chars": "Metadata match, see sidebar for details"
|
12
36
|
}
|
13
37
|
end
|
14
|
-
json_results
|
15
38
|
end
|
16
39
|
end
|
17
40
|
end
|
@@ -3,5 +3,8 @@ module IiifPrint
|
|
3
3
|
validates :parent_id, presence: true
|
4
4
|
validates :child_title, presence: true
|
5
5
|
validates :child_order, presence: true
|
6
|
+
validates :parent_model, presence: true
|
7
|
+
validates :child_model, presence: true
|
8
|
+
validates :file_id, presence: true
|
6
9
|
end
|
7
10
|
end
|