iiif_print 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +98 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
  19. data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
  20. data/app/models/concerns/iiif_print/solr/document.rb +14 -0
  21. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  22. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  23. data/app/models/iiif_print/pending_relationship.rb +3 -0
  24. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  25. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  26. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
  27. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  28. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  29. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  30. data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
  31. data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
  32. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  33. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  34. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  35. data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
  36. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  37. data/config/locales/iiif_print.en.yml +4 -0
  38. data/config/routes.rb +3 -0
  39. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  40. data/docker-compose.yml +2 -2
  41. data/iiif_print.gemspec +10 -9
  42. data/lib/generators/iiif_print/install_generator.rb +21 -1
  43. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  44. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  45. data/lib/iiif_print/base_derivative_service.rb +2 -1
  46. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
  47. data/lib/iiif_print/catalog_search_builder.rb +5 -1
  48. data/lib/iiif_print/configuration.rb +145 -8
  49. data/lib/iiif_print/data/fileset_helper.rb +1 -1
  50. data/lib/iiif_print/data/work_derivatives.rb +3 -3
  51. data/lib/iiif_print/engine.rb +7 -13
  52. data/lib/iiif_print/errors.rb +18 -0
  53. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  54. data/lib/iiif_print/image_tool.rb +12 -8
  55. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
  56. data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
  57. data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  58. data/lib/iiif_print/lineage_service.rb +29 -8
  59. data/lib/iiif_print/metadata.rb +67 -48
  60. data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
  61. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
  62. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  63. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
  64. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  65. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  66. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  67. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  68. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  69. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  70. data/lib/iiif_print/version.rb +1 -1
  71. data/lib/iiif_print.rb +167 -12
  72. data/lib/samvera/derivatives/configuration.rb +83 -0
  73. data/lib/samvera/derivatives/hyrax.rb +129 -0
  74. data/lib/samvera/derivatives.rb +238 -0
  75. data/spec/factories/newspaper_page_solr_document.rb +9 -1
  76. data/spec/fixtures/authorities/licenses.yml +4 -0
  77. data/spec/fixtures/authorities/rights_statements.yml +4 -0
  78. data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
  79. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
  80. data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
  81. data/spec/iiif_print/configuration_spec.rb +141 -15
  82. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
  83. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
  84. data/spec/iiif_print/lineage_service_spec.rb +1 -1
  85. data/spec/iiif_print/metadata_spec.rb +157 -23
  86. data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
  87. data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
  88. data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
  89. data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
  90. data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
  91. data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
  92. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
  93. data/spec/iiif_print_spec.rb +125 -5
  94. data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
  95. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
  96. data/spec/samvera/derivatives/configuration_spec.rb +41 -0
  97. data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
  98. data/spec/samvera/derivatives_spec.rb +54 -0
  99. data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
  100. data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
  101. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
  102. data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
  103. data/tasks/copy_authorities_to_test_app.rake +11 -0
  104. data/tasks/iiif_print_dev.rake +4 -4
  105. metadata +123 -35
  106. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  107. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  108. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
data/README.md CHANGED
@@ -35,9 +35,9 @@ IiifPrint supports:
35
35
  * OCR keyword match highlighting
36
36
  * viewer with page navigation and deep zooming
37
37
  * splitting of PDFs to LZW compressed TIFFs for viewing
38
- * configuring how the manifest canvases are sorted in the viewer
39
38
  * adding metadata fields to the manifest with faceted search links and external links
40
39
  * excluding specified work types to be found in the catalog search
40
+ * external IIIF image urls that work with services such as serverless-iiif or Cantaloupe
41
41
 
42
42
  A complete list of features can be found [here](https://github.com/scientist-softserv/iiif_print/wiki/Features-List).
43
43
 
@@ -86,15 +86,39 @@ IiifPrint easily integrates with your Hyrax 2.x applications.
86
86
  * In `config/routes.rb`, it adds `concerns :iiif_search` in the `resources :solr_documents` block
87
87
  * Adds `config/initializers/iiif_print.rb`
88
88
  * Adds three migrations, `CreateIiifPrintDerivativeAttachments`, `CreateIiifPrintIngestFileRelations`, and `CreateIiifPrintPendingRelationships`
89
- * In `solr/conf/schema.xml`, it adds Blacklight IIIF Search autocomplete config
90
- * In `solr/conf/solrconfig.xml`, it adds Blacklight IIIF Search autocomplete config
91
- * Adds `solr/lib/solr-tokenizing_suggester-7.x.jar`
92
89
 
93
90
  (It may be helpful to run `git diff` after installation to see all the changes made by the installer.)
94
91
 
92
+ ## Catalog to Universal Viewer search:
93
+ To enable a feature where the UV automatically picks up the search from the catalog, do the following:
94
+ * Add `highlight: urlDataProvider.get('q'),` into your uv.html in the `<script>` section.
95
+ ```js
96
+ uv = createUV('#uv', {
97
+ root: '.',
98
+ iiifResourceUri: urlDataProvider.get('manifest'),
99
+ configUri: 'uv-config.json',
100
+ collectionIndex: Number(urlDataProvider.get('c', 0)),
101
+ manifestIndex: Number(urlDataProvider.get('m', 0)),
102
+ sequenceIndex: Number(urlDataProvider.get('s', 0)),
103
+ canvasIndex: Number(urlDataProvider.get('cv', 0)),
104
+ rangeId: urlDataProvider.get('rid', 0),
105
+ rotation: Number(urlDataProvider.get('r', 0)),
106
+ xywh: urlDataProvider.get('xywh', ''),
107
+ embedded: true,
108
+ highlight: urlDataProvider.get('q'), // <-- here's a good spot
109
+ locales: formattedLocales
110
+ }, urlDataProvider);
111
+ ```
112
+
113
+ * Make sure to remove your application's `app/helpers/hyrax/iiif_helper.rb` and `app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb` (if they exist)
114
+
95
115
  ## Configuration to enable IiifPrint features
96
116
  **NOTE: WorkTypes and models are used synonymously here.**
97
117
 
118
+ ### IIIF URL configuration
119
+
120
+ If you set EXTERNAL_IIIF_URL in your environment, then IiifPrint will use that URL as the root for your IIIF URLs. It will also switch from using the file set ID to using the SHA1 of the file as the identifier. This enables using serverless-iiif or Cantaloupe (referred to as the service) by pointing the service to the same S3 bucket that FCREPO writes the uploaded files to. By setting it up that way you do not need the service to connect to FCREPO or Hyrax at all; both natively support connecting to an S3 bucket to get their data.
121
+
98
122
  ### Model level configurations
99
123
 
100
124
  In `app/models/{work_type}.rb` add `include IiifPrint.model_configuration` to any work types which require IiifPrint processing features (such as PDF splitting or OCR derivatives). See [lib/iiif_print.rb](./lib/iiif_print.rb) for details on configuration options.
@@ -126,10 +150,6 @@ IiifPrint.config do |config|
126
150
  # Add configurable solr field key for searching, default key is: 'human_readable_type_sim' if
127
151
  # another key is used, make sure to adjust the config.excluded_model_name_solr_field_values to match
128
152
  config.excluded_model_name_solr_field_key = 'some_solr_field_key'
129
-
130
- # Configure how the manifest sorts the canvases, by default it sorts by `:title`, but a different
131
- # model property may be desired such as :date_published
132
- config.sort_iiif_manifest_canvases_by = :date_published
133
153
  end
134
154
  ```
135
155
 
@@ -146,7 +166,7 @@ TO ENABLE OCR Search (from the UV and catalog search)
146
166
  }
147
167
  end
148
168
  ```
149
- * Set `config.search_builder_class = IiifPrint::CatalogSearchBuilder` to remove works from the catalog search results if `is_child_bsi: true`
169
+ * Set `config.search_builder_class = IiifPrint::CatalogSearchBuilder` to remove works from the catalog search results if `is_child_bsi: true`
150
170
  * Ensure that all text search is configured in default_solr_params config block:
151
171
  ```rb
152
172
  config.default_solr_params = {
@@ -156,6 +176,75 @@ TO ENABLE OCR Search (from the UV and catalog search)
156
176
  }
157
177
  ```
158
178
 
179
+ To remove child works from recent works on homepage
180
+ ### homepage_controller.rb
181
+ * In the HomepageController, change the search_builder_class to remove works from recent_documents if `is_child_bsi: true`
182
+ ```rb
183
+ require "iiif_print/homepage_search_builder"
184
+
185
+ def search_builder_class
186
+ IiifPrint::HomepageSearchBuilder
187
+ end
188
+ ```
189
+
190
+ ### Skipping Certain File Suffixes for PDF Splitting
191
+
192
+ By default when a work is configured for splitting PDFs, we will split all PDFs. However, in some cases you don't want to split based on the file name's suffix. In that case, configure code as follows:
193
+
194
+ ```ruby
195
+ IiifPrint.config do |config|
196
+ config.skip_splitting_pdf_files_that_end_with_these_texts = ['.reader.pdf']
197
+ end
198
+ ```
199
+
200
+ ### Derivative Rodeo Configuration
201
+
202
+ The Derivative Rodeo is used in two ways:
203
+
204
+ - Configuring the `Hyrax::DerivativeService` by adding `IiifPrint::DerivativeRodeoService`
205
+ - Enabling the Derivative Rodeo PDF Splitting service via `IiifPrint.model_configuration`
206
+
207
+ #### Configuring Hyrax::Derivative
208
+
209
+ In the application initializer:
210
+
211
+ ```ruby
212
+ Hyrax::DerivativeService.services = [
213
+ IiifPrint::DerivativeRodeoService,
214
+ Hyrax::FileSetDerivativesService]
215
+ ```
216
+
217
+ #### Enabling Derivative Rodeo PDF Splitting
218
+
219
+ The [IiifPrint.model\_configuration method](./lib/iiif_print.rb) allows for specifying the `pdf_splitter_service` as below:
220
+
221
+ ```ruby
222
+ class Book < ActiveFedora::Base
223
+ include IiifPrint.model_configuration(
224
+ pdf_splitter_service: IiifPrint::SplitPdfs::DerivativeRodeoSplitter
225
+ )
226
+ end
227
+ ```
228
+
229
+ #### Pre-Process Location
230
+
231
+ The [DerivativeRodeo](https://github.com/scientist-softserv/derivative_rodeo) allows for specifying a location where you've done pre-processing (e.g. you ran splitting and derivative generation in AWS's Lambda).
232
+
233
+ By default the preprocess location is S3, as that is where SoftServ has been running pre-processing. However that default may not be adequate for local development.
234
+
235
+ #### Conditional Derivative Generation
236
+
237
+ The [IiifPrint::DerivativeRodeoService](./app/services/iiif_print/derivative_rodeo_service.rb) provides a means of specifying the derivatives to generate via two configuration points:
238
+
239
+ - `IiifPrint::DerivativeRodeoService.named_derivatives_and_generators_by_type`
240
+ - `IiifPrint::DerivativeRodeoService.named_derivatives_and_generators_filter`
241
+
242
+ In the case of `named_derivatives_and_generators_by_type`, we're saying all mime categories will generate these derivatives.
243
+
244
+ In the case of `named_derivatives_and_generators_filter`, we're providing a point where we can specify for each file_set and filename the specific derivatives to accept/reject/append to the named derivative generation.
245
+
246
+ See their examples for further configuration guidance.
247
+
159
248
  # Ingesting Content
160
249
 
161
250
  IiifPrint supports a range of different ingest workflows:
data/Rakefile CHANGED
@@ -35,4 +35,10 @@ end
35
35
  Dir.glob('tasks/*.rake').each { |r| import r }
36
36
  Dir.glob('lib/tasks/*.rake').each { |r| import r }
37
37
 
38
+ # Adding the copy_authorities here so it runs the same in CI
39
+ desc "Generate the engine_cart, copy authorities, and run tests"
40
+ task prepare_and_run_tests: ['engine_cart:generate', 'engine_cart:copy_authorities'] do
41
+ puts "Running CI tests"
42
+ end
43
+
38
44
  task default: :ci
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ # override Hyrax to remove splitting upon work delete
4
+ module IiifPrint
5
+ module Actors
6
+ # Responsible for removing FileSets related to the given curation concern.
7
+ module CleanupFileSetsActorDecorator
8
+ # @param [Hyrax::Actors::Environment] env
9
+ # @return [Boolean] true if destroy was successful
10
+ def destroy(env)
11
+ file_sets = env.curation_concern.file_sets
12
+ file_sets.each do |file_set|
13
+ # we destroy the children before the file_set, because we need the parent relationship
14
+ IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
15
+ file_set: file_set,
16
+ work: env.curation_concern
17
+ )
18
+ end
19
+ # and now back to your regularly scheduled programming
20
+ super
21
+ end
22
+ end
23
+ end
24
+ end
@@ -1,6 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # override to add PDF splitting for file sets
3
+ # override to add PDF splitting for file sets and remove splitting upon fileset delete
4
+
5
+ # Depending on whether we have an uploaded file or a remote url, the sequence of calling
6
+ # attach_to_work and create_content will switch.
4
7
  module IiifPrint
5
8
  module Actors
6
9
  module FileSetActorDecorator
@@ -9,48 +12,47 @@ module IiifPrint
9
12
  super
10
13
 
11
14
  if from_url
12
- # we have everything we need... queue the job
13
- parent = parent_for(file_set: @file_set)
14
-
15
- if service.iiif_print_split?(work: parent) && service.pdfs?(paths: [file_set.import_url])
16
- service.queue_job(
17
- work: parent,
18
- file_locations: [file.path],
19
- user: @user,
20
- admin_set_id: parent.admin_set_id
21
- )
22
- end
15
+ # in this case, the file that came in is a temp file, and we need to use the actual file.
16
+ # the file was attached to the file_set in Hyrax::ImportUrlJob so we can just access it.
17
+ args = { file_set: file_set, file: file_set.files.first, import_url: file_set.import_url, user: @user }
18
+ returned_value = service.conditionally_enqueue(**args)
19
+ Rails.logger.info("Result of #{returned_value} for conditional enqueueing of #{args.inspect}")
20
+ true
23
21
  else
24
22
  # we don't have the parent yet... save the paths for later use
25
- @pdf_paths = service.pdf_paths(files: [file.id.to_s])
23
+ @file = file
26
24
  end
27
25
  end
28
26
 
29
- # Prior to Hyrax v3.1.0, this method did not exist
30
- # @param file_set [FileSet]
31
- # @return [ActiveFedora::Base]
32
- def parent_for(file_set:)
33
- file_set.parent
34
- end
35
-
36
27
  # Override to add PDF splitting
37
28
  def attach_to_work(work, file_set_params = {})
38
29
  # Locks to ensure that only one process is operating on the list at a time.
39
30
  super
40
31
 
41
- return if @pdf_paths.blank?
42
- return unless service.iiif_print_split?(work: work)
43
- service.queue_job(
44
- work: work,
45
- file_locations: @pdf_paths,
46
- user: @user,
47
- admin_set_id: work.admin_set_id
48
- )
32
+ # when we are importing a remote_url, this method is called before the file is attached.
33
+ # We want to short-circuit the process and prevent unnecessarily confusing logging.
34
+ return unless @file
35
+
36
+ args = { file_set: file_set, work: work, file: @file, user: @user }
37
+ returned_value = service.conditionally_enqueue(**args)
38
+ Rails.logger.info("Result of #{returned_value} for conditional enqueueing of #{args.inspect}")
39
+ true
49
40
  end
50
41
 
51
42
  def service
52
43
  IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService
53
44
  end
45
+
46
+ # Clean up children when removing the fileset
47
+ def destroy
48
+ # we destroy the children before the file_set, because we need the parent relationship
49
+ IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
50
+ file_set: file_set,
51
+ work: file_set.parent
52
+ )
53
+ # and now back to your regularly scheduled programming
54
+ super
55
+ end
54
56
  end
55
57
  end
56
58
  end
@@ -0,0 +1,38 @@
1
+ module IiifPrint
2
+ # Responsible for coordinating the request to resplit a PDF.
3
+ class SplitPdfsController < ApplicationController
4
+ before_action :authenticate_user!
5
+
6
+ def create
7
+ @file_set = FileSet.where(id: params[:file_set_id]).first
8
+ authorize_create_split_request!(@file_set)
9
+ IiifPrint::Jobs::RequestSplitPdfJob.perform_later(file_set: @file_set, user: current_user)
10
+ respond_to do |wants|
11
+ wants.html { redirect_to polymorphic_path([main_app, @file_set]), notice: t("iiif_print.file_set.split_submitted", id: @file_set.id) }
12
+ wants.json { render json: { id: @file_set.id, to_param: @file_set.to_param }, status: :ok }
13
+ end
14
+ end
15
+
16
+ private
17
+
18
+ ##
19
+ # @param file_set [FileSet]
20
+ def authorize_create_split_request!(file_set)
21
+ # NOTE: Duplicates logic of Hyrax: https://github.com/samvera/hyrax/blob/b334e186e77691d7da8ed59ff27f091be1c2a700/app/controllers/hyrax/file_sets_controller.rb#L234-L241
22
+ #
23
+ # Namely if we don't have a file_set we need not proceed.
24
+ raise CanCan::AccessDenied unless file_set
25
+
26
+ ##
27
+ # Rely on CanCan's authorize! method. We could add the :split_pdf action to the ability
28
+ # class. But we're piggybacking on the idea that you can do this if you can edit the work.
29
+ authorize!(:edit, file_set)
30
+ raise "Expected #{file_set.class} ID=#{file_set.id} #to_param=#{file_set.to_param} to be a PDF. Instead found mime_type of #{file_set.mime_type}." unless file_set.pdf?
31
+
32
+ work = IiifPrint.parent_for(file_set)
33
+ raise WorkNotConfiguredToSplitFileSetError.new(file_set: file_set, work: work) unless work&.iiif_print_config&.pdf_splitter_job&.presence
34
+
35
+ true
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ # OVERRIDE Hyrax v2.9.6 add #uv_search_param
4
+
5
+ module IiifPrint
6
+ module IiifHelperDecorator
7
+ def iiif_viewer_display(work_presenter, locals = {})
8
+ render iiif_viewer_display_partial(work_presenter),
9
+ locals.merge(presenter: work_presenter)
10
+ end
11
+
12
+ def iiif_viewer_display_partial(work_presenter)
13
+ 'hyrax/base/iiif_viewers/' + work_presenter.iiif_viewer.to_s
14
+ end
15
+
16
+ def universal_viewer_base_url
17
+ "#{request&.base_url}#{IiifPrint.config.uv_base_path}"
18
+ end
19
+
20
+ def universal_viewer_config_url
21
+ "#{request&.base_url}#{IiifPrint.config.uv_config_path}"
22
+ end
23
+
24
+ # Extract query param from search
25
+ def uv_search_param
26
+ search_params = current_search_session.try(:query_params) || {}
27
+ q = search_params['q'].presence || ''
28
+
29
+ "&q=#{url_encode(q)}" if q.present?
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,23 @@
1
+ module IiifPrint::IiifPrintHelperBehavior
2
+ ##
3
+ # print the ocr snippets. if more than one, separate with <br/>
4
+ #
5
+ # @param options [Hash] options hash provided by Blacklight
6
+ # @return [String] snippets HTML to be rendered
7
+ # rubocop:disable Rails/OutputSafety
8
+ def render_ocr_snippets(options = {})
9
+ snippets = options[:value]
10
+ return if snippets.blank?
11
+
12
+ snippets_content = [content_tag('div',
13
+ "... #{snippets.first} ...".html_safe,
14
+ class: 'ocr_snippet first_snippet')]
15
+ if snippets.length > 1
16
+ snippets_content << render(partial: 'catalog/snippets_more',
17
+ locals: { snippets: snippets.drop(1),
18
+ options: options })
19
+ end
20
+ snippets_content.join("\n").html_safe
21
+ end
22
+ # rubocop:enable Rails/OutputSafety
23
+ end
@@ -41,24 +41,4 @@ module IiifPrintHelper
41
41
  end
42
42
  hl_matches.uniq.sort.join(' ')
43
43
  end
44
-
45
- ##
46
- # print the ocr snippets. if more than one, separate with <br/>
47
- #
48
- # @param options [Hash] options hash provided by Blacklight
49
- # @return [String] snippets HTML to be rendered
50
- # rubocop:disable Rails/OutputSafety
51
- def render_ocr_snippets(options = {})
52
- snippets = options[:value]
53
- snippets_content = [content_tag('div',
54
- "... #{snippets.first} ...".html_safe,
55
- class: 'ocr_snippet first_snippet')]
56
- if snippets.length > 1
57
- snippets_content << render(partial: 'catalog/snippets_more',
58
- locals: { snippets: snippets.drop(1),
59
- options: options })
60
- end
61
- snippets_content.join("\n").html_safe
62
- end
63
- # rubocop:enable Rails/OutputSafety
64
44
  end
@@ -19,15 +19,21 @@ module IiifPrint
19
19
  indexer.prepend(self)
20
20
  indexer.class_attribute(:iiif_print_lineage_service, default: IiifPrint::LineageService)
21
21
  end
22
- work_type::GeneratedResourceSchema.send(:include, IiifPrint::SetChildFlag)
22
+ work_type::GeneratedResourceSchema.send(:include, IiifPrint::SetChildFlag) if work_type.const_defined?(:GeneratedResourceSchema)
23
23
  end
24
24
  end
25
25
 
26
26
  def generate_solr_document
27
27
  super.tap do |solr_doc|
28
- solr_doc['is_child_bsi'] = object.is_child
28
+ solr_doc['is_child_bsi'] ||= object.is_child
29
+ solr_doc['split_from_pdf_id_ssi'] ||= object.split_from_pdf_id
29
30
  solr_doc['is_page_of_ssim'] = iiif_print_lineage_service.ancestor_ids_for(object)
30
- solr_doc['file_set_ids_ssim'] = iiif_print_lineage_service.descendent_file_set_ids_for(object)
31
+
32
+ # Due to a long-standing hack in Hyrax, the file_set_ids_ssim contains both file_set_ids and
33
+ # child work ids.
34
+ #
35
+ # See https://github.com/samvera/hyrax/blob/2b807fe101176d594129ef8a8fe466d3d03a372b/app/indexers/hyrax/work_indexer.rb#L15-L18
36
+ solr_doc['file_set_ids_ssim'] = iiif_print_lineage_service.descendent_member_ids_for(object)
31
37
  end
32
38
  end
33
39
  end
@@ -19,11 +19,24 @@ module IiifPrint
19
19
  # only UV viewable images should have is_page_of, it is only used for iiif search
20
20
  solr_doc['is_page_of_ssim'] = iiif_print_lineage_service.ancestor_ids_for(object) if object.mime_type&.match(/image/)
21
21
  # index for full text search
22
- text = IiifPrint::Data::WorkDerivatives.data(from: object, of_type: 'txt')
23
- text = text.tr("\n", ' ').squeeze(' ')
24
- solr_doc['all_text_timv'] = text
25
- solr_doc['all_text_tsimv'] = text
22
+ solr_doc['all_text_timv'] = all_text
23
+ solr_doc['all_text_tsimv'] = all_text
24
+ solr_doc['digest_ssim'] = digest_from_content
26
25
  end
27
26
  end
27
+
28
+ private
29
+
30
+ def digest_from_content
31
+ return unless object.original_file
32
+ object.original_file.digest.first.to_s
33
+ end
34
+
35
+ def all_text
36
+ text = IiifPrint.config.all_text_generator_function.call(object: object) || ''
37
+ return text if text.empty?
38
+
39
+ text.tr("\n", ' ').squeeze(' ')
40
+ end
28
41
  end
29
42
  end
@@ -4,6 +4,10 @@ module RDF
4
4
  class CustomIsChildTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/')
5
5
  property 'is_child'
6
6
  end
7
+
8
+ class FromPdfIdTerm < Vocabulary('http://id.loc.gov/vocabulary/identifiers/')
9
+ property 'split_from_pdf_id'
10
+ end
7
11
  end
8
12
 
9
13
  module IiifPrint
@@ -18,6 +22,11 @@ module IiifPrint
18
22
  multiple: false do |index|
19
23
  index.as :stored_searchable
20
24
  end
25
+ property :split_from_pdf_id,
26
+ predicate: ::RDF::FromPdfIdTerm.split_from_pdf_id,
27
+ multiple: false do |index|
28
+ index.as :stored_searchable
29
+ end
21
30
  end
22
31
 
23
32
  def set_children
@@ -19,6 +19,8 @@ module IiifPrint::Solr::Document
19
19
  def self.decorate(base)
20
20
  base.prepend(self)
21
21
  base.send(:attribute, :is_child, Hyrax::SolrDocument::Metadata::Solr::String, 'is_child_bsi')
22
+ base.send(:attribute, :split_from_pdf_id, Hyrax::SolrDocument::Metadata::Solr::String, 'split_from_pdf_id_ssi')
23
+ base.send(:attribute, :digest, Hyrax::SolrDocument::Metadata::Solr::String, 'digest_ssim')
22
24
 
23
25
  # @note These properties came from the newspaper_works gem. They are configurable.
24
26
  base.class_attribute :iiif_print_solr_field_names, default: %w[alternative_title genre
@@ -31,6 +33,10 @@ module IiifPrint::Solr::Document
31
33
  base
32
34
  end
33
35
 
36
+ def digest_sha1
37
+ digest[/urn:sha1:([\w]+)/, 1]
38
+ end
39
+
34
40
  def method_missing(method_name, *args, &block)
35
41
  super unless iiif_print_solr_field_names.include? method_name.to_s
36
42
  self[::ActiveFedora.index_field_mapper.solr_name(method_name.to_s)]
@@ -44,4 +50,12 @@ module IiifPrint::Solr::Document
44
50
  def file_set_ids
45
51
  self['file_set_ids_ssim']
46
52
  end
53
+
54
+ def any_highlighting?
55
+ response&.[]('highlighting')&.[](id)&.present?
56
+ end
57
+
58
+ def solr_document
59
+ self
60
+ end
47
61
  end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ # OVERRIDE: Blacklight IIIF Search v1.0.0
4
+ # IiifSearchDecorator module extends the functionality of the BlacklightIiifSearch::IiifSearch class
5
+ # by overriding the solr_params method to modify the search query to include the parent's metadata.
6
+ module IiifPrint
7
+ module IiifSearchDecorator
8
+ ##
9
+ # Overrides the solr_params method from BlacklightIiifSearch::IiifSearch to modify the search query.
10
+ # The method adds an additional filter to the query to include either the object_relation_field OR the
11
+ # parent document's id and removes the :f parameter from the query.
12
+ # :object_relation_field refers to the CatalogController's configuration which is typically set to
13
+ # 'is_page_of_ssim' in the host application which only searches child works by default.
14
+ #
15
+ # config.iiif_search = {
16
+ # full_text_field: 'all_text_tsimv',
17
+ # object_relation_field: 'is_page_of_ssim',
18
+ # supported_params: %w[q page],
19
+ # autocomplete_handler: 'iiif_suggest',
20
+ # suggester_name: 'iiifSuggester'
21
+ # }
22
+ #
23
+ # @return [Hash] A hash containing the modified Solr search parameters
24
+ #
25
+ def solr_params
26
+ return { q: 'nil:nil' } unless q
27
+
28
+ {
29
+ q: "#{q} AND (#{iiif_config[:object_relation_field]}:\"#{parent_document.id}\" OR id:\"#{parent_document.id}\")",
30
+ rows: rows,
31
+ page: page
32
+ }
33
+ end
34
+ end
35
+ end
@@ -4,14 +4,37 @@ module IiifPrint
4
4
  # @see https://github.com/scientist-softserv/louisville-hyku/commit/67467e5cf9fdb755f54419f17d3c24c87032d0af
5
5
  def annotation_list
6
6
  json_results = super
7
- json_results&.[]('resources')&.each do |result_hit|
7
+
8
+ # Check and process invalid hit
9
+ if json_results&.[]('resources')
10
+ remove_invalid_hit(json_results)
11
+ add_metadata_match(json_results)
12
+ end
13
+
14
+ json_results
15
+ end
16
+
17
+ def remove_invalid_hit(json_results)
18
+ resources = json_results['resources']
19
+ invalid_hit = resources.detect { |resource| resource["on"].include?(IiifPrint::BlacklightIiifSearch::AnnotationDecorator::INVALID_MATCH_TEXT) }
20
+ return unless invalid_hit
21
+
22
+ # Delete invalid hit from resources, remove first hit (which is from the invalid hit), decrement total within
23
+ resources.delete(invalid_hit)
24
+ json_results['hits'].shift
25
+ json_results['within']['total'] -= 1
26
+ end
27
+
28
+ def add_metadata_match(json_results)
29
+ json_results['resources'].each do |result_hit|
8
30
  next if result_hit['resource'].present?
31
+
32
+ # Add resource details if not present
9
33
  result_hit['resource'] = {
10
34
  "@type": "cnt:ContentAsText",
11
35
  "chars": "Metadata match, see sidebar for details"
12
36
  }
13
37
  end
14
- json_results
15
38
  end
16
39
  end
17
40
  end
@@ -3,5 +3,8 @@ module IiifPrint
3
3
  validates :parent_id, presence: true
4
4
  validates :child_title, presence: true
5
5
  validates :child_order, presence: true
6
+ validates :parent_model, presence: true
7
+ validates :child_model, presence: true
8
+ validates :file_id, presence: true
6
9
  end
7
10
  end