iiif_print 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (181) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +102 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
  19. data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
  20. data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
  21. data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
  22. data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  23. data/app/listeners/iiif_print/listener.rb +31 -0
  24. data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
  25. data/app/models/concerns/iiif_print/solr/document.rb +19 -3
  26. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  27. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  28. data/app/models/iiif_print/pending_relationship.rb +3 -0
  29. data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
  30. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  31. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  32. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
  33. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  34. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  35. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  36. data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
  37. data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
  38. data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
  39. data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
  40. data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
  41. data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
  42. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  43. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  44. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  45. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  46. data/config/initializers/simple_schema_loader.rb +1 -0
  47. data/config/locales/iiif_print.en.yml +4 -0
  48. data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
  49. data/config/routes.rb +3 -0
  50. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
  51. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
  52. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
  53. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  54. data/docker-compose.yml +2 -2
  55. data/iiif_print.gemspec +11 -10
  56. data/lib/generators/iiif_print/install_generator.rb +21 -1
  57. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  58. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  59. data/lib/iiif_print/base_derivative_service.rb +14 -2
  60. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
  61. data/lib/iiif_print/catalog_search_builder.rb +7 -3
  62. data/lib/iiif_print/configuration.rb +205 -8
  63. data/lib/iiif_print/data/fileset_helper.rb +3 -3
  64. data/lib/iiif_print/data/work_derivatives.rb +4 -4
  65. data/lib/iiif_print/engine.rb +53 -15
  66. data/lib/iiif_print/errors.rb +18 -0
  67. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  68. data/lib/iiif_print/image_tool.rb +12 -8
  69. data/lib/iiif_print/jp2_derivative_service.rb +4 -1
  70. data/lib/iiif_print/lineage_service.rb +47 -13
  71. data/lib/iiif_print/metadata.rb +67 -48
  72. data/lib/iiif_print/pdf_derivative_service.rb +3 -1
  73. data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
  74. data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
  75. data/lib/iiif_print/persistence_layer.rb +118 -0
  76. data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
  77. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
  78. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  79. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
  80. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  81. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  82. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  83. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  84. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  85. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  86. data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
  87. data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
  88. data/lib/iiif_print/tiff_derivative_service.rb +3 -1
  89. data/lib/iiif_print/version.rb +1 -1
  90. data/lib/iiif_print.rb +210 -20
  91. data/lib/samvera/derivatives/configuration.rb +83 -0
  92. data/lib/samvera/derivatives/hyrax.rb +129 -0
  93. data/lib/samvera/derivatives.rb +238 -0
  94. data/tasks/copy_authorities_to_test_app.rake +11 -0
  95. data/tasks/iiif_print_dev.rake +4 -4
  96. metadata +111 -196
  97. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  98. data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
  99. data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
  100. data/bin/rails +0 -13
  101. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
  102. data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
  103. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  104. data/spec/.keep.txt +0 -1
  105. data/spec/factories/ability.rb +0 -6
  106. data/spec/factories/newspaper_issue.rb +0 -7
  107. data/spec/factories/newspaper_page.rb +0 -7
  108. data/spec/factories/newspaper_page_solr_document.rb +0 -12
  109. data/spec/factories/newspaper_title.rb +0 -8
  110. data/spec/factories/uploaded_pdf_file.rb +0 -9
  111. data/spec/factories/uploaded_txt_file.rb +0 -9
  112. data/spec/factories/user.rb +0 -13
  113. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  114. data/spec/fixtures/files/4.1.07.tiff +0 -0
  115. data/spec/fixtures/files/README.md +0 -7
  116. data/spec/fixtures/files/alto-2-0.xsd +0 -714
  117. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  118. data/spec/fixtures/files/credits.md +0 -16
  119. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  120. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  121. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  122. data/spec/fixtures/files/minimal-alto.xml +0 -31
  123. data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
  124. data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
  125. data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
  126. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  127. data/spec/fixtures/files/ocr_alto.xml +0 -202
  128. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
  129. data/spec/fixtures/files/ocr_color.tiff +0 -0
  130. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  131. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  132. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  133. data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
  134. data/spec/fixtures/files/page1.tiff +0 -0
  135. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  136. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  137. data/spec/fixtures/files/thumbnail.jpg +0 -0
  138. data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
  139. data/spec/helpers/iiif_print_helper_spec.rb +0 -43
  140. data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
  141. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
  142. data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
  143. data/spec/iiif_print/configuration_spec.rb +0 -67
  144. data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
  145. data/spec/iiif_print/data/work_file_spec.rb +0 -99
  146. data/spec/iiif_print/data/work_files_spec.rb +0 -237
  147. data/spec/iiif_print/image_tool_spec.rb +0 -109
  148. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
  149. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
  150. data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
  151. data/spec/iiif_print/lineage_service_spec.rb +0 -13
  152. data/spec/iiif_print/metadata_spec.rb +0 -115
  153. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
  154. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
  155. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
  156. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
  157. data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
  158. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
  159. data/spec/iiif_print_spec.rb +0 -51
  160. data/spec/misc_shared.rb +0 -111
  161. data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
  162. data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
  163. data/spec/models/solr_document_spec.rb +0 -14
  164. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
  165. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
  166. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
  167. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
  168. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
  169. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
  170. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
  171. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
  172. data/spec/spec_helper.rb +0 -181
  173. data/spec/support/controller_level_helpers.rb +0 -28
  174. data/spec/support/iiif_print_models.rb +0 -127
  175. data/spec/test_app_templates/blacklight.yml +0 -9
  176. data/spec/test_app_templates/fedora.yml +0 -15
  177. data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
  178. data/spec/test_app_templates/redis.yml +0 -9
  179. data/spec/test_app_templates/solr/conf/schema.xml +0 -362
  180. data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
  181. data/spec/test_app_templates/solr.yml +0 -7
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ # OVERRIDE Hyrax 5.0.0rc2 to add file_set.iiif_print_conditionally_destroy_spawned_children with user args
4
+
5
+ module Hyrax
6
+ module Transactions
7
+ module Steps
8
+ module DeleteAllFileSetsDecorator
9
+ include Dry::Monads[:result]
10
+
11
+ ##
12
+ # @param [Valkyrie::Resource] resource
13
+ # @param [::User] user the user responsible for the delete action
14
+ #
15
+ # @return [Dry::Monads::Result]
16
+ def call(resource, user: nil)
17
+ return Failure(:resource_not_persisted) unless resource.persisted?
18
+
19
+ @query_service.custom_queries.find_child_file_sets(resource: resource).each do |file_set|
20
+ return Failure[:failed_to_delete_file_set, file_set] unless
21
+ Hyrax::Transactions::Container['file_set.destroy']
22
+ .with_step_args('file_set.remove_from_work' => { user: user },
23
+ 'file_set.delete' => { user: user },
24
+ 'file_set.iiif_print_conditionally_destroy_spawned_children' => { user: user })
25
+ .call(file_set).success?
26
+ rescue ::Ldp::Gone
27
+ nil
28
+ end
29
+
30
+ Success(resource)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,13 @@
1
+ <%# OVERRIDE Hyrax 2.9.6 to show parent_query params if metadata is found in parent record; the h3 is closed once, after the conditional %>
2
+
3
+ <div class="search-results-title-row">
4
+ <h3 class="search-result-title">
5
+ <% if params['q'].present? && document.any_highlighting? %>
6
+ <%= link_to document.title_or_label, [document, { parent_query: params['q'] }] %>
7
+ <% elsif params['q'].present? %>
8
+ <%= link_to document.title_or_label, [document, { query: params['q'] }] %>
9
+ <% else %>
10
+ <%= link_to document.title_or_label, document %>
11
+ <% end %>
12
+ </h3>
13
+ </div>
@@ -1,9 +1,10 @@
1
- <% if presenter.iiif_viewer? %>
2
- <% if defined?(viewer) && viewer %>
1
+ <% if presenter.representative_id.present? && presenter.representative_presenter.present? %>
2
+ <% if defined?(viewer) && viewer && presenter.iiif_viewer? %>
3
3
  <%= iiif_viewer_display presenter %>
4
4
  <% else %>
5
5
  <%= render media_display_partial(presenter.representative_presenter), file_set: presenter.representative_presenter %>
6
6
  <% end %>
7
7
  <% else %>
8
- <%= image_tag 'default.png', class: "canonical-image", alt: 'default representative image' %>
8
+ <% alt = block_for(name: 'default_work_image_text') || 'Default work thumbnail' %>
9
+ <%= image_tag default_work_image, class: "canonical-image", alt: alt %>
9
10
  <% end %>
@@ -1,7 +1,7 @@
1
1
  <div class="viewer-wrapper">
2
2
  <iframe
3
3
  id="uv-iframe"
4
- src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %>"
4
+ src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %><%= uv_search_param %>"
5
5
  allowfullscreen="true"
6
6
  frameborder="0"
7
7
  ></iframe>
@@ -0,0 +1,24 @@
1
+ <div class="form-actions">
2
+ <% if Hyrax.config.analytics? %>
3
+ <% # turbolinks needs to be turned off or the page will use the cache and the %>
4
+ <% # analytics graph will not show unless the page is refreshed. %>
5
+ <%= link_to t('.analytics'), @presenter.stats_path, id: 'stats', class: 'btn btn-default', data: { turbolinks: false } %>
6
+ <% end %>
7
+
8
+ <%# Hyrax 2.9.6 does not respond to workflow_restriction; that is something added in later versions. %>
9
+ <% if @presenter.editor? && (!respond_to?(:workflow_restriction?) || !workflow_restriction?(@presenter)) %>
10
+ <%= link_to t(".edit_this", type: @presenter.human_readable_type), edit_polymorphic_path([main_app, @presenter]),
11
+ class: 'btn btn-default' %>
12
+ <%= link_to t(".delete_this", type: @presenter.human_readable_type), [main_app, @presenter],
13
+ class: 'btn btn-danger', data: { confirm: t(".confirm_delete_this", type: @presenter.human_readable_type) },
14
+ method: :delete %>
15
+ <% end %>
16
+
17
+ <% if @presenter.show_split_button? && @presenter.editor? && @presenter.pdf? %>
18
+ <%= link_to t("iiif_print.file_set.split_this"), iiif_print.split_pdf_path(@presenter),
19
+ class: 'btn btn-default', data: { confirm: t("iiif_print.file_set.confirm_split_this") },
20
+ method: :post %>
21
+ <% end %>
22
+
23
+ <%= render 'social_media' %>
24
+ </div>
@@ -0,0 +1 @@
1
+ "Hyrax::SimpleSchemaLoader".safe_constantize&.prepend(IiifPrint::SimpleSchemaLoaderDecorator)
@@ -52,6 +52,10 @@ en:
52
52
  label: 'Place of publication'
53
53
  publication_title:
54
54
  label: 'Publication'
55
+ file_set:
56
+ split_this: 'Re-Split PDF'
57
+ confirm_split_this: 'Re-Split PDF'
58
+ split_submitted: 'Submitted PDF re-splitting job for FileSet ID=%{id}'
55
59
  newspapers_search:
56
60
  title: 'Search Newspapers'
57
61
  text: 'Use this form to search for full-text newspaper content.'
@@ -0,0 +1,21 @@
1
+ attributes:
2
+ is_child:
3
+ type: bool
4
+ multiple: false
5
+ index_keys:
6
+ - "is_child_bsi"
7
+ form:
8
+ required: false
9
+ primary: false
10
+ multiple: false
11
+ predicate: "http://id.loc.gov/vocabulary/identifiers/isChild"
12
+ split_from_pdf_id:
13
+ type: string
14
+ multiple: false
15
+ index_keys:
16
+ - "split_from_pdf_id_ssi"
17
+ form:
18
+ required: false
19
+ primary: false
20
+ multiple: false
21
+ predicate: "http://id.loc.gov/vocabulary/identifiers/splitFromPdfId"
data/config/routes.rb ADDED
@@ -0,0 +1,3 @@
1
+ IiifPrint::Engine.routes.draw do
2
+ post "split_pdfs/:file_set_id" => "split_pdfs#create", as: :split_pdf
3
+ end
@@ -1,12 +1,14 @@
1
1
  class CreateIiifPrintDerivativeAttachments < ActiveRecord::Migration[5.0]
2
2
  def change
3
- create_table :iiif_print_derivative_attachments do |t|
4
- t.string :fileset_id
5
- t.string :path
6
- t.string :destination_name
3
+ unless table_exists?(:iiif_print_derivative_attachments)
4
+ create_table :iiif_print_derivative_attachments do |t|
5
+ t.string :fileset_id
6
+ t.string :path
7
+ t.string :destination_name
7
8
 
8
- t.timestamps
9
+ t.timestamps
10
+ end
11
+ add_index :iiif_print_derivative_attachments, :fileset_id
9
12
  end
10
- add_index :iiif_print_derivative_attachments, :fileset_id
11
13
  end
12
14
  end
@@ -1,11 +1,13 @@
1
1
  class CreateIiifPrintIngestFileRelations < ActiveRecord::Migration[5.0]
2
2
  def change
3
- create_table :iiif_print_ingest_file_relations do |t|
4
- t.string :file_path
5
- t.string :derivative_path
3
+ unless table_exists?(:iiif_print_ingest_file_relations)
4
+ create_table :iiif_print_ingest_file_relations do |t|
5
+ t.string :file_path
6
+ t.string :derivative_path
6
7
 
7
- t.timestamps
8
+ t.timestamps
9
+ end
10
+ add_index :iiif_print_ingest_file_relations, :file_path
8
11
  end
9
- add_index :iiif_print_ingest_file_relations, :file_path
10
12
  end
11
13
  end
@@ -1,11 +1,13 @@
1
1
  class CreateIiifPrintPendingRelationships < ActiveRecord::Migration[5.1]
2
2
  def change
3
- create_table :iiif_print_pending_relationships do |t|
4
- t.string :child_title, null: false
5
- t.string :parent_id, null: false
6
- t.string :child_order, null: false
7
- t.timestamps
3
+ unless table_exists?(:iiif_print_pending_relationships)
4
+ create_table :iiif_print_pending_relationships do |t|
5
+ t.string :child_title, null: false
6
+ t.string :parent_id, null: false
7
+ t.string :child_order, null: false
8
+ t.timestamps
9
+ end
10
+ add_index :iiif_print_pending_relationships, :parent_id
8
11
  end
9
- add_index :iiif_print_pending_relationships, :parent_id
10
12
  end
11
13
  end
@@ -0,0 +1,7 @@
1
+ class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :iiif_print_pending_relationships, :parent_model, :string unless column_exists?(:iiif_print_pending_relationships, :parent_model)
4
+ add_column :iiif_print_pending_relationships, :child_model, :string unless column_exists?(:iiif_print_pending_relationships, :child_model)
5
+ add_column :iiif_print_pending_relationships, :file_id, :string unless column_exists?(:iiif_print_pending_relationships, :file_id)
6
+ end
7
+ end
data/docker-compose.yml CHANGED
@@ -85,12 +85,12 @@ services:
85
85
  environment:
86
86
  - VIRTUAL_PORT=3000
87
87
  - VIRTUAL_HOST=.hyku.test
88
- command: tail -f /dev/null
88
+ # command: tail -f /dev/null
89
89
  ##
90
90
  ## Similar to the above, except we will bundle and then tell the container
91
91
  ## to wait. You'll then need to bash into the web container to do much of
92
92
  ## anything.
93
- # command: sh -l -c "bundle && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
93
+ command: sh -l -c "bundle install && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
94
94
  depends_on:
95
95
  db:
96
96
  condition: service_started
data/iiif_print.gemspec CHANGED
@@ -12,21 +12,18 @@ Gem::Specification.new do |spec|
12
12
  spec.email = ['sean.upton@utah.edu', 'jacob.reed@utah.edu',
13
13
  'brian.mcbride@utah.edu', 'eenglish@bpl.org', 'kirk.wang@scientist.com',
14
14
  'larita@scientist.com', 'jeremy.n.friesen@gmail.com']
15
- spec.homepage = 'https://github.com/samvera-labs/iiif_print'
15
+ spec.homepage = 'https://github.com/scientist-softserv/iiif_print/'
16
16
  spec.description = 'Gem/Engine for IIIF Print works in Hyrax-based Samvera Application.'
17
17
  spec.summary = <<-SUMMARY
18
- iiif_print is a Rails Engine gem providing model and administrative
19
- functions to Hyrax-based Samvera applications, for management of
20
- (primarily scanned) content.
18
+ IiifPrint is a gem (Rails "engine") for Hyrax-based digital repository applications to support displaying parent/child works in the same viewer (Universal Viewer) and the ability to search OCR from the parent work to the child work(s). IiifPrint was originally based off of the samvera-labs Newspaper gem.
21
19
  SUMMARY
22
20
  spec.license = 'Apache-2.0'
23
- spec.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
21
+ spec.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR).select { |f| File.dirname(f) !~ %r{\A"?spec\/?} && f != 'bin/rails' }
24
22
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
25
- spec.add_dependency 'blacklight_iiif_search', '~> 1.0'
26
- spec.add_dependency 'dry-monads', '~> 1.4.0'
27
- spec.add_dependency 'hyrax', '>= 2.5', '< 4.0'
23
+ spec.add_dependency 'blacklight_iiif_search', '>= 1.0', '< 3.0'
24
+ spec.add_dependency 'derivative-rodeo', "~> 0.5"
25
+ spec.add_dependency 'hyrax', '>= 2.5', '< 6'
28
26
  spec.add_dependency 'nokogiri', '>=1.13.2'
29
- spec.add_dependency 'rails', '~> 5.0'
30
27
  spec.add_dependency 'rdf-vocab', '~> 3.0'
31
28
 
32
29
  spec.add_development_dependency 'bixby'
@@ -34,10 +31,14 @@ SUMMARY
34
31
  spec.add_development_dependency 'engine_cart', '~> 2.2'
35
32
  spec.add_development_dependency "factory_bot", '~> 4.4'
36
33
  spec.add_development_dependency 'fcrepo_wrapper', '~> 0.5', '>= 0.5.1'
34
+ # TODO: We want to remove dependency on this
37
35
  spec.add_development_dependency 'newspaper_works_fixtures', '~> 0.3', '>=0.3.1'
38
36
  spec.add_development_dependency 'rails-controller-testing', '~> 1'
39
- spec.add_development_dependency 'rspec-rails', '~> 3.1'
37
+ spec.add_development_dependency 'json-canonicalization', '0.3.1'
38
+ spec.add_development_dependency 'rspec-rails'
40
39
  spec.add_development_dependency 'rspec-activemodel-mocks'
41
40
  spec.add_development_dependency 'shoulda-matchers', '~> 3.1'
42
41
  spec.add_development_dependency 'solr_wrapper', '>= 1.1', '< 3.0'
42
+ spec.add_development_dependency 'solargraph'
43
+ spec.add_development_dependency 'yard'
43
44
  end
@@ -15,13 +15,21 @@ module IiifPrint
15
15
  say_status('info',
16
16
  'BLACKLIGHT IIIF SEARCH NOT INSTALLED; INSTALLING BLACKLIGHT IIIF SEARCH',
17
17
  :blue)
18
- generate 'blacklight_iiif_search:install'
18
+ generate 'blacklight_iiif_search:install --skip-solr'
19
19
  end
20
20
 
21
21
  def catalog_controller_configuration
22
22
  generate 'iiif_print:catalog_controller'
23
23
  end
24
24
 
25
+ def install_routes
26
+ return if IO.read('config/routes.rb').include?('mount IiifPrint::Engine')
27
+
28
+ inject_into_file 'config/routes.rb', after: /mount Hyrax::Engine\s*\n/ do
29
+ " mount IiifPrint::Engine, at: '/'\n"\
30
+ end
31
+ end
32
+
25
33
  def inject_configuration
26
34
  copy_file 'config/initializers/iiif_print.rb'
27
35
  end
@@ -30,6 +38,10 @@ module IiifPrint
30
38
  generate 'iiif_print:assets'
31
39
  end
32
40
 
41
+ def inject_helper
42
+ copy_file 'helpers/iiif_print_helper.rb' 'app/helpers/iiif_print_helper.rb'
43
+ end
44
+
33
45
  # Blacklight IIIF Search generator has some linting that does not agree with CircleCI on Hyku
34
46
  # ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/controller_generator.rb
35
47
  # the following two methods do a cleanup to appease Rubocop
@@ -48,5 +60,13 @@ module IiifPrint
48
60
  contents.insert(0, "# frozen_string_literal: true\n\n")
49
61
  File.write(file, contents)
50
62
  end
63
+
64
+ def add_allinson_flex_fields_method_to_iiif_search_builder
65
+ file_path = "app/models/iiif_search_builder.rb"
66
+ contents = File.read(file_path)
67
+ contents.gsub!('include Blacklight::Solr::SearchBuilderBehavior', "include Blacklight::Solr::SearchBuilderBehavior\n include IiifPrint::AllinsonFlexFields")
68
+ contents.gsub!('self.default_processor_chain += [:ocr_search_params]', 'self.default_processor_chain += %i[ocr_search_params include_allinson_flex_fields]')
69
+ File.write(file_path, contents)
70
+ end
51
71
  end
52
72
  end
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Lint/UnusedBlockArgument
1
2
  IiifPrint.config do |config|
2
3
  # NOTE: WorkTypes and models are used synonymously here.
3
4
  # Add models to be excluded from search so the user
@@ -15,8 +16,14 @@ IiifPrint.config do |config|
15
16
  # @example
16
17
  # config.excluded_model_name_solr_field_key = 'some_solr_field_key'
17
18
 
18
- # Configure how the manifest sorts the canvases, by default it sorts by :title,
19
- # but a different model property may be desired such as :date_published
20
- # @example
21
- # config.sort_iiif_manifest_canvases_by = :date_published
19
+ if Rails.env.development?
20
+ if DerivativeRodeo.config.aws_s3_access_key_id.present? && DerivativeRodeo.config.aws_s3_secret_access_key.present?
21
+ Rails.logger.info("DerivativeRodeo S3 Credentials detected using 's3' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
22
+ IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 's3'
23
+ else
24
+ Rails.logger.info("DerivativeRodeo S3 Credentials not-detected using 'file' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
25
+ IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 'file'
26
+ end
27
+ end
22
28
  end
29
+ # rubocop:enable Lint/UnusedBlockArgument
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IiifPrintHelper
4
+ include IiifPrint::IiifPrintHelperBehavior
5
+ end
@@ -7,7 +7,11 @@ module IiifPrint
7
7
  class_attribute :target_extension, default: nil
8
8
 
9
9
  def initialize(file_set)
10
- @file_set = file_set
10
+ @file_set = if file_set.is_a?(Hyrax::FileMetadata)
11
+ Hyrax.query_service.find_by(id: file_set.file_set_id)
12
+ else
13
+ file_set
14
+ end
11
15
  @dest_path = nil
12
16
  @source_path = nil
13
17
  @source_meta = nil
@@ -25,7 +29,11 @@ module IiifPrint
25
29
  # @see IiifPrint::PluggableDerivativeService#plugins_for
26
30
  # @return [Boolean]
27
31
  def valid?
28
- true
32
+ # @note We are taking a shortcut because currently we are only concerned about images.
33
+ # @TODO: verify if this works for ActiveFedora and if so, remove commented code.
34
+ # If not, modify to use adapter.
35
+ # file_set.class.image_mime_types.include?(file_set.mime_type)
36
+ file_set.original_file.image?
29
37
  end
30
38
 
31
39
  def derivative_path_factory
@@ -109,5 +117,9 @@ module IiifPrint
109
117
  # intermediate -> PDF
110
118
  im_convert
111
119
  end
120
+
121
+ def mime_type_for(extension)
122
+ Marcel::MimeType.for extension: extension
123
+ end
112
124
  end
113
125
  end
@@ -2,6 +2,7 @@
2
2
  module IiifPrint
3
3
  module BlacklightIiifSearch
4
4
  module AnnotationDecorator
5
+ INVALID_MATCH_TEXT = "#xywh=INVALID,INVALID,INVALID,INVALID".freeze
5
6
  ##
6
7
  # Create a URL for the annotation
7
8
  # use a Hyrax-y URL syntax:
@@ -28,23 +29,33 @@ module IiifPrint
28
29
  # @return [String]
29
30
  def coordinates
30
31
  return default_coords if query.blank?
32
+
33
+ sanitized_query = sanitize_query.downcase
31
34
  coords_json = fetch_and_parse_coords
32
- return default_coords unless coords_json && coords_json['coords']
33
- query_terms = query.split(' ').map(&:downcase)
35
+ return derived_coords_json_and_properties(sanitized_query) unless coords_json && coords_json['coords']
36
+
37
+ query_terms = sanitized_query.split(' ')
38
+
34
39
  matches = coords_json['coords'].select do |k, _v|
35
40
  k.downcase =~ /(#{query_terms.join('|')})/
36
41
  end
37
42
  return default_coords if matches.blank?
43
+
38
44
  coords_array = matches.values.flatten(1)[hl_index]
39
- return default unless coords_array
45
+ return default_coords unless coords_array
46
+
40
47
  "#xywh=#{coords_array.join(',')}"
41
48
  end
42
49
 
50
+ def sanitize_query
51
+ query.match(additional_query_terms_regex)[1].strip
52
+ end
53
+
43
54
  ##
44
55
  # return the JSON word-coordinates file contents
45
56
  # @return [JSON]
46
57
  def fetch_and_parse_coords
47
- coords = IiifPrint::Data::WorkDerivatives.data(from: file_set_id, of_type: 'json')
58
+ coords = IiifPrint.config.ocr_coords_from_json_function.call(file_set_id: file_set_id, document: document)
48
59
  return nil if coords.blank?
49
60
  begin
50
61
  JSON.parse(coords)
@@ -53,6 +64,23 @@ module IiifPrint
53
64
  end
54
65
  end
55
66
 
67
+ # This is a bit hacky but it is checking if any of the properties contain the query term
68
+ # if there are no coords and there is a metadata property match
69
+ # then we return the default coords
70
+ # else we insert an invalid match text to be stripped out at a later point
71
+ # @see IiifPrint::IiifSearchResponseDecorator#annotation_list
72
+ def derived_coords_json_and_properties(sanitized_query)
73
+ property = @document.keys.detect do |key|
74
+ (key.ends_with?("_tesim") || key.ends_with?("_tsim")) && property_includes_sanitized_query?(key, sanitized_query)
75
+ end
76
+
77
+ property ? default_coords : INVALID_MATCH_TEXT
78
+ end
79
+
80
+ def property_includes_sanitized_query?(property, sanitized_query)
81
+ @document[property].join.downcase.include?(sanitized_query)
82
+ end
83
+
56
84
  ##
57
85
  # a default set of coordinates
58
86
  # @return [String]
@@ -75,9 +103,33 @@ module IiifPrint
75
103
  def file_set_id
76
104
  return document['id'] if document.file_set?
77
105
 
78
- file_set_ids = document['file_set_ids_ssim']
106
+ file_set_ids = document['member_ids_ssim']
79
107
  raise "#{self.class}: NO FILE SET ID" if file_set_ids.blank?
80
- file_set_ids.first
108
+
109
+ # Since a parent work's `member_ids_ssim` can contain child work ids as well as file set ids,
110
+ # this will ensure that the file set id is indeed a `FileSet`
111
+ file_set_ids.detect { |id| SolrDocument.find(id).file_set? }
112
+ end
113
+
114
+ ##
115
+ # This method is a workaround to compensate for overriding the solr_params method in
116
+ # BlacklightIiifSearch::IiifSearch. In the override, the solr_params method adds an additional filter to the query
117
+ # to include either the object_relation_field OR the parent document's id and removes the :f parameter from the
118
+ # query. This resulted in the query split here returning more than the actual query term.
119
+ #
120
+ # @see IiifPrint::IiifSearchDecorator#solr_params
121
+ # @return [Regexp] A regular expression to find the last AND and everything after it
122
+ # @example
123
+ # 'foo AND (is_page_of_ssim:\"123123\" OR id:\"123123\")' #=> 'foo'
124
+ def additional_query_terms_regex
125
+ /(.*)(?= AND (\(.+\)|\w+)$)/
126
+ end
127
+
128
+ ##
129
+ # @return [IIIF::Presentation::Resource]
130
+ def text_resource_for_annotation
131
+ IIIF::Presentation::Resource.new('@type' => 'cnt:ContentAsText',
132
+ 'chars' => sanitize_query)
81
133
  end
82
134
  end
83
135
  end
@@ -11,19 +11,23 @@ module IiifPrint
11
11
  include IiifPrint::HighlightSearchParams
12
12
  # TODO: Do we need the following as a module? It hides the behavior
13
13
  include IiifPrint::ExcludeModels
14
+ include IiifPrint::AllinsonFlexFields
14
15
 
15
16
  # NOTE: If you are using advanced_search, the :exclude_models and :highlight_search_params must
16
17
  # be added after the advanced_search methods (which are not part of this gem). In other tests,
17
18
  # we found that having the advanced search processing after the two aforementioned processors
18
19
  # resulted in improper evaluation of keyword querying.
19
- self.default_processor_chain += [:exclude_models, :highlight_search_params, :show_parents_only]
20
+ self.default_processor_chain += [:exclude_models,
21
+ :highlight_search_params,
22
+ :show_parents_only,
23
+ :include_allinson_flex_fields]
20
24
 
21
25
  # rubocop:enable Naming/PredicateName
22
26
  def show_parents_only(solr_parameters)
23
27
  query = if blacklight_params["include_child_works"] == 'true'
24
- ActiveFedora::SolrQueryBuilder.construct_query(is_child_bsi: 'true')
28
+ IiifPrint.solr_construct_query(is_child_bsi: 'true')
25
29
  else
26
- ActiveFedora::SolrQueryBuilder.construct_query(is_child_bsi: nil)
30
+ IiifPrint.solr_construct_query(is_child_bsi: nil)
27
31
  end
28
32
  solr_parameters[:fq] += [query]
29
33
  end