iiif_print 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +98 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
  19. data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
  20. data/app/models/concerns/iiif_print/solr/document.rb +14 -0
  21. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  22. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  23. data/app/models/iiif_print/pending_relationship.rb +3 -0
  24. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  25. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  26. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
  27. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  28. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  29. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  30. data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
  31. data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
  32. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  33. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  34. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  35. data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
  36. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  37. data/config/locales/iiif_print.en.yml +4 -0
  38. data/config/routes.rb +3 -0
  39. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  40. data/docker-compose.yml +2 -2
  41. data/iiif_print.gemspec +10 -9
  42. data/lib/generators/iiif_print/install_generator.rb +21 -1
  43. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  44. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  45. data/lib/iiif_print/base_derivative_service.rb +2 -1
  46. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
  47. data/lib/iiif_print/catalog_search_builder.rb +5 -1
  48. data/lib/iiif_print/configuration.rb +145 -8
  49. data/lib/iiif_print/data/fileset_helper.rb +1 -1
  50. data/lib/iiif_print/data/work_derivatives.rb +3 -3
  51. data/lib/iiif_print/engine.rb +7 -13
  52. data/lib/iiif_print/errors.rb +18 -0
  53. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  54. data/lib/iiif_print/image_tool.rb +12 -8
  55. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
  56. data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
  57. data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  58. data/lib/iiif_print/lineage_service.rb +29 -8
  59. data/lib/iiif_print/metadata.rb +67 -48
  60. data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
  61. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
  62. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  63. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
  64. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  65. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  66. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  67. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  68. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  69. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  70. data/lib/iiif_print/version.rb +1 -1
  71. data/lib/iiif_print.rb +167 -12
  72. data/lib/samvera/derivatives/configuration.rb +83 -0
  73. data/lib/samvera/derivatives/hyrax.rb +129 -0
  74. data/lib/samvera/derivatives.rb +238 -0
  75. data/spec/factories/newspaper_page_solr_document.rb +9 -1
  76. data/spec/fixtures/authorities/licenses.yml +4 -0
  77. data/spec/fixtures/authorities/rights_statements.yml +4 -0
  78. data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
  79. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
  80. data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
  81. data/spec/iiif_print/configuration_spec.rb +141 -15
  82. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
  83. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
  84. data/spec/iiif_print/lineage_service_spec.rb +1 -1
  85. data/spec/iiif_print/metadata_spec.rb +157 -23
  86. data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
  87. data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
  88. data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
  89. data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
  90. data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
  91. data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
  92. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
  93. data/spec/iiif_print_spec.rb +125 -5
  94. data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
  95. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
  96. data/spec/samvera/derivatives/configuration_spec.rb +41 -0
  97. data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
  98. data/spec/samvera/derivatives_spec.rb +54 -0
  99. data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
  100. data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
  101. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
  102. data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
  103. data/tasks/copy_authorities_to_test_app.rake +11 -0
  104. data/tasks/iiif_print_dev.rake +4 -4
  105. metadata +123 -35
  106. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  107. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  108. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
@@ -1,14 +1,20 @@
1
1
  module IiifPrint
2
+ # rubocop:disable Metrics/ModuleLength
2
3
  module ManifestBuilderServiceBehavior
3
4
  def initialize(*args,
4
5
  version: IiifPrint.config.default_iiif_manifest_version,
5
6
  iiif_manifest_factory: iiif_manifest_factory_for(version),
6
7
  &block)
7
- super(*args, iiif_manifest_factory: iiif_manifest_factory, &block)
8
+ # Ensure we're setting the version before we go any further.
8
9
  @version = version.to_i
10
+ @child_works = nil
11
+ super(*args, iiif_manifest_factory: iiif_manifest_factory, &block)
9
12
  end
10
13
 
14
+ attr_reader :child_works, :version
15
+
11
16
  def manifest_for(presenter:)
17
+ @child_works = get_solr_hits(member_ids_for(presenter))
12
18
  build_manifest(presenter: presenter)
13
19
  end
14
20
 
@@ -36,62 +42,113 @@ module IiifPrint
36
42
  # ManifestFactory interface?
37
43
  manifest = manifest_factory.new(presenter).to_h
38
44
  hash = JSON.parse(manifest.to_json)
39
- hash = send("sanitize_v#{@version}", hash: hash, presenter: presenter)
40
- send("sorted_canvases_v#{@version}", hash: hash, sort_field: IiifPrint.config.sort_iiif_manifest_canvases_by)
45
+ parent_and_child_solr_hits = parent_and_child_solr_hits(presenter) if @child_works.present?
46
+ hash = send("sanitize_v#{@version}", hash: hash, presenter: presenter, solr_doc_hits: parent_and_child_solr_hits)
47
+ if @child_works.present? && IiifPrint.config.sort_iiif_manifest_canvases_by
48
+ send("sort_canvases_v#{@version}",
49
+ hash: hash,
50
+ sort_field: IiifPrint.config.sort_iiif_manifest_canvases_by)
51
+ end
52
+ hash
41
53
  end
42
54
 
43
- def sanitize_v2(hash:, presenter:)
55
+ def sanitize_v2(hash:, presenter:, solr_doc_hits:)
44
56
  hash['label'] = CGI.unescapeHTML(sanitize_value(hash['label'])) if hash.key?('label')
45
57
  hash.delete('description') # removes default description since it's in the metadata fields
46
58
  hash['sequences']&.each do |sequence|
47
59
  sequence['canvases']&.each do |canvas|
48
60
  canvas['label'] = CGI.unescapeHTML(sanitize_value(canvas['label']))
49
- apply_v2_metadata_to_canvas(canvas: canvas, presenter: presenter)
61
+ apply_metadata_to_canvas(canvas: canvas, presenter: presenter, solr_doc_hits: solr_doc_hits)
50
62
  end
51
63
  end
52
64
  hash
53
65
  end
54
66
 
55
- def sanitize_v3(hash:, **)
56
- # TODO: flesh out metadata for v3
67
+ def sanitize_v3(hash:, presenter:, solr_doc_hits:)
68
+ hash['label']['none'].map! { |text| CGI.unescapeHTML(sanitize_value(text)) } if hash.key('label')
69
+ hash['items'].each do |canvas|
70
+ canvas['label']['none'].map! { |text| CGI.unescapeHTML(sanitize_value(text)) }
71
+ apply_metadata_to_canvas(canvas: canvas, presenter: presenter, solr_doc_hits: solr_doc_hits)
72
+ end
57
73
  hash
58
74
  end
59
75
 
60
- def apply_v2_metadata_to_canvas(canvas:, presenter:)
61
- solr_docs = get_solr_docs(presenter)
62
- # uses the '@id' property which is a URL that contains the FileSet id
63
- file_set_id = canvas['@id'].split('/').last
76
+ def apply_metadata_to_canvas(canvas:, presenter:, solr_doc_hits:)
77
+ return if @child_works.empty?
78
+
79
+ # uses the 'id' property for a v3 manifest and the '@id' property for v2; either is a URL that ends with the FileSet id
80
+ file_set_id = (canvas['id'] || canvas['@id']).split('/').last
64
81
  # finds the image that the FileSet is attached to and creates metadata on that canvas
65
- image = solr_docs.find { |doc| doc[:member_ids_ssim]&.include?(file_set_id) }
66
- canvas_metadata = IiifPrint.manifest_metadata_for(work: image,
67
- current_ability: presenter.ability,
68
- base_url: presenter.base_url)
69
- canvas['metadata'] = canvas_metadata
82
+ image = solr_doc_hits.find { |hit| hit[:member_ids_ssim]&.include?(file_set_id) }
83
+ return unless image
84
+ # prevents duplicating the child and parent metadata
85
+ return if image.id == presenter.id
86
+
87
+ canvas['metadata'] = IiifPrint.manifest_metadata_from(work: image, presenter: presenter)
88
+ end
89
+
90
+ LARGEST_SORT_ORDER_CHAR = '~'.freeze
91
+
92
+ def sort_canvases_v2(hash:, sort_field:)
93
+ return sort_by_label_v2(hash) if sort_field == :label
94
+
95
+ sort_field = Hyrax::Renderers::AttributeRenderer.new(sort_field, nil).label
96
+ hash['sequences']&.first&.[]('canvases')&.sort_by! do |canvas|
97
+ selection = canvas['metadata'].select { |h| h['label'] == sort_field }
98
+ fallback = [{ label: sort_field,
99
+ value: [LARGEST_SORT_ORDER_CHAR] }]
100
+ sort_field_metadata = selection.presence || fallback
101
+ sort_field_metadata.first['value'] if sort_field_metadata.present?
102
+ end
103
+ hash
70
104
  end
71
105
 
72
- def sorted_canvases_v2(hash:, sort_field:)
106
+ def sort_canvases_v3(hash:, sort_field:)
73
107
  sort_field = Hyrax::Renderers::AttributeRenderer.new(sort_field, nil).label
74
- hash["sequences"]&.first&.[]("canvases")&.sort_by! do |canvas|
75
- selection = canvas["metadata"].select { |h| h["label"] == sort_field }
76
- fallback = [{ label: sort_field, value: ['~'] }]
77
- identifier_metadata = selection.presence || fallback
78
- identifier_metadata.first["value"] if identifier_metadata.present?
108
+ hash['items']&.sort_by! do |item|
109
+ selection = item['metadata'].select { |h| h['label'][I18n.locale.to_s] == [sort_field] }
110
+ fallback = [{ label: { "#{I18n.locale}": [sort_field] },
111
+ value: { none: [LARGEST_SORT_ORDER_CHAR] } }]
112
+ sort_field_metadata = selection.presence || fallback
113
+ sort_field_metadata.first['value']['none'] if sort_field_metadata.present?
79
114
  end
80
115
  hash
81
116
  end
82
117
 
83
- def sorted_canvases_v3(hash:, **)
84
- # TODO: flesh out metadata for v3
118
+ # TODO: implement this for v3
119
+ def sort_by_label_v2(hash)
120
+ hash['sequences']&.first&.[]('canvases')&.sort_by! do |canvas|
121
+ canvas['label']
122
+ end
85
123
  hash
86
124
  end
87
125
 
88
- def get_solr_docs(presenter)
89
- parent_id = [presenter._source['id']]
90
- child_ids = presenter._source['member_ids_ssim']
91
- parent_id_and_child_ids = parent_id + child_ids
92
- query = ActiveFedora::SolrQueryBuilder.construct_query_for_ids(parent_id_and_child_ids)
93
- solr_hits = ActiveFedora::SolrService.query(query, fq: "-has_model_ssim:FileSet", rows: 100_000)
94
- solr_hits.map { |solr_hit| ::SolrDocument.new(solr_hit) }
126
+ def member_ids_for(presenter)
127
+ member_ids = presenter.try(:ordered_ids) || presenter.try(:member_ids)
128
+ member_ids.nil? ? [] : member_ids
129
+ end
130
+
131
+ def parent_and_child_solr_hits(presenter)
132
+ get_solr_hits([presenter.id]) + @child_works
133
+ end
134
+
135
+ SOLR_QUERY_PAGE_SIZE = 512
136
+ ##
137
+ # return an array of work SolrHits, gathered via paginated segmentation of the ids list
138
+ # to avoid exceeding Solr's default limit of 1024 boolean clauses per query (maxBooleanClauses)
139
+ # @param ids [Array]
140
+ # @return [Array<ActiveFedora::SolrHit>]
141
+ def get_solr_hits(ids)
142
+ results = []
143
+ ids.each_slice(SOLR_QUERY_PAGE_SIZE) do |paged_ids|
144
+ query = "id:(#{paged_ids.join(' OR ')})"
145
+ results += ActiveFedora::SolrService.query(
146
+ query,
147
+ { fq: "-has_model_ssim:FileSet", rows: paged_ids.size, method: :post }
148
+ )
149
+ end
150
+ results
95
151
  end
96
152
  end
153
+ # rubocop:enable Metrics/ModuleLength
97
154
  end
@@ -39,7 +39,7 @@ class IiifPrint::PluggableDerivativeService
39
39
  # multiple plugins, some of which may or may not be valid, so
40
40
  # validity checks happen within as well.
41
41
  def valid?
42
- !valid_plugins.size.zero?
42
+ !valid_plugins.empty?
43
43
  end
44
44
 
45
45
  # get derivative services relevant to method name and file_set context
@@ -105,16 +105,10 @@ class IiifPrint::PluggableDerivativeService
105
105
  # set would use. That "possibility" is based on the work. Later, we will check the plugin's
106
106
  # "valid?" which would now look at the specific file_set for validity.
107
107
  def plugins_for(file_set)
108
- parent = parent_for(file_set)
108
+ parent = IiifPrint.parent_for(file_set)
109
109
  return Array(default_plugins) if parent.nil?
110
110
  return Array(default_plugins) unless parent.respond_to?(:iiif_print_config)
111
111
 
112
- (file_set.parent.iiif_print_config.derivative_service_plugins + Array(default_plugins)).flatten.compact.uniq
113
- end
114
-
115
- def parent_for(file_set)
116
- # fallback to Fedora-stored relationships if work's aggregation of
117
- # file set is not indexed in Solr
118
- file_set.parent || file_set.member_of.find(&:work?)
112
+ (parent.iiif_print_config.derivative_service_plugins + Array(default_plugins)).flatten.compact.uniq
119
113
  end
120
114
  end
@@ -0,0 +1,13 @@
1
+ <%# OVERRIDE Hyrax 2.9.6 to show parent_query params if metadata is found in parent record %>
2
+
3
+ <div class="search-results-title-row">
4
+ <h3 class="search-result-title">
5
+ <% if params['q'].present? && document.any_highlighting? %>
6
+ <%= link_to document.title_or_label, [document, { parent_query: params['q'] }] %></h3>
7
+ <% elsif params['q'].present? %>
8
+ <%= link_to document.title_or_label, [document, { query: params['q'] }] %></h3>
9
+ <% else %>
10
+ <%= link_to document.title_or_label, document %></h3>
11
+ <% end %>
12
+ </h3>
13
+ </div>
@@ -1,9 +1,10 @@
1
- <% if presenter.iiif_viewer? %>
2
- <% if defined?(viewer) && viewer %>
1
+ <% if presenter.representative_id.present? && presenter.representative_presenter.present? %>
2
+ <% if defined?(viewer) && viewer && presenter.iiif_viewer? %>
3
3
  <%= iiif_viewer_display presenter %>
4
4
  <% else %>
5
5
  <%= render media_display_partial(presenter.representative_presenter), file_set: presenter.representative_presenter %>
6
6
  <% end %>
7
7
  <% else %>
8
- <%= image_tag 'default.png', class: "canonical-image", alt: 'default representative image' %>
8
+ <% alt = block_for(name: 'default_work_image_text') || 'Default work thumbnail' %>
9
+ <%= image_tag default_work_image, class: "canonical-image", alt: alt %>
9
10
  <% end %>
@@ -1,7 +1,7 @@
1
1
  <div class="viewer-wrapper">
2
2
  <iframe
3
3
  id="uv-iframe"
4
- src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %>"
4
+ src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %><%= uv_search_param %>"
5
5
  allowfullscreen="true"
6
6
  frameborder="0"
7
7
  ></iframe>
@@ -32,7 +32,8 @@
32
32
  <%= link_to 'Download', hyrax.download_path(file_set),
33
33
  title: "Download #{file_set.to_s.inspect}", target: "_blank" %>
34
34
  </li>
35
- <% IiifPrint::Data::WorkDerivatives.new(file_set.id).keys.each do |name| %>
35
+ <% work_deriv = IiifPrint::Data::WorkDerivatives.new(fileset: file_set) %>
36
+ <% work_deriv.keys.each do |name| %>
36
37
  <li role="menuitem" tabindex="-1">
37
38
  <a href="<%= "/downloads/#{file_set.id}?locale=en&file=#{name}" %>" download>
38
39
  Download <em>(as <%= name %>)</em>
@@ -0,0 +1,24 @@
1
+ <div class="form-actions">
2
+ <% if Hyrax.config.analytics? %>
3
+ <% # turbolinks needs to be turned off or the page will use the cache and the %>
4
+ <% # analytics graph will not show unless the page is refreshed. %>
5
+ <%= link_to t('.analytics'), @presenter.stats_path, id: 'stats', class: 'btn btn-default', data: { turbolinks: false } %>
6
+ <% end %>
7
+
8
+ <%# Hyrax 2.9.6 does not respond to workflow_restriction; that is something added in later versions. %>
9
+ <% if @presenter.editor? && (!respond_to?(:workflow_restriction?) || !workflow_restriction?(@presenter)) %>
10
+ <%= link_to t(".edit_this", type: @presenter.human_readable_type), edit_polymorphic_path([main_app, @presenter]),
11
+ class: 'btn btn-default' %>
12
+ <%= link_to t(".delete_this", type: @presenter.human_readable_type), [main_app, @presenter],
13
+ class: 'btn btn-danger', data: { confirm: t(".confirm_delete_this", type: @presenter.human_readable_type) },
14
+ method: :delete %>
15
+ <% end %>
16
+
17
+ <% if @presenter.editor? && @presenter.pdf? %>
18
+ <%= link_to t("iiif_print.file_set.split_this"), iiif_print.split_pdf_path(@presenter),
19
+ class: 'btn btn-default', data: { confirm: t("iiif_print.file_set.confirm_split_this") },
20
+ method: :post %>
21
+ <% end %>
22
+
23
+ <%= render 'social_media' %>
24
+ </div>
@@ -52,6 +52,10 @@ en:
52
52
  label: 'Place of publication'
53
53
  publication_title:
54
54
  label: 'Publication'
55
+ file_set:
56
+ split_this: 'Re-Split PDF'
57
+ confirm_split_this: 'Re-Split PDF'
58
+ split_submitted: 'Submitted PDF re-splitting job for FileSet ID=%{id}'
55
59
  newspapers_search:
56
60
  title: 'Search Newspapers'
57
61
  text: 'Use this form to search for full-text newspaper content.'
data/config/routes.rb ADDED
@@ -0,0 +1,3 @@
1
+ IiifPrint::Engine.routes.draw do
2
+ post "split_pdfs/:file_set_id" => "split_pdfs#create", as: :split_pdf
3
+ end
@@ -0,0 +1,7 @@
1
+ class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5.2]
2
+ def change
3
+ add_column :iiif_print_pending_relationships, :parent_model, :string
4
+ add_column :iiif_print_pending_relationships, :child_model, :string
5
+ add_column :iiif_print_pending_relationships, :file_id, :string
6
+ end
7
+ end
data/docker-compose.yml CHANGED
@@ -85,12 +85,12 @@ services:
85
85
  environment:
86
86
  - VIRTUAL_PORT=3000
87
87
  - VIRTUAL_HOST=.hyku.test
88
- command: tail -f /dev/null
88
+ # command: tail -f /dev/null
89
89
  ##
90
90
  ## Similar to the above, except we will bundle and then tell the container
91
91
  ## to wait. You'll then need to bash into the web container to do much of
92
92
  ## anything.
93
- # command: sh -l -c "bundle && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
93
+ command: sh -l -c "bundle install && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
94
94
  depends_on:
95
95
  db:
96
96
  condition: service_started
data/iiif_print.gemspec CHANGED
@@ -12,21 +12,18 @@ Gem::Specification.new do |spec|
12
12
  spec.email = ['sean.upton@utah.edu', 'jacob.reed@utah.edu',
13
13
  'brian.mcbride@utah.edu', 'eenglish@bpl.org', 'kirk.wang@scientist.com',
14
14
  'larita@scientist.com', 'jeremy.n.friesen@gmail.com']
15
- spec.homepage = 'https://github.com/samvera-labs/iiif_print'
15
+ spec.homepage = 'https://github.com/scientist-softserv/iiif_print/'
16
16
  spec.description = 'Gem/Engine for IIIF Print works in Hyrax-based Samvera Application.'
17
17
  spec.summary = <<-SUMMARY
18
- iiif_print is a Rails Engine gem providing model and administrative
19
- functions to Hyrax-based Samvera applications, for management of
20
- (primarily scanned) content.
18
+ IiifPrint is a gem (Rails "engine") for Hyrax-based digital repository applications to support displaying parent/child works in the same viewer (Universal Viewer) and the ability to search OCR from the parent work to the child work(s). IiifPrint was originally based off of the samvera-labs Newspaper gem.
21
19
  SUMMARY
22
20
  spec.license = 'Apache-2.0'
23
21
  spec.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
24
22
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
25
- spec.add_dependency 'blacklight_iiif_search', '~> 1.0'
26
- spec.add_dependency 'dry-monads', '~> 1.4.0'
27
- spec.add_dependency 'hyrax', '>= 2.5', '< 4.0'
23
+ spec.add_dependency 'blacklight_iiif_search', '>= 1.0', '< 3.0'
24
+ spec.add_dependency 'derivative-rodeo', "~> 0.5"
25
+ spec.add_dependency 'hyrax', '>= 2.5', '< 6'
28
26
  spec.add_dependency 'nokogiri', '>=1.13.2'
29
- spec.add_dependency 'rails', '~> 5.0'
30
27
  spec.add_dependency 'rdf-vocab', '~> 3.0'
31
28
 
32
29
  spec.add_development_dependency 'bixby'
@@ -34,10 +31,14 @@ SUMMARY
34
31
  spec.add_development_dependency 'engine_cart', '~> 2.2'
35
32
  spec.add_development_dependency "factory_bot", '~> 4.4'
36
33
  spec.add_development_dependency 'fcrepo_wrapper', '~> 0.5', '>= 0.5.1'
34
+ # TODO: We want to remove dependency on this
37
35
  spec.add_development_dependency 'newspaper_works_fixtures', '~> 0.3', '>=0.3.1'
38
36
  spec.add_development_dependency 'rails-controller-testing', '~> 1'
39
- spec.add_development_dependency 'rspec-rails', '~> 3.1'
37
+ spec.add_development_dependency 'json-canonicalization', '0.3.1'
38
+ spec.add_development_dependency 'rspec-rails'
40
39
  spec.add_development_dependency 'rspec-activemodel-mocks'
41
40
  spec.add_development_dependency 'shoulda-matchers', '~> 3.1'
42
41
  spec.add_development_dependency 'solr_wrapper', '>= 1.1', '< 3.0'
42
+ spec.add_development_dependency 'solargraph'
43
+ spec.add_development_dependency 'yard'
43
44
  end
@@ -15,13 +15,21 @@ module IiifPrint
15
15
  say_status('info',
16
16
  'BLACKLIGHT IIIF SEARCH NOT INSTALLED; INSTALLING BLACKLIGHT IIIF SEARCH',
17
17
  :blue)
18
- generate 'blacklight_iiif_search:install'
18
+ generate 'blacklight_iiif_search:install --skip-solr'
19
19
  end
20
20
 
21
21
  def catalog_controller_configuration
22
22
  generate 'iiif_print:catalog_controller'
23
23
  end
24
24
 
25
+ def install_routes
26
+ return if IO.read('config/routes.rb').include?('mount IiifPrint::Engine')
27
+
28
+ inject_into_file 'config/routes.rb', after: /mount Hyrax::Engine\s*\n/ do
29
+ " mount IiifPrint::Engine, at: '/'\n"\
30
+ end
31
+ end
32
+
25
33
  def inject_configuration
26
34
  copy_file 'config/initializers/iiif_print.rb'
27
35
  end
@@ -30,6 +38,10 @@ module IiifPrint
30
38
  generate 'iiif_print:assets'
31
39
  end
32
40
 
41
+ def inject_helper
42
+ copy_file 'helpers/iiif_print_helper.rb' 'app/helpers/iiif_print_helper.rb'
43
+ end
44
+
33
45
  # Blacklight IIIF Search generator has some linting that does not agree with CircleCI on Hyku
34
46
  # ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/controller_generator.rb
35
47
  # the following two methods do a clean-up to appease RuboCop
@@ -48,5 +60,13 @@ module IiifPrint
48
60
  contents.insert(0, "# frozen_string_literal: true\n\n")
49
61
  File.write(file, contents)
50
62
  end
63
+
64
+ def add_allinson_flex_fields_method_to_iiif_search_builder
65
+ file_path = "app/models/iiif_search_builder.rb"
66
+ contents = File.read(file_path)
67
+ contents.gsub!('include Blacklight::Solr::SearchBuilderBehavior', "include Blacklight::Solr::SearchBuilderBehavior\n include IiifPrint::AllinsonFlexFields")
68
+ contents.gsub!('self.default_processor_chain += [:ocr_search_params]', 'self.default_processor_chain += %i[ocr_search_params include_allinson_flex_fields]')
69
+ File.write(file_path, contents)
70
+ end
51
71
  end
52
72
  end
@@ -1,3 +1,4 @@
1
+ # rubocop:disable Lint/UnusedBlockArgument
1
2
  IiifPrint.config do |config|
2
3
  # NOTE: WorkTypes and models are used synonymously here.
3
4
  # Add models to be excluded from search so the user
@@ -15,8 +16,14 @@ IiifPrint.config do |config|
15
16
  # @example
16
17
  # config.excluded_model_name_solr_field_key = 'some_solr_field_key'
17
18
 
18
- # Configure how the manifest sorts the canvases, by default it sorts by :title,
19
- # but a different model property may be desired such as :date_published
20
- # @example
21
- # config.sort_iiif_manifest_canvases_by = :date_published
19
+ if Rails.env.development?
20
+ if DerivativeRodeo.config.aws_s3_access_key_id.present? && DerivativeRodeo.config.aws_s3_secret_access_key.present?
21
+ Rails.logger.info("DerivativeRodeo S3 Credentials detected using 's3' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
22
+ IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 's3'
23
+ else
24
+ Rails.logger.info("DerivativeRodeo S3 Credentials not-detected using 'file' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
25
+ IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 'file'
26
+ end
27
+ end
22
28
  end
29
+ # rubocop:enable Lint/UnusedBlockArgument
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IiifPrintHelper
4
+ include IiifPrint::IiifPrintHelperBehavior
5
+ end
@@ -25,7 +25,8 @@ module IiifPrint
25
25
  # @see IiifPrint::PluggableDerivativeService#plugins_for
26
26
  # @return [Boolean]
27
27
  def valid?
28
- true
28
+ # @note We are taking a shortcut because currently we are only concerned about images.
29
+ file_set.class.image_mime_types.include?(file_set.mime_type)
29
30
  end
30
31
 
31
32
  def derivative_path_factory
@@ -2,6 +2,7 @@
2
2
  module IiifPrint
3
3
  module BlacklightIiifSearch
4
4
  module AnnotationDecorator
5
+ INVALID_MATCH_TEXT = "#xywh=INVALID,INVALID,INVALID,INVALID".freeze
5
6
  ##
6
7
  # Create a URL for the annotation
7
8
  # use a Hyrax-y URL syntax:
@@ -28,23 +29,33 @@ module IiifPrint
28
29
  # @return [String]
29
30
  def coordinates
30
31
  return default_coords if query.blank?
32
+
33
+ sanitized_query = sanitize_query.downcase
31
34
  coords_json = fetch_and_parse_coords
32
- return default_coords unless coords_json && coords_json['coords']
33
- query_terms = query.split(' ').map(&:downcase)
35
+ return derived_coords_json_and_properties(sanitized_query) unless coords_json && coords_json['coords']
36
+
37
+ query_terms = sanitized_query.split(' ')
38
+
34
39
  matches = coords_json['coords'].select do |k, _v|
35
40
  k.downcase =~ /(#{query_terms.join('|')})/
36
41
  end
37
42
  return default_coords if matches.blank?
43
+
38
44
  coords_array = matches.values.flatten(1)[hl_index]
39
- return default unless coords_array
45
+ return default_coords unless coords_array
46
+
40
47
  "#xywh=#{coords_array.join(',')}"
41
48
  end
42
49
 
50
+ def sanitize_query
51
+ query.match(additional_query_terms_regex)[1].strip
52
+ end
53
+
43
54
  ##
44
55
  # return the JSON word-coordinates file contents
45
56
  # @return [JSON]
46
57
  def fetch_and_parse_coords
47
- coords = IiifPrint::Data::WorkDerivatives.data(from: file_set_id, of_type: 'json')
58
+ coords = IiifPrint.config.ocr_coords_from_json_function.call(file_set_id: file_set_id, document: document)
48
59
  return nil if coords.blank?
49
60
  begin
50
61
  JSON.parse(coords)
@@ -53,6 +64,23 @@ module IiifPrint
53
64
  end
54
65
  end
55
66
 
67
+ # This is a bit hacky but it is checking if any of the properties contain the query term
68
+ # if there are no coords and there is a metadata property match
69
+ # then we return the default coords
70
+ # else we insert an invalid match text to be stripped out at a later point
71
+ # @see IiifPrint::IiifSearchResponseDecorator#annotation_list
72
+ def derived_coords_json_and_properties(sanitized_query)
73
+ property = @document.keys.detect do |key|
74
+ (key.ends_with?("_tesim") || key.ends_with?("_tsim")) && property_includes_sanitized_query?(key, sanitized_query)
75
+ end
76
+
77
+ property ? default_coords : INVALID_MATCH_TEXT
78
+ end
79
+
80
+ def property_includes_sanitized_query?(property, sanitized_query)
81
+ @document[property].join.downcase.include?(sanitized_query)
82
+ end
83
+
56
84
  ##
57
85
  # a default set of coordinates
58
86
  # @return [String]
@@ -77,7 +105,31 @@ module IiifPrint
77
105
 
78
106
  file_set_ids = document['file_set_ids_ssim']
79
107
  raise "#{self.class}: NO FILE SET ID" if file_set_ids.blank?
80
- file_set_ids.first
108
+
109
+ # Since a parent work's `file_set_ids_ssim` can contain child work ids as well as file set ids,
110
+ # this will ensure that the file set id is indeed a `FileSet`
111
+ file_set_ids.detect { |id| SolrDocument.find(id).file_set? }
112
+ end
113
+
114
+ ##
115
+ # This method is a workaround to compensate for overriding the solr_params method in
116
+ # BlacklightIiifSearch::IiifSearch. In the override, the solr_params method adds an additional filter to the query
117
+ # to include either the object_relation_field OR the parent document's id and removes the :f parameter from the
118
+ # query. This resulted in the query split here returning more than the actual query term.
119
+ #
120
+ # @see IiifPrint::IiifSearchDecorator#solr_params
121
+ # @return [Regexp] A regular expression to find the last AND and everything after it
122
+ # @example
123
+ # 'foo AND (is_page_of_ssim:\"123123\" OR id:\"123123\")' #=> 'foo'
124
+ def additional_query_terms_regex
125
+ /(.*)(?= AND (\(.+\)|\w+)$)/
126
+ end
127
+
128
+ ##
129
+ # @return [IIIF::Presentation::Resource]
130
+ def text_resource_for_annotation
131
+ IIIF::Presentation::Resource.new('@type' => 'cnt:ContentAsText',
132
+ 'chars' => sanitize_query)
81
133
  end
82
134
  end
83
135
  end
@@ -11,12 +11,16 @@ module IiifPrint
11
11
  include IiifPrint::HighlightSearchParams
12
12
  # TODO: Do we need the following as a module? It hides the behavior
13
13
  include IiifPrint::ExcludeModels
14
+ include IiifPrint::AllinsonFlexFields
14
15
 
15
16
  # NOTE: If you are using advanced_search, the :exclude_models and :highlight_search_params must
16
17
  # be added after the advanced_search methods (which are not part of this gem). In other tests,
17
18
  # we found that having the advanced search processing after the two aforementioned processors
18
19
  # resulted in improper evaluation of keyword querying.
19
- self.default_processor_chain += [:exclude_models, :highlight_search_params, :show_parents_only]
20
+ self.default_processor_chain += [:exclude_models,
21
+ :highlight_search_params,
22
+ :show_parents_only,
23
+ :include_allinson_flex_fields]
20
24
 
21
25
  # rubocop:enable Naming/PredicateName
22
26
  def show_parents_only(solr_parameters)