iiif_print 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +102 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
- data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
- data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
- data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
- data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/app/listeners/iiif_print/listener.rb +31 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
- data/app/models/concerns/iiif_print/solr/document.rb +19 -3
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
- data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
- data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
- data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
- data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/initializers/simple_schema_loader.rb +1 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +11 -10
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +14 -2
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
- data/lib/iiif_print/catalog_search_builder.rb +7 -3
- data/lib/iiif_print/configuration.rb +205 -8
- data/lib/iiif_print/data/fileset_helper.rb +3 -3
- data/lib/iiif_print/data/work_derivatives.rb +4 -4
- data/lib/iiif_print/engine.rb +53 -15
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jp2_derivative_service.rb +4 -1
- data/lib/iiif_print/lineage_service.rb +47 -13
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/pdf_derivative_service.rb +3 -1
- data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
- data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
- data/lib/iiif_print/persistence_layer.rb +118 -0
- data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
- data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
- data/lib/iiif_print/tiff_derivative_service.rb +3 -1
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +210 -20
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +111 -196
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
- data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
- data/bin/rails +0 -13
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
- data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/.keep.txt +0 -1
- data/spec/factories/ability.rb +0 -6
- data/spec/factories/newspaper_issue.rb +0 -7
- data/spec/factories/newspaper_page.rb +0 -7
- data/spec/factories/newspaper_page_solr_document.rb +0 -12
- data/spec/factories/newspaper_title.rb +0 -8
- data/spec/factories/uploaded_pdf_file.rb +0 -9
- data/spec/factories/uploaded_txt_file.rb +0 -9
- data/spec/factories/user.rb +0 -13
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +0 -7
- data/spec/fixtures/files/alto-2-0.xsd +0 -714
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +0 -16
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +0 -31
- data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
- data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
- data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +0 -202
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
- data/spec/helpers/iiif_print_helper_spec.rb +0 -43
- data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
- data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
- data/spec/iiif_print/configuration_spec.rb +0 -67
- data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
- data/spec/iiif_print/data/work_file_spec.rb +0 -99
- data/spec/iiif_print/data/work_files_spec.rb +0 -237
- data/spec/iiif_print/image_tool_spec.rb +0 -109
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
- data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
- data/spec/iiif_print/lineage_service_spec.rb +0 -13
- data/spec/iiif_print/metadata_spec.rb +0 -115
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
- data/spec/iiif_print_spec.rb +0 -51
- data/spec/misc_shared.rb +0 -111
- data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
- data/spec/models/solr_document_spec.rb +0 -14
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
- data/spec/spec_helper.rb +0 -181
- data/spec/support/controller_level_helpers.rb +0 -28
- data/spec/support/iiif_print_models.rb +0 -127
- data/spec/test_app_templates/blacklight.yml +0 -9
- data/spec/test_app_templates/fedora.yml +0 -15
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
- data/spec/test_app_templates/redis.yml +0 -9
- data/spec/test_app_templates/solr/conf/schema.xml +0 -362
- data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
- data/spec/test_app_templates/solr.yml +0 -7
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# OVERRIDE Hyrax 5.0.0rc2 to add file_set.iiif_print_conditionally_destroy_spawned_children with user args
|
4
|
+
|
5
|
+
module Hyrax
|
6
|
+
module Transactions
|
7
|
+
module Steps
|
8
|
+
module DeleteAllFileSetsDecorator
|
9
|
+
include Dry::Monads[:result]
|
10
|
+
|
11
|
+
##
|
12
|
+
# @param [Valkyrie::Resource] resource
|
13
|
+
# @param user [::User] the user responsible for the delete action
|
14
|
+
#
|
15
|
+
# @return [Dry::Monads::Result]
|
16
|
+
def call(resource, user: nil)
|
17
|
+
return Failure(:resource_not_persisted) unless resource.persisted?
|
18
|
+
|
19
|
+
@query_service.custom_queries.find_child_file_sets(resource: resource).each do |file_set|
|
20
|
+
return Failure[:failed_to_delete_file_set, file_set] unless
|
21
|
+
Hyrax::Transactions::Container['file_set.destroy']
|
22
|
+
.with_step_args('file_set.remove_from_work' => { user: user },
|
23
|
+
'file_set.delete' => { user: user },
|
24
|
+
'file_set.iiif_print_conditionally_destroy_spawned_children' => { user: user })
|
25
|
+
.call(file_set).success?
|
26
|
+
rescue ::Ldp::Gone
|
27
|
+
nil
|
28
|
+
end
|
29
|
+
|
30
|
+
Success(resource)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<%# OVERRIDE Hyrax 2.9.6 to show parent_query params if metadata is found in parent record %>
|
2
|
+
|
3
|
+
<div class="search-results-title-row">
|
4
|
+
<h3 class="search-result-title">
|
5
|
+
<% if params['q'].present? && document.any_highlighting? %>
|
6
|
+
<%= link_to document.title_or_label, [document, { parent_query: params['q'] }] %></h3>
|
7
|
+
<% elsif params['q'].present? %>
|
8
|
+
<%= link_to document.title_or_label, [document, { query: params['q'] }] %></h3>
|
9
|
+
<% else %>
|
10
|
+
<%= link_to document.title_or_label, document %></h3>
|
11
|
+
<% end %>
|
12
|
+
</h3>
|
13
|
+
</div>
|
@@ -1,9 +1,10 @@
|
|
1
|
-
<% if presenter.
|
2
|
-
<% if defined?(viewer) && viewer %>
|
1
|
+
<% if presenter.representative_id.present? && presenter.representative_presenter.present? %>
|
2
|
+
<% if defined?(viewer) && viewer && presenter.iiif_viewer? %>
|
3
3
|
<%= iiif_viewer_display presenter %>
|
4
4
|
<% else %>
|
5
5
|
<%= render media_display_partial(presenter.representative_presenter), file_set: presenter.representative_presenter %>
|
6
6
|
<% end %>
|
7
7
|
<% else %>
|
8
|
-
|
8
|
+
<% alt = block_for(name: 'default_work_image_text') || 'Default work thumbnail' %>
|
9
|
+
<%= image_tag default_work_image, class: "canonical-image", alt: alt %>
|
9
10
|
<% end %>
|
@@ -1,7 +1,7 @@
|
|
1
1
|
<div class="viewer-wrapper">
|
2
2
|
<iframe
|
3
3
|
id="uv-iframe"
|
4
|
-
src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %>"
|
4
|
+
src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %><%= uv_search_param %>"
|
5
5
|
allowfullscreen="true"
|
6
6
|
frameborder="0"
|
7
7
|
></iframe>
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<div class="form-actions">
|
2
|
+
<% if Hyrax.config.analytics? %>
|
3
|
+
<% # turbolinks needs to be turned off or the page will use the cache and the %>
|
4
|
+
<% # analytics graph will not show unless the page is refreshed. %>
|
5
|
+
<%= link_to t('.analytics'), @presenter.stats_path, id: 'stats', class: 'btn btn-default', data: { turbolinks: false } %>
|
6
|
+
<% end %>
|
7
|
+
|
8
|
+
<%# Hyrax 2.9.6 does not respond to workflow_restriction; that is something added in later versions. %>
|
9
|
+
<% if @presenter.editor? && (!respond_to?(:workflow_restriction?) || !workflow_restriction?(@presenter)) %>
|
10
|
+
<%= link_to t(".edit_this", type: @presenter.human_readable_type), edit_polymorphic_path([main_app, @presenter]),
|
11
|
+
class: 'btn btn-default' %>
|
12
|
+
<%= link_to t(".delete_this", type: @presenter.human_readable_type), [main_app, @presenter],
|
13
|
+
class: 'btn btn-danger', data: { confirm: t(".confirm_delete_this", type: @presenter.human_readable_type) },
|
14
|
+
method: :delete %>
|
15
|
+
<% end %>
|
16
|
+
|
17
|
+
<% if @presenter.show_split_button? && @presenter.editor? && @presenter.pdf? %>
|
18
|
+
<%= link_to t("iiif_print.file_set.split_this"), iiif_print.split_pdf_path(@presenter),
|
19
|
+
class: 'btn btn-default', data: { confirm: t("iiif_print.file_set.confirm_split_this") },
|
20
|
+
method: :post %>
|
21
|
+
<% end %>
|
22
|
+
|
23
|
+
<%= render 'social_media' %>
|
24
|
+
</div>
|
@@ -0,0 +1 @@
|
|
1
|
+
"Hyrax::SimpleSchemaLoader".safe_constantize&.prepend(IiifPrint::SimpleSchemaLoaderDecorator)
|
@@ -52,6 +52,10 @@ en:
|
|
52
52
|
label: 'Place of publication'
|
53
53
|
publication_title:
|
54
54
|
label: 'Publication'
|
55
|
+
file_set:
|
56
|
+
split_this: 'Re-Split PDF'
|
57
|
+
confirm_split_this: 'Re-Split PDF'
|
58
|
+
split_submitted: 'Submitted PDF re-splitting job for FileSet ID=%{id}'
|
55
59
|
newspapers_search:
|
56
60
|
title: 'Search Newspapers'
|
57
61
|
text: 'Use this form to search for full-text newspaper content.'
|
@@ -0,0 +1,21 @@
|
|
1
|
+
attributes:
|
2
|
+
is_child:
|
3
|
+
type: bool
|
4
|
+
multiple: false
|
5
|
+
index_keys:
|
6
|
+
- "is_child_bsi"
|
7
|
+
form:
|
8
|
+
required: false
|
9
|
+
primary: false
|
10
|
+
multiple: false
|
11
|
+
predicate: "http://id.loc.gov/vocabulary/identifiers/isChild"
|
12
|
+
split_from_pdf_id:
|
13
|
+
type: string
|
14
|
+
multiple: false
|
15
|
+
index_keys:
|
16
|
+
- "split_from_pdf_id_ssi"
|
17
|
+
form:
|
18
|
+
required: false
|
19
|
+
primary: false
|
20
|
+
multiple: false
|
21
|
+
predicate: "http://id.loc.gov/vocabulary/identifiers/splitFromPdfId"
|
data/config/routes.rb
ADDED
@@ -1,12 +1,14 @@
|
|
1
1
|
class CreateIiifPrintDerivativeAttachments < ActiveRecord::Migration[5.0]
|
2
2
|
def change
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
unless table_exists?(:iiif_print_derivative_attachments)
|
4
|
+
create_table :iiif_print_derivative_attachments do |t|
|
5
|
+
t.string :fileset_id
|
6
|
+
t.string :path
|
7
|
+
t.string :destination_name
|
7
8
|
|
8
|
-
|
9
|
+
t.timestamps
|
10
|
+
end
|
11
|
+
add_index :iiif_print_derivative_attachments, :fileset_id
|
9
12
|
end
|
10
|
-
add_index :iiif_print_derivative_attachments, :fileset_id
|
11
13
|
end
|
12
14
|
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
class CreateIiifPrintIngestFileRelations < ActiveRecord::Migration[5.0]
|
2
2
|
def change
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
unless table_exists?(:iiif_print_ingest_file_relations)
|
4
|
+
create_table :iiif_print_ingest_file_relations do |t|
|
5
|
+
t.string :file_path
|
6
|
+
t.string :derivative_path
|
6
7
|
|
7
|
-
|
8
|
+
t.timestamps
|
9
|
+
end
|
10
|
+
add_index :iiif_print_ingest_file_relations, :file_path
|
8
11
|
end
|
9
|
-
add_index :iiif_print_ingest_file_relations, :file_path
|
10
12
|
end
|
11
13
|
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
class CreateIiifPrintPendingRelationships < ActiveRecord::Migration[5.1]
|
2
2
|
def change
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
unless table_exists?(:iiif_print_pending_relationships)
|
4
|
+
create_table :iiif_print_pending_relationships do |t|
|
5
|
+
t.string :child_title, null: false
|
6
|
+
t.string :parent_id, null: false
|
7
|
+
t.string :child_order, null: false
|
8
|
+
t.timestamps
|
9
|
+
end
|
10
|
+
add_index :iiif_print_pending_relationships, :parent_id
|
8
11
|
end
|
9
|
-
add_index :iiif_print_pending_relationships, :parent_id
|
10
12
|
end
|
11
13
|
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
add_column :iiif_print_pending_relationships, :parent_model, :string unless column_exists?(:iiif_print_pending_relationships, :parent_model)
|
4
|
+
add_column :iiif_print_pending_relationships, :child_model, :string unless column_exists?(:iiif_print_pending_relationships, :child_model)
|
5
|
+
add_column :iiif_print_pending_relationships, :file_id, :string unless column_exists?(:iiif_print_pending_relationships, :file_id)
|
6
|
+
end
|
7
|
+
end
|
data/docker-compose.yml
CHANGED
@@ -85,12 +85,12 @@ services:
|
|
85
85
|
environment:
|
86
86
|
- VIRTUAL_PORT=3000
|
87
87
|
- VIRTUAL_HOST=.hyku.test
|
88
|
-
command: tail -f /dev/null
|
88
|
+
# command: tail -f /dev/null
|
89
89
|
##
|
90
90
|
## Similar to the above, except we will bundle and then tell the container
|
91
91
|
## to wait. You'll then need to bash into the web container to do much of
|
92
92
|
## anything.
|
93
|
-
|
93
|
+
command: sh -l -c "bundle install && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
|
94
94
|
depends_on:
|
95
95
|
db:
|
96
96
|
condition: service_started
|
data/iiif_print.gemspec
CHANGED
@@ -12,21 +12,18 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.email = ['sean.upton@utah.edu', 'jacob.reed@utah.edu',
|
13
13
|
'brian.mcbride@utah.edu', 'eenglish@bpl.org', 'kirk.wang@scientist.com',
|
14
14
|
'larita@scientist.com', 'jeremy.n.friesen@gmail.com']
|
15
|
-
spec.homepage = 'https://github.com/
|
15
|
+
spec.homepage = 'https://github.com/scientist-softserv/iiif_print/'
|
16
16
|
spec.description = 'Gem/Engine for IIIF Print works in Hyrax-based Samvera Application.'
|
17
17
|
spec.summary = <<-SUMMARY
|
18
|
-
|
19
|
-
functions to Hyrax-based Samvera applications, for management of
|
20
|
-
(primarily scanned) content.
|
18
|
+
IiifPrint is a gem (Rails "engine") for Hyrax-based digital repository applications to support displaying parent/child works in the same viewer (Universal Viewer) and the ability to search OCR from the parent work to the child work(s). IiifPrint was originally based off of the samvera-labs Newspaper gem.
|
21
19
|
SUMMARY
|
22
20
|
spec.license = 'Apache-2.0'
|
23
|
-
spec.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
|
21
|
+
spec.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR).select { |f| File.dirname(f) !~ %r{\A"?spec\/?} && f != 'bin/rails' }
|
24
22
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
25
|
-
spec.add_dependency 'blacklight_iiif_search', '
|
26
|
-
spec.add_dependency '
|
27
|
-
spec.add_dependency 'hyrax', '>= 2.5', '<
|
23
|
+
spec.add_dependency 'blacklight_iiif_search', '>= 1.0', '< 3.0'
|
24
|
+
spec.add_dependency 'derivative-rodeo', "~> 0.5"
|
25
|
+
spec.add_dependency 'hyrax', '>= 2.5', '< 6'
|
28
26
|
spec.add_dependency 'nokogiri', '>=1.13.2'
|
29
|
-
spec.add_dependency 'rails', '~> 5.0'
|
30
27
|
spec.add_dependency 'rdf-vocab', '~> 3.0'
|
31
28
|
|
32
29
|
spec.add_development_dependency 'bixby'
|
@@ -34,10 +31,14 @@ SUMMARY
|
|
34
31
|
spec.add_development_dependency 'engine_cart', '~> 2.2'
|
35
32
|
spec.add_development_dependency "factory_bot", '~> 4.4'
|
36
33
|
spec.add_development_dependency 'fcrepo_wrapper', '~> 0.5', '>= 0.5.1'
|
34
|
+
# TODO: We want to remove dependency on this
|
37
35
|
spec.add_development_dependency 'newspaper_works_fixtures', '~> 0.3', '>=0.3.1'
|
38
36
|
spec.add_development_dependency 'rails-controller-testing', '~> 1'
|
39
|
-
spec.add_development_dependency '
|
37
|
+
spec.add_development_dependency 'json-canonicalization', '0.3.1'
|
38
|
+
spec.add_development_dependency 'rspec-rails'
|
40
39
|
spec.add_development_dependency 'rspec-activemodel-mocks'
|
41
40
|
spec.add_development_dependency 'shoulda-matchers', '~> 3.1'
|
42
41
|
spec.add_development_dependency 'solr_wrapper', '>= 1.1', '< 3.0'
|
42
|
+
spec.add_development_dependency 'solargraph'
|
43
|
+
spec.add_development_dependency 'yard'
|
43
44
|
end
|
@@ -15,13 +15,21 @@ module IiifPrint
|
|
15
15
|
say_status('info',
|
16
16
|
'BLACKLIGHT IIIF SEARCH NOT INSTALLED; INSTALLING BLACKLIGHT IIIF SEARCH',
|
17
17
|
:blue)
|
18
|
-
generate 'blacklight_iiif_search:install'
|
18
|
+
generate 'blacklight_iiif_search:install --skip-solr'
|
19
19
|
end
|
20
20
|
|
21
21
|
def catalog_controller_configuration
|
22
22
|
generate 'iiif_print:catalog_controller'
|
23
23
|
end
|
24
24
|
|
25
|
+
def install_routes
|
26
|
+
return if IO.read('config/routes.rb').include?('mount IiifPrint::Engine')
|
27
|
+
|
28
|
+
inject_into_file 'config/routes.rb', after: /mount Hyrax::Engine\s*\n/ do
|
29
|
+
" mount IiifPrint::Engine, at: '/'\n"\
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
25
33
|
def inject_configuration
|
26
34
|
copy_file 'config/initializers/iiif_print.rb'
|
27
35
|
end
|
@@ -30,6 +38,10 @@ module IiifPrint
|
|
30
38
|
generate 'iiif_print:assets'
|
31
39
|
end
|
32
40
|
|
41
|
+
def inject_helper
|
42
|
+
# copy_file takes (source, destination). Without the comma, Ruby joins the two
# adjacent string literals into a single bogus source path.
copy_file 'helpers/iiif_print_helper.rb', 'app/helpers/iiif_print_helper.rb'
|
43
|
+
end
|
44
|
+
|
33
45
|
# Blacklight IIIF Search generator has some linting that does not agree with CircleCI on Hyku
|
34
46
|
# ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/controller_generator.rb
|
35
47
|
# the following two methods do a cleanup to appease RuboCop
|
@@ -48,5 +60,13 @@ module IiifPrint
|
|
48
60
|
contents.insert(0, "# frozen_string_literal: true\n\n")
|
49
61
|
File.write(file, contents)
|
50
62
|
end
|
63
|
+
|
64
|
+
def add_allinson_flex_fields_method_to_iiif_search_builder
|
65
|
+
file_path = "app/models/iiif_search_builder.rb"
|
66
|
+
contents = File.read(file_path)
|
67
|
+
contents.gsub!('include Blacklight::Solr::SearchBuilderBehavior', "include Blacklight::Solr::SearchBuilderBehavior\n include IiifPrint::AllinsonFlexFields")
|
68
|
+
contents.gsub!('self.default_processor_chain += [:ocr_search_params]', 'self.default_processor_chain += %i[ocr_search_params include_allinson_flex_fields]')
|
69
|
+
File.write(file_path, contents)
|
70
|
+
end
|
51
71
|
end
|
52
72
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# rubocop:disable Lint/UnusedBlockArgument
|
1
2
|
IiifPrint.config do |config|
|
2
3
|
# NOTE: WorkTypes and models are used synonymously here.
|
3
4
|
# Add models to be excluded from search so the user
|
@@ -15,8 +16,14 @@ IiifPrint.config do |config|
|
|
15
16
|
# @example
|
16
17
|
# config.excluded_model_name_solr_field_key = 'some_solr_field_key'
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
if Rails.env.development?
|
20
|
+
if DerivativeRodeo.config.aws_s3_access_key_id.present? && DerivativeRodeo.config.aws_s3_secret_access_key.present?
|
21
|
+
Rails.logger.info("DerivativeRodeo S3 Credentials detected using 's3' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
|
22
|
+
IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 's3'
|
23
|
+
else
|
24
|
+
Rails.logger.info("DerivativeRodeo S3 Credentials not-detected using 'file' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
|
25
|
+
IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 'file'
|
26
|
+
end
|
27
|
+
end
|
22
28
|
end
|
29
|
+
# rubocop:enable Lint/UnusedBlockArgument
|
@@ -7,7 +7,11 @@ module IiifPrint
|
|
7
7
|
class_attribute :target_extension, default: nil
|
8
8
|
|
9
9
|
def initialize(file_set)
|
10
|
-
@file_set = file_set
|
10
|
+
@file_set = if file_set.is_a?(Hyrax::FileMetadata)
|
11
|
+
Hyrax.query_service.find_by(id: file_set.file_set_id)
|
12
|
+
else
|
13
|
+
file_set
|
14
|
+
end
|
11
15
|
@dest_path = nil
|
12
16
|
@source_path = nil
|
13
17
|
@source_meta = nil
|
@@ -25,7 +29,11 @@ module IiifPrint
|
|
25
29
|
# @see IiifPrint::PluggableDerivativeService#plugins_for
|
26
30
|
# @return [Boolean]
|
27
31
|
def valid?
|
28
|
-
|
32
|
+
# @note We are taking a shortcut because currently we are only concerned about images.
|
33
|
+
# @TODO: verify if this works for ActiveFedora and if so, remove commented code.
|
34
|
+
# If not, modify to use adapter.
|
35
|
+
# file_set.class.image_mime_types.include?(file_set.mime_type)
|
36
|
+
file_set.original_file.image?
|
29
37
|
end
|
30
38
|
|
31
39
|
def derivative_path_factory
|
@@ -109,5 +117,9 @@ module IiifPrint
|
|
109
117
|
# intermediate -> PDF
|
110
118
|
im_convert
|
111
119
|
end
|
120
|
+
|
121
|
+
def mime_type_for(extension)
|
122
|
+
Marcel::MimeType.for extension: extension
|
123
|
+
end
|
112
124
|
end
|
113
125
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
module IiifPrint
|
3
3
|
module BlacklightIiifSearch
|
4
4
|
module AnnotationDecorator
|
5
|
+
INVALID_MATCH_TEXT = "#xywh=INVALID,INVALID,INVALID,INVALID".freeze
|
5
6
|
##
|
6
7
|
# Create a URL for the annotation
|
7
8
|
# use a Hyrax-y URL syntax:
|
@@ -28,23 +29,33 @@ module IiifPrint
|
|
28
29
|
# @return [String]
|
29
30
|
def coordinates
|
30
31
|
return default_coords if query.blank?
|
32
|
+
|
33
|
+
sanitized_query = sanitize_query.downcase
|
31
34
|
coords_json = fetch_and_parse_coords
|
32
|
-
return
|
33
|
-
|
35
|
+
return derived_coords_json_and_properties(sanitized_query) unless coords_json && coords_json['coords']
|
36
|
+
|
37
|
+
query_terms = sanitized_query.split(' ')
|
38
|
+
|
34
39
|
matches = coords_json['coords'].select do |k, _v|
|
35
40
|
k.downcase =~ /(#{query_terms.join('|')})/
|
36
41
|
end
|
37
42
|
return default_coords if matches.blank?
|
43
|
+
|
38
44
|
coords_array = matches.values.flatten(1)[hl_index]
|
39
|
-
return
|
45
|
+
return default_coords unless coords_array
|
46
|
+
|
40
47
|
"#xywh=#{coords_array.join(',')}"
|
41
48
|
end
|
42
49
|
|
50
|
+
def sanitize_query
|
51
|
+
# Strip the trailing " AND ..." clause when present; otherwise fall back to the
# whole query instead of raising NoMethodError on a nil match or nil query.
(query.to_s[additional_query_terms_regex, 1] || query.to_s).strip
|
52
|
+
end
|
53
|
+
|
43
54
|
##
|
44
55
|
# return the JSON word-coordinates file contents
|
45
56
|
# @return [JSON]
|
46
57
|
def fetch_and_parse_coords
|
47
|
-
coords = IiifPrint
|
58
|
+
coords = IiifPrint.config.ocr_coords_from_json_function.call(file_set_id: file_set_id, document: document)
|
48
59
|
return nil if coords.blank?
|
49
60
|
begin
|
50
61
|
JSON.parse(coords)
|
@@ -53,6 +64,23 @@ module IiifPrint
|
|
53
64
|
end
|
54
65
|
end
|
55
66
|
|
67
|
+
# This is a bit hacky but it is checking if any of the properties contain the query term
|
68
|
+
# if there are no coords and there is a metadata property match
|
69
|
+
# then we return the default coords
|
70
|
+
# else we insert an invalid match text to be stripped out at a later point
|
71
|
+
# @see IiifPrint::IiifSearchResponseDecorator#annotation_list
|
72
|
+
def derived_coords_json_and_properties(sanitized_query)
|
73
|
+
property = @document.keys.detect do |key|
|
74
|
+
(key.ends_with?("_tesim") || key.ends_with?("_tsim")) && property_includes_sanitized_query?(key, sanitized_query)
|
75
|
+
end
|
76
|
+
|
77
|
+
property ? default_coords : INVALID_MATCH_TEXT
|
78
|
+
end
|
79
|
+
|
80
|
+
def property_includes_sanitized_query?(property, sanitized_query)
|
81
|
+
@document[property].join.downcase.include?(sanitized_query)
|
82
|
+
end
|
83
|
+
|
56
84
|
##
|
57
85
|
# a default set of coordinates
|
58
86
|
# @return [String]
|
@@ -75,9 +103,33 @@ module IiifPrint
|
|
75
103
|
def file_set_id
|
76
104
|
return document['id'] if document.file_set?
|
77
105
|
|
78
|
-
file_set_ids = document['
|
106
|
+
file_set_ids = document['member_ids_ssim']
|
79
107
|
raise "#{self.class}: NO FILE SET ID" if file_set_ids.blank?
|
80
|
-
|
108
|
+
|
109
|
+
# Since a parent work's `member_ids_ssim` can contain child work ids as well as file set ids,
|
110
|
+
# this will ensure that the file set id is indeed a `FileSet`
|
111
|
+
file_set_ids.detect { |id| SolrDocument.find(id).file_set? }
|
112
|
+
end
|
113
|
+
|
114
|
+
##
|
115
|
+
# This method is a workaround to compensate for overriding the solr_params method in
|
116
|
+
# BlacklightIiifSearch::IiifSearch. In the override, the solr_params method adds an additional filter to the query
|
117
|
+
# to include either the object_relation_field OR the parent document's id and removes the :f parameter from the
|
118
|
+
# query. This resulted in the query split here returning more than the actual query term.
|
119
|
+
#
|
120
|
+
# @see IiifPrint::IiifSearchDecorator#solr_params
|
121
|
+
# @return [Regexp] A regular expression to find the last AND and everything after it
|
122
|
+
# @example
|
123
|
+
# 'foo AND (is_page_of_ssim:\"123123\" OR id:\"123123\")' #=> 'foo'
|
124
|
+
def additional_query_terms_regex
|
125
|
+
/(.*)(?= AND (\(.+\)|\w+)$)/
|
126
|
+
end
|
127
|
+
|
128
|
+
##
|
129
|
+
# @return [IIIF::Presentation::Resource]
|
130
|
+
def text_resource_for_annotation
|
131
|
+
IIIF::Presentation::Resource.new('@type' => 'cnt:ContentAsText',
|
132
|
+
'chars' => sanitize_query)
|
81
133
|
end
|
82
134
|
end
|
83
135
|
end
|
@@ -11,19 +11,23 @@ module IiifPrint
|
|
11
11
|
include IiifPrint::HighlightSearchParams
|
12
12
|
# TODO: Do we need the following as a module? It hides the behavior
|
13
13
|
include IiifPrint::ExcludeModels
|
14
|
+
include IiifPrint::AllinsonFlexFields
|
14
15
|
|
15
16
|
# NOTE: If you are using advanced_search, the :exclude_models and :highlight_search_params must
|
16
17
|
# be added after the advanced_search methods (which are not part of this gem). In other tests,
|
17
18
|
# we found that having the advanced search processing after the two aforementioned processors
|
18
19
|
# resulted in improper evaluation of keyword querying.
|
19
|
-
self.default_processor_chain += [:exclude_models,
|
20
|
+
self.default_processor_chain += [:exclude_models,
|
21
|
+
:highlight_search_params,
|
22
|
+
:show_parents_only,
|
23
|
+
:include_allinson_flex_fields]
|
20
24
|
|
21
25
|
# rubocop:enable Naming/PredicateName
|
22
26
|
def show_parents_only(solr_parameters)
|
23
27
|
query = if blacklight_params["include_child_works"] == 'true'
|
24
|
-
|
28
|
+
IiifPrint.solr_construct_query(is_child_bsi: 'true')
|
25
29
|
else
|
26
|
-
|
30
|
+
IiifPrint.solr_construct_query(is_child_bsi: nil)
|
27
31
|
end
|
28
32
|
solr_parameters[:fq] += [query]
|
29
33
|
end
|