iiif_print 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +98 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
- data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
- data/app/models/concerns/iiif_print/solr/document.rb +14 -0
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +10 -9
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +2 -1
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
- data/lib/iiif_print/catalog_search_builder.rb +5 -1
- data/lib/iiif_print/configuration.rb +145 -8
- data/lib/iiif_print/data/fileset_helper.rb +1 -1
- data/lib/iiif_print/data/work_derivatives.rb +3 -3
- data/lib/iiif_print/engine.rb +7 -13
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
- data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
- data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/lib/iiif_print/lineage_service.rb +29 -8
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +167 -12
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/spec/factories/newspaper_page_solr_document.rb +9 -1
- data/spec/fixtures/authorities/licenses.yml +4 -0
- data/spec/fixtures/authorities/rights_statements.yml +4 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
- data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
- data/spec/iiif_print/configuration_spec.rb +141 -15
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
- data/spec/iiif_print/lineage_service_spec.rb +1 -1
- data/spec/iiif_print/metadata_spec.rb +157 -23
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
- data/spec/iiif_print_spec.rb +125 -5
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
- data/spec/samvera/derivatives/configuration_spec.rb +41 -0
- data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
- data/spec/samvera/derivatives_spec.rb +54 -0
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +123 -35
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
@@ -1,14 +1,20 @@
|
|
1
1
|
module IiifPrint
|
2
|
+
# rubocop:disable Metrics/ModuleLength
|
2
3
|
module ManifestBuilderServiceBehavior
|
3
4
|
def initialize(*args,
|
4
5
|
version: IiifPrint.config.default_iiif_manifest_version,
|
5
6
|
iiif_manifest_factory: iiif_manifest_factory_for(version),
|
6
7
|
&block)
|
7
|
-
|
8
|
+
# Ensure we're setting the version before we go any further.
|
8
9
|
@version = version.to_i
|
10
|
+
@child_works = nil
|
11
|
+
super(*args, iiif_manifest_factory: iiif_manifest_factory, &block)
|
9
12
|
end
|
10
13
|
|
14
|
+
attr_reader :child_works, :version
|
15
|
+
|
11
16
|
def manifest_for(presenter:)
|
17
|
+
@child_works = get_solr_hits(member_ids_for(presenter))
|
12
18
|
build_manifest(presenter: presenter)
|
13
19
|
end
|
14
20
|
|
@@ -36,62 +42,113 @@ module IiifPrint
|
|
36
42
|
# ManifestFactory interface?
|
37
43
|
manifest = manifest_factory.new(presenter).to_h
|
38
44
|
hash = JSON.parse(manifest.to_json)
|
39
|
-
|
40
|
-
send("
|
45
|
+
parent_and_child_solr_hits = parent_and_child_solr_hits(presenter) if @child_works.present?
|
46
|
+
hash = send("sanitize_v#{@version}", hash: hash, presenter: presenter, solr_doc_hits: parent_and_child_solr_hits)
|
47
|
+
if @child_works.present? && IiifPrint.config.sort_iiif_manifest_canvases_by
|
48
|
+
send("sort_canvases_v#{@version}",
|
49
|
+
hash: hash,
|
50
|
+
sort_field: IiifPrint.config.sort_iiif_manifest_canvases_by)
|
51
|
+
end
|
52
|
+
hash
|
41
53
|
end
|
42
54
|
|
43
|
-
def sanitize_v2(hash:, presenter:)
|
55
|
+
def sanitize_v2(hash:, presenter:, solr_doc_hits:)
|
44
56
|
hash['label'] = CGI.unescapeHTML(sanitize_value(hash['label'])) if hash.key?('label')
|
45
57
|
hash.delete('description') # removes default description since it's in the metadata fields
|
46
58
|
hash['sequences']&.each do |sequence|
|
47
59
|
sequence['canvases']&.each do |canvas|
|
48
60
|
canvas['label'] = CGI.unescapeHTML(sanitize_value(canvas['label']))
|
49
|
-
|
61
|
+
apply_metadata_to_canvas(canvas: canvas, presenter: presenter, solr_doc_hits: solr_doc_hits)
|
50
62
|
end
|
51
63
|
end
|
52
64
|
hash
|
53
65
|
end
|
54
66
|
|
55
|
-
def sanitize_v3(hash:,
|
56
|
-
|
67
|
+
def sanitize_v3(hash:, presenter:, solr_doc_hits:)
|
68
|
+
hash['label']['none'].map! { |text| CGI.unescapeHTML(sanitize_value(text)) } if hash.key('label')
|
69
|
+
hash['items'].each do |canvas|
|
70
|
+
canvas['label']['none'].map! { |text| CGI.unescapeHTML(sanitize_value(text)) }
|
71
|
+
apply_metadata_to_canvas(canvas: canvas, presenter: presenter, solr_doc_hits: solr_doc_hits)
|
72
|
+
end
|
57
73
|
hash
|
58
74
|
end
|
59
75
|
|
60
|
-
def
|
61
|
-
|
62
|
-
|
63
|
-
|
76
|
+
def apply_metadata_to_canvas(canvas:, presenter:, solr_doc_hits:)
|
77
|
+
return if @child_works.empty?
|
78
|
+
|
79
|
+
# uses the 'id' property for a v3 manifest and '@id' for v2; either one is a URL whose last path segment is the FileSet id
|
80
|
+
file_set_id = (canvas['id'] || canvas['@id']).split('/').last
|
64
81
|
# finds the image that the FileSet is attached to and creates metadata on that canvas
|
65
|
-
image =
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
82
|
+
image = solr_doc_hits.find { |hit| hit[:member_ids_ssim]&.include?(file_set_id) }
|
83
|
+
return unless image
|
84
|
+
# prevents duplicating the child and parent metadata
|
85
|
+
return if image.id == presenter.id
|
86
|
+
|
87
|
+
canvas['metadata'] = IiifPrint.manifest_metadata_from(work: image, presenter: presenter)
|
88
|
+
end
|
89
|
+
|
90
|
+
LARGEST_SORT_ORDER_CHAR = '~'.freeze
|
91
|
+
|
92
|
+
def sort_canvases_v2(hash:, sort_field:)
|
93
|
+
return sort_by_label_v2(hash) if sort_field == :label
|
94
|
+
|
95
|
+
sort_field = Hyrax::Renderers::AttributeRenderer.new(sort_field, nil).label
|
96
|
+
hash['sequences']&.first&.[]('canvases')&.sort_by! do |canvas|
|
97
|
+
selection = canvas['metadata'].select { |h| h['label'] == sort_field }
|
98
|
+
fallback = [{ label: sort_field,
|
99
|
+
value: [LARGEST_SORT_ORDER_CHAR] }]
|
100
|
+
sort_field_metadata = selection.presence || fallback
|
101
|
+
sort_field_metadata.first['value'] if sort_field_metadata.present?
|
102
|
+
end
|
103
|
+
hash
|
70
104
|
end
|
71
105
|
|
72
|
-
def
|
106
|
+
def sort_canvases_v3(hash:, sort_field:)
|
73
107
|
sort_field = Hyrax::Renderers::AttributeRenderer.new(sort_field, nil).label
|
74
|
-
hash[
|
75
|
-
selection =
|
76
|
-
fallback = [{ label:
|
77
|
-
|
78
|
-
|
108
|
+
hash['items']&.sort_by! do |item|
|
109
|
+
selection = item['metadata'].select { |h| h['label'][I18n.locale.to_s] == [sort_field] }
|
110
|
+
fallback = [{ label: { "#{I18n.locale}": [sort_field] },
|
111
|
+
value: { none: [LARGEST_SORT_ORDER_CHAR] } }]
|
112
|
+
sort_field_metadata = selection.presence || fallback
|
113
|
+
sort_field_metadata.first['value']['none'] if sort_field_metadata.present?
|
79
114
|
end
|
80
115
|
hash
|
81
116
|
end
|
82
117
|
|
83
|
-
|
84
|
-
|
118
|
+
# TODO: implement this for v3
|
119
|
+
def sort_by_label_v2(hash)
|
120
|
+
hash['sequences']&.first&.[]('canvases')&.sort_by! do |canvas|
|
121
|
+
canvas['label']
|
122
|
+
end
|
85
123
|
hash
|
86
124
|
end
|
87
125
|
|
88
|
-
def
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
126
|
+
def member_ids_for(presenter)
|
127
|
+
member_ids = presenter.try(:ordered_ids) || presenter.try(:member_ids)
|
128
|
+
member_ids.nil? ? [] : member_ids
|
129
|
+
end
|
130
|
+
|
131
|
+
def parent_and_child_solr_hits(presenter)
|
132
|
+
get_solr_hits([presenter.id]) + @child_works
|
133
|
+
end
|
134
|
+
|
135
|
+
SOLR_QUERY_PAGE_SIZE = 512
|
136
|
+
##
|
137
|
+
# return an array of work SolrHits, gathered via paginated segmentation of the ids list
|
138
|
+
# to avoid Solr's limit of 1024 boolean clauses per query
|
139
|
+
# @param ids [Array]
|
140
|
+
# @return [Array<ActiveFedora::SolrHit>]
|
141
|
+
def get_solr_hits(ids)
|
142
|
+
results = []
|
143
|
+
ids.each_slice(SOLR_QUERY_PAGE_SIZE) do |paged_ids|
|
144
|
+
query = "id:(#{paged_ids.join(' OR ')})"
|
145
|
+
results += ActiveFedora::SolrService.query(
|
146
|
+
query,
|
147
|
+
{ fq: "-has_model_ssim:FileSet", rows: paged_ids.size, method: :post }
|
148
|
+
)
|
149
|
+
end
|
150
|
+
results
|
95
151
|
end
|
96
152
|
end
|
153
|
+
# rubocop:enable Metrics/ModuleLength
|
97
154
|
end
|
@@ -39,7 +39,7 @@ class IiifPrint::PluggableDerivativeService
|
|
39
39
|
# multiple plugins, some of which may or may not be valid, so
|
40
40
|
# validity checks happen within as well.
|
41
41
|
def valid?
|
42
|
-
!valid_plugins.
|
42
|
+
!valid_plugins.empty?
|
43
43
|
end
|
44
44
|
|
45
45
|
# get derivative services relevant to method name and file_set context
|
@@ -105,16 +105,10 @@ class IiifPrint::PluggableDerivativeService
|
|
105
105
|
# set would use. That "possibility" is based on the work. Later, we will check the plugin's
|
106
106
|
# "valid?" which would now look at the specific file_set for validity.
|
107
107
|
def plugins_for(file_set)
|
108
|
-
parent = parent_for(file_set)
|
108
|
+
parent = IiifPrint.parent_for(file_set)
|
109
109
|
return Array(default_plugins) if parent.nil?
|
110
110
|
return Array(default_plugins) unless parent.respond_to?(:iiif_print_config)
|
111
111
|
|
112
|
-
(
|
113
|
-
end
|
114
|
-
|
115
|
-
def parent_for(file_set)
|
116
|
-
# fallback to Fedora-stored relationships if work's aggregation of
|
117
|
-
# file set is not indexed in Solr
|
118
|
-
file_set.parent || file_set.member_of.find(&:work?)
|
112
|
+
(parent.iiif_print_config.derivative_service_plugins + Array(default_plugins)).flatten.compact.uniq
|
119
113
|
end
|
120
114
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<%# OVERRIDE Hyrax 2.9.6 to show parent_query params if metadata is found in parent record %>
|
2
|
+
|
3
|
+
<div class="search-results-title-row">
|
4
|
+
<h3 class="search-result-title">
|
5
|
+
<% if params['q'].present? && document.any_highlighting? %>
|
6
|
+
<%= link_to document.title_or_label, [document, { parent_query: params['q'] }] %></h3>
|
7
|
+
<% elsif params['q'].present? %>
|
8
|
+
<%= link_to document.title_or_label, [document, { query: params['q'] }] %></h3>
|
9
|
+
<% else %>
|
10
|
+
<%= link_to document.title_or_label, document %></h3>
|
11
|
+
<% end %>
|
12
|
+
</h3>
|
13
|
+
</div>
|
@@ -1,9 +1,10 @@
|
|
1
|
-
<% if presenter.
|
2
|
-
<% if defined?(viewer) && viewer %>
|
1
|
+
<% if presenter.representative_id.present? && presenter.representative_presenter.present? %>
|
2
|
+
<% if defined?(viewer) && viewer && presenter.iiif_viewer? %>
|
3
3
|
<%= iiif_viewer_display presenter %>
|
4
4
|
<% else %>
|
5
5
|
<%= render media_display_partial(presenter.representative_presenter), file_set: presenter.representative_presenter %>
|
6
6
|
<% end %>
|
7
7
|
<% else %>
|
8
|
-
|
8
|
+
<% alt = block_for(name: 'default_work_image_text') || 'Default work thumbnail' %>
|
9
|
+
<%= image_tag default_work_image, class: "canonical-image", alt: alt %>
|
9
10
|
<% end %>
|
@@ -1,7 +1,7 @@
|
|
1
1
|
<div class="viewer-wrapper">
|
2
2
|
<iframe
|
3
3
|
id="uv-iframe"
|
4
|
-
src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %>"
|
4
|
+
src="<%= universal_viewer_base_url %>#?manifest=<%= main_app.polymorphic_url [main_app, :manifest, presenter], { locale: nil } %>&config=<%= universal_viewer_config_url %><%= uv_search_param %>"
|
5
5
|
allowfullscreen="true"
|
6
6
|
frameborder="0"
|
7
7
|
></iframe>
|
@@ -32,7 +32,8 @@
|
|
32
32
|
<%= link_to 'Download', hyrax.download_path(file_set),
|
33
33
|
title: "Download #{file_set.to_s.inspect}", target: "_blank" %>
|
34
34
|
</li>
|
35
|
-
<% IiifPrint::Data::WorkDerivatives.new(file_set
|
35
|
+
<% work_deriv = IiifPrint::Data::WorkDerivatives.new(fileset: file_set) %>
|
36
|
+
<% work_deriv.keys.each do |name| %>
|
36
37
|
<li role="menuitem" tabindex="-1">
|
37
38
|
<a href="<%= "/downloads/#{file_set.id}?locale=en&file=#{name}" %>" download>
|
38
39
|
Download <em>(as <%= name %>)</em>
|
@@ -0,0 +1,24 @@
|
|
1
|
+
<div class="form-actions">
|
2
|
+
<% if Hyrax.config.analytics? %>
|
3
|
+
<% # turbolinks needs to be turned off or the page will use the cache and the %>
|
4
|
+
<% # analytics graph will not show unless the page is refreshed. %>
|
5
|
+
<%= link_to t('.analytics'), @presenter.stats_path, id: 'stats', class: 'btn btn-default', data: { turbolinks: false } %>
|
6
|
+
<% end %>
|
7
|
+
|
8
|
+
<%# Hyrax 2.9.6 does not respond to workflow_restriction; that is something added in later versions. %>
|
9
|
+
<% if @presenter.editor? && (!respond_to?(:workflow_restriction?) || !workflow_restriction?(@presenter)) %>
|
10
|
+
<%= link_to t(".edit_this", type: @presenter.human_readable_type), edit_polymorphic_path([main_app, @presenter]),
|
11
|
+
class: 'btn btn-default' %>
|
12
|
+
<%= link_to t(".delete_this", type: @presenter.human_readable_type), [main_app, @presenter],
|
13
|
+
class: 'btn btn-danger', data: { confirm: t(".confirm_delete_this", type: @presenter.human_readable_type) },
|
14
|
+
method: :delete %>
|
15
|
+
<% end %>
|
16
|
+
|
17
|
+
<% if @presenter.editor? && @presenter.pdf? %>
|
18
|
+
<%= link_to t("iiif_print.file_set.split_this"), iiif_print.split_pdf_path(@presenter),
|
19
|
+
class: 'btn btn-default', data: { confirm: t("iiif_print.file_set.confirm_split_this") },
|
20
|
+
method: :post %>
|
21
|
+
<% end %>
|
22
|
+
|
23
|
+
<%= render 'social_media' %>
|
24
|
+
</div>
|
@@ -52,6 +52,10 @@ en:
|
|
52
52
|
label: 'Place of publication'
|
53
53
|
publication_title:
|
54
54
|
label: 'Publication'
|
55
|
+
file_set:
|
56
|
+
split_this: 'Re-Split PDF'
|
57
|
+
confirm_split_this: 'Re-Split PDF'
|
58
|
+
split_submitted: 'Submitted PDF re-splitting job for FileSet ID=%{id}'
|
55
59
|
newspapers_search:
|
56
60
|
title: 'Search Newspapers'
|
57
61
|
text: 'Use this form to search for full-text newspaper content.'
|
data/config/routes.rb
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
class AddModelDetailsToIiifPrintPendingRelationships < ActiveRecord::Migration[5.2]
|
2
|
+
def change
|
3
|
+
add_column :iiif_print_pending_relationships, :parent_model, :string
|
4
|
+
add_column :iiif_print_pending_relationships, :child_model, :string
|
5
|
+
add_column :iiif_print_pending_relationships, :file_id, :string
|
6
|
+
end
|
7
|
+
end
|
data/docker-compose.yml
CHANGED
@@ -85,12 +85,12 @@ services:
|
|
85
85
|
environment:
|
86
86
|
- VIRTUAL_PORT=3000
|
87
87
|
- VIRTUAL_HOST=.hyku.test
|
88
|
-
command: tail -f /dev/null
|
88
|
+
# command: tail -f /dev/null
|
89
89
|
##
|
90
90
|
## Similar to the above, except we will bundle and then tell the container
|
91
91
|
## to wait. You'll then need to bash into the web container to do much of
|
92
92
|
## anything.
|
93
|
-
|
93
|
+
command: sh -l -c "bundle install && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
|
94
94
|
depends_on:
|
95
95
|
db:
|
96
96
|
condition: service_started
|
data/iiif_print.gemspec
CHANGED
@@ -12,21 +12,18 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.email = ['sean.upton@utah.edu', 'jacob.reed@utah.edu',
|
13
13
|
'brian.mcbride@utah.edu', 'eenglish@bpl.org', 'kirk.wang@scientist.com',
|
14
14
|
'larita@scientist.com', 'jeremy.n.friesen@gmail.com']
|
15
|
-
spec.homepage = 'https://github.com/
|
15
|
+
spec.homepage = 'https://github.com/scientist-softserv/iiif_print/'
|
16
16
|
spec.description = 'Gem/Engine for IIIF Print works in Hyrax-based Samvera Application.'
|
17
17
|
spec.summary = <<-SUMMARY
|
18
|
-
|
19
|
-
functions to Hyrax-based Samvera applications, for management of
|
20
|
-
(primarily scanned) content.
|
18
|
+
IiifPrint is a gem (Rails "engine") for Hyrax-based digital repository applications to support displaying parent/child works in the same viewer (Universal Viewer) and the ability to search OCR from the parent work to the child work(s). IiifPrint was originally based on the samvera-labs Newspaper gem.
|
21
19
|
SUMMARY
|
22
20
|
spec.license = 'Apache-2.0'
|
23
21
|
spec.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
|
24
22
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
25
|
-
spec.add_dependency 'blacklight_iiif_search', '
|
26
|
-
spec.add_dependency '
|
27
|
-
spec.add_dependency 'hyrax', '>= 2.5', '<
|
23
|
+
spec.add_dependency 'blacklight_iiif_search', '>= 1.0', '< 3.0'
|
24
|
+
spec.add_dependency 'derivative-rodeo', "~> 0.5"
|
25
|
+
spec.add_dependency 'hyrax', '>= 2.5', '< 6'
|
28
26
|
spec.add_dependency 'nokogiri', '>=1.13.2'
|
29
|
-
spec.add_dependency 'rails', '~> 5.0'
|
30
27
|
spec.add_dependency 'rdf-vocab', '~> 3.0'
|
31
28
|
|
32
29
|
spec.add_development_dependency 'bixby'
|
@@ -34,10 +31,14 @@ SUMMARY
|
|
34
31
|
spec.add_development_dependency 'engine_cart', '~> 2.2'
|
35
32
|
spec.add_development_dependency "factory_bot", '~> 4.4'
|
36
33
|
spec.add_development_dependency 'fcrepo_wrapper', '~> 0.5', '>= 0.5.1'
|
34
|
+
# TODO: We want to remove dependency on this
|
37
35
|
spec.add_development_dependency 'newspaper_works_fixtures', '~> 0.3', '>=0.3.1'
|
38
36
|
spec.add_development_dependency 'rails-controller-testing', '~> 1'
|
39
|
-
spec.add_development_dependency '
|
37
|
+
spec.add_development_dependency 'json-canonicalization', '0.3.1'
|
38
|
+
spec.add_development_dependency 'rspec-rails'
|
40
39
|
spec.add_development_dependency 'rspec-activemodel-mocks'
|
41
40
|
spec.add_development_dependency 'shoulda-matchers', '~> 3.1'
|
42
41
|
spec.add_development_dependency 'solr_wrapper', '>= 1.1', '< 3.0'
|
42
|
+
spec.add_development_dependency 'solargraph'
|
43
|
+
spec.add_development_dependency 'yard'
|
43
44
|
end
|
@@ -15,13 +15,21 @@ module IiifPrint
|
|
15
15
|
say_status('info',
|
16
16
|
'BLACKLIGHT IIIF SEARCH NOT INSTALLED; INSTALLING BLACKLIGHT IIIF SEARCH',
|
17
17
|
:blue)
|
18
|
-
generate 'blacklight_iiif_search:install'
|
18
|
+
generate 'blacklight_iiif_search:install --skip-solr'
|
19
19
|
end
|
20
20
|
|
21
21
|
def catalog_controller_configuration
|
22
22
|
generate 'iiif_print:catalog_controller'
|
23
23
|
end
|
24
24
|
|
25
|
+
def install_routes
|
26
|
+
return if IO.read('config/routes.rb').include?('mount IiifPrint::Engine')
|
27
|
+
|
28
|
+
inject_into_file 'config/routes.rb', after: /mount Hyrax::Engine\s*\n/ do
|
29
|
+
" mount IiifPrint::Engine, at: '/'\n"\
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
25
33
|
def inject_configuration
|
26
34
|
copy_file 'config/initializers/iiif_print.rb'
|
27
35
|
end
|
@@ -30,6 +38,10 @@ module IiifPrint
|
|
30
38
|
generate 'iiif_print:assets'
|
31
39
|
end
|
32
40
|
|
41
|
+
def inject_helper
|
42
|
+
copy_file 'helpers/iiif_print_helper.rb' 'app/helpers/iiif_print_helper.rb'
|
43
|
+
end
|
44
|
+
|
33
45
|
# Blacklight IIIF Search generator has some linting that does not agree with CircleCI on Hyku
|
34
46
|
# ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/controller_generator.rb
|
35
47
|
# the following two methods do a cleanup to appease Rubocop
|
@@ -48,5 +60,13 @@ module IiifPrint
|
|
48
60
|
contents.insert(0, "# frozen_string_literal: true\n\n")
|
49
61
|
File.write(file, contents)
|
50
62
|
end
|
63
|
+
|
64
|
+
def add_allinson_flex_fields_method_to_iiif_search_builder
|
65
|
+
file_path = "app/models/iiif_search_builder.rb"
|
66
|
+
contents = File.read(file_path)
|
67
|
+
contents.gsub!('include Blacklight::Solr::SearchBuilderBehavior', "include Blacklight::Solr::SearchBuilderBehavior\n include IiifPrint::AllinsonFlexFields")
|
68
|
+
contents.gsub!('self.default_processor_chain += [:ocr_search_params]', 'self.default_processor_chain += %i[ocr_search_params include_allinson_flex_fields]')
|
69
|
+
File.write(file_path, contents)
|
70
|
+
end
|
51
71
|
end
|
52
72
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# rubocop:disable Lint/UnusedBlockArgument
|
1
2
|
IiifPrint.config do |config|
|
2
3
|
# NOTE: WorkTypes and models are used synonymously here.
|
3
4
|
# Add models to be excluded from search so the user
|
@@ -15,8 +16,14 @@ IiifPrint.config do |config|
|
|
15
16
|
# @example
|
16
17
|
# config.excluded_model_name_solr_field_key = 'some_solr_field_key'
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
if Rails.env.development?
|
20
|
+
if DerivativeRodeo.config.aws_s3_access_key_id.present? && DerivativeRodeo.config.aws_s3_secret_access_key.present?
|
21
|
+
Rails.logger.info("DerivativeRodeo S3 Credentials detected using 's3' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
|
22
|
+
IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 's3'
|
23
|
+
else
|
24
|
+
Rails.logger.info("DerivativeRodeo S3 Credentials not-detected using 'file' for IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name")
|
25
|
+
IiifPrint::DerivativeRodeoService.preprocessed_location_adapter_name = 'file'
|
26
|
+
end
|
27
|
+
end
|
22
28
|
end
|
29
|
+
# rubocop:enable Lint/UnusedBlockArgument
|
@@ -25,7 +25,8 @@ module IiifPrint
|
|
25
25
|
# @see IiifPrint::PluggableDerivativeService#plugins_for
|
26
26
|
# @return [Boolean]
|
27
27
|
def valid?
|
28
|
-
|
28
|
+
# @note We are taking a shortcut because currently we are only concerned about images.
|
29
|
+
file_set.class.image_mime_types.include?(file_set.mime_type)
|
29
30
|
end
|
30
31
|
|
31
32
|
def derivative_path_factory
|
@@ -2,6 +2,7 @@
|
|
2
2
|
module IiifPrint
|
3
3
|
module BlacklightIiifSearch
|
4
4
|
module AnnotationDecorator
|
5
|
+
INVALID_MATCH_TEXT = "#xywh=INVALID,INVALID,INVALID,INVALID".freeze
|
5
6
|
##
|
6
7
|
# Create a URL for the annotation
|
7
8
|
# use a Hyrax-y URL syntax:
|
@@ -28,23 +29,33 @@ module IiifPrint
|
|
28
29
|
# @return [String]
|
29
30
|
def coordinates
|
30
31
|
return default_coords if query.blank?
|
32
|
+
|
33
|
+
sanitized_query = sanitize_query.downcase
|
31
34
|
coords_json = fetch_and_parse_coords
|
32
|
-
return
|
33
|
-
|
35
|
+
return derived_coords_json_and_properties(sanitized_query) unless coords_json && coords_json['coords']
|
36
|
+
|
37
|
+
query_terms = sanitized_query.split(' ')
|
38
|
+
|
34
39
|
matches = coords_json['coords'].select do |k, _v|
|
35
40
|
k.downcase =~ /(#{query_terms.join('|')})/
|
36
41
|
end
|
37
42
|
return default_coords if matches.blank?
|
43
|
+
|
38
44
|
coords_array = matches.values.flatten(1)[hl_index]
|
39
|
-
return
|
45
|
+
return default_coords unless coords_array
|
46
|
+
|
40
47
|
"#xywh=#{coords_array.join(',')}"
|
41
48
|
end
|
42
49
|
|
50
|
+
def sanitize_query
|
51
|
+
query.match(additional_query_terms_regex)[1].strip
|
52
|
+
end
|
53
|
+
|
43
54
|
##
|
44
55
|
# return the JSON word-coordinates file contents
|
45
56
|
# @return [JSON]
|
46
57
|
def fetch_and_parse_coords
|
47
|
-
coords = IiifPrint
|
58
|
+
coords = IiifPrint.config.ocr_coords_from_json_function.call(file_set_id: file_set_id, document: document)
|
48
59
|
return nil if coords.blank?
|
49
60
|
begin
|
50
61
|
JSON.parse(coords)
|
@@ -53,6 +64,23 @@ module IiifPrint
|
|
53
64
|
end
|
54
65
|
end
|
55
66
|
|
67
|
+
# This is a bit hacky but it is checking if any of the properties contain the query term
|
68
|
+
# if there are no coords and there is a metadata property match
|
69
|
+
# then we return the default coords
|
70
|
+
# else we insert an invalid match text to be stripped out at a later point
|
71
|
+
# @see IiifPrint::IiifSearchResponseDecorator#annotation_list
|
72
|
+
def derived_coords_json_and_properties(sanitized_query)
|
73
|
+
property = @document.keys.detect do |key|
|
74
|
+
(key.ends_with?("_tesim") || key.ends_with?("_tsim")) && property_includes_sanitized_query?(key, sanitized_query)
|
75
|
+
end
|
76
|
+
|
77
|
+
property ? default_coords : INVALID_MATCH_TEXT
|
78
|
+
end
|
79
|
+
|
80
|
+
def property_includes_sanitized_query?(property, sanitized_query)
|
81
|
+
@document[property].join.downcase.include?(sanitized_query)
|
82
|
+
end
|
83
|
+
|
56
84
|
##
|
57
85
|
# a default set of coordinates
|
58
86
|
# @return [String]
|
@@ -77,7 +105,31 @@ module IiifPrint
|
|
77
105
|
|
78
106
|
file_set_ids = document['file_set_ids_ssim']
|
79
107
|
raise "#{self.class}: NO FILE SET ID" if file_set_ids.blank?
|
80
|
-
|
108
|
+
|
109
|
+
# Since a parent work's `file_set_ids_ssim` can contain child work ids as well as file set ids,
|
110
|
+
# this will ensure that the file set id is indeed a `FileSet`
|
111
|
+
file_set_ids.detect { |id| SolrDocument.find(id).file_set? }
|
112
|
+
end
|
113
|
+
|
114
|
+
##
|
115
|
+
# This method is a workaround to compensate for overriding the solr_params method in
|
116
|
+
# BlacklightIiifSearch::IiifSearch. In the override, the solr_params method adds an additional filter to the query
|
117
|
+
# to include either the object_relation_field OR the parent document's id and removes the :f parameter from the
|
118
|
+
# query. This resulted in the query split here returning more than the actual query term.
|
119
|
+
#
|
120
|
+
# @see IiifPrint::IiifSearchDecorator#solr_params
|
121
|
+
# @return [Regexp] A regular expression to find the last AND and everything after it
|
122
|
+
# @example
|
123
|
+
# 'foo AND (is_page_of_ssim:\"123123\" OR id:\"123123\")' #=> 'foo'
|
124
|
+
def additional_query_terms_regex
|
125
|
+
/(.*)(?= AND (\(.+\)|\w+)$)/
|
126
|
+
end
|
127
|
+
|
128
|
+
##
|
129
|
+
# @return [IIIF::Presentation::Resource]
|
130
|
+
def text_resource_for_annotation
|
131
|
+
IIIF::Presentation::Resource.new('@type' => 'cnt:ContentAsText',
|
132
|
+
'chars' => sanitize_query)
|
81
133
|
end
|
82
134
|
end
|
83
135
|
end
|
@@ -11,12 +11,16 @@ module IiifPrint
|
|
11
11
|
include IiifPrint::HighlightSearchParams
|
12
12
|
# TODO: Do we need the following as a module? It hides the behavior
|
13
13
|
include IiifPrint::ExcludeModels
|
14
|
+
include IiifPrint::AllinsonFlexFields
|
14
15
|
|
15
16
|
# NOTE: If you are using advanced_search, the :exclude_models and :highlight_search_params must
|
16
17
|
# be added after the advanced_search methods (which are not part of this gem). In other tests,
|
17
18
|
# we found that having the advanced search processing after the two aforementioned processors
|
18
19
|
# resulted in improper evaluation of keyword querying.
|
19
|
-
self.default_processor_chain += [:exclude_models,
|
20
|
+
self.default_processor_chain += [:exclude_models,
|
21
|
+
:highlight_search_params,
|
22
|
+
:show_parents_only,
|
23
|
+
:include_allinson_flex_fields]
|
20
24
|
|
21
25
|
# rubocop:enable Naming/PredicateName
|
22
26
|
def show_parents_only(solr_parameters)
|