iiif_print 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile.lock +2 -2
- data/README.md +4 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +1 -1
- data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +37 -22
- data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
- data/{lib → app/jobs}/iiif_print/jobs/child_works_from_pdf_job.rb +14 -9
- data/{lib → app/jobs}/iiif_print/jobs/create_relationships_job.rb +10 -20
- data/app/listeners/iiif_print/listener.rb +31 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +1 -1
- data/app/models/concerns/iiif_print/solr/document.rb +5 -3
- data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +5 -2
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +4 -2
- data/app/services/iiif_print/pluggable_derivative_service.rb +5 -1
- data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
- data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
- data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
- data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
- data/app/views/hyrax/file_sets/_show_actions.html.erb +1 -1
- data/config/initializers/simple_schema_loader.rb +1 -0
- data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +3 -3
- data/iiif_print.gemspec +1 -1
- data/lib/iiif_print/base_derivative_service.rb +13 -2
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +2 -2
- data/lib/iiif_print/catalog_search_builder.rb +2 -2
- data/lib/iiif_print/configuration.rb +65 -5
- data/lib/iiif_print/data/fileset_helper.rb +2 -2
- data/lib/iiif_print/data/work_derivatives.rb +1 -1
- data/lib/iiif_print/engine.rb +46 -2
- data/lib/iiif_print/homepage_search_builder.rb +2 -2
- data/lib/iiif_print/jp2_derivative_service.rb +4 -1
- data/lib/iiif_print/lineage_service.rb +19 -6
- data/lib/iiif_print/pdf_derivative_service.rb +3 -1
- data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
- data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
- data/lib/iiif_print/persistence_layer.rb +118 -0
- data/lib/iiif_print/split_pdfs/base_splitter.rb +11 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +19 -9
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +5 -16
- data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
- data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
- data/lib/iiif_print/tiff_derivative_service.rb +3 -1
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +79 -44
- metadata +18 -191
- data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -40
- data/app/views/hyrax/file_sets/_actions.html.erb +0 -46
- data/bin/rails +0 -13
- data/spec/.keep.txt +0 -1
- data/spec/factories/ability.rb +0 -6
- data/spec/factories/newspaper_issue.rb +0 -7
- data/spec/factories/newspaper_page.rb +0 -7
- data/spec/factories/newspaper_page_solr_document.rb +0 -20
- data/spec/factories/newspaper_title.rb +0 -8
- data/spec/factories/uploaded_pdf_file.rb +0 -9
- data/spec/factories/uploaded_txt_file.rb +0 -9
- data/spec/factories/user.rb +0 -13
- data/spec/fixtures/authorities/licenses.yml +0 -4
- data/spec/fixtures/authorities/rights_statements.yml +0 -4
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +0 -7
- data/spec/fixtures/files/alto-2-0.xsd +0 -714
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +0 -16
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +0 -31
- data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
- data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
- data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +0 -202
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
- data/spec/helpers/iiif_print_helper_spec.rb +0 -43
- data/spec/iiif_print/base_derivative_service_spec.rb +0 -28
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -59
- data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
- data/spec/iiif_print/configuration_spec.rb +0 -193
- data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
- data/spec/iiif_print/data/work_file_spec.rb +0 -99
- data/spec/iiif_print/data/work_files_spec.rb +0 -237
- data/spec/iiif_print/image_tool_spec.rb +0 -109
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -35
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -118
- data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
- data/spec/iiif_print/lineage_service_spec.rb +0 -13
- data/spec/iiif_print/metadata_spec.rb +0 -249
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +0 -27
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +0 -80
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +0 -92
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +0 -22
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +0 -18
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +0 -19
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
- data/spec/iiif_print_spec.rb +0 -171
- data/spec/misc_shared.rb +0 -111
- data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +0 -27
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
- data/spec/models/solr_document_spec.rb +0 -14
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -70
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
- data/spec/samvera/derivatives/configuration_spec.rb +0 -41
- data/spec/samvera/derivatives/hyrax_spec.rb +0 -62
- data/spec/samvera/derivatives_spec.rb +0 -54
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +0 -103
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +0 -20
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -175
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
- data/spec/spec_helper.rb +0 -181
- data/spec/support/controller_level_helpers.rb +0 -28
- data/spec/support/iiif_print_models.rb +0 -127
- data/spec/test_app_templates/blacklight.yml +0 -9
- data/spec/test_app_templates/fedora.yml +0 -15
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
- data/spec/test_app_templates/redis.yml +0 -9
- data/spec/test_app_templates/solr/conf/schema.xml +0 -362
- data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
- data/spec/test_app_templates/solr.yml +0 -7
- /data/{lib → app/jobs}/iiif_print/jobs/request_split_pdf_job.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d4f22906e84ce3a40c02f62c3a6c0d0272b3f267186fd8efcafb635c56b9b789
|
4
|
+
data.tar.gz: f0cbb7d93bf521ca711148b51c0c51ffc993272589daabc8735949f3dd36908d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bd7ce954cce06910bc219df4ae19333b67d4307767fa40d740b419a8c118f843c0cd4d0d9f199eb5623f3577ddae43f96dd3e339ac82829ae0576d0d6a8b9969
|
7
|
+
data.tar.gz: 83fa85c26ab115d40e161e3f53ad091e6ceb3df3deb1fbc4f0e3752ab868b84c106f53831570b24d232a0c3024471b5154fa27b7ca731b006eea78a77159267f
|
data/.rubocop.yml
CHANGED
@@ -72,7 +72,7 @@ Metrics/MethodLength:
|
|
72
72
|
- 'lib/generators/iiif_print/catalog_controller_generator.rb'
|
73
73
|
- 'lib/iiif_print/ingest/ndnp/ndnp_mets_helper.rb'
|
74
74
|
- 'lib/iiif_print/ingest/pdf_issue_ingester.rb'
|
75
|
-
- '
|
75
|
+
- 'app/jobs/iiif_print/jobs/create_relationships_job.rb'
|
76
76
|
- 'spec/model_shared.rb'
|
77
77
|
|
78
78
|
Naming/PredicateName:
|
data/Gemfile.lock
CHANGED
@@ -206,7 +206,7 @@ GEM
|
|
206
206
|
railties (>= 4.1.0)
|
207
207
|
responders
|
208
208
|
warden (~> 1.2.3)
|
209
|
-
devise-guests (0.8.
|
209
|
+
devise-guests (0.8.2)
|
210
210
|
devise
|
211
211
|
diff-lcs (1.5.0)
|
212
212
|
disposable (0.4.7)
|
@@ -913,7 +913,7 @@ GEM
|
|
913
913
|
actionpack (>= 5.2)
|
914
914
|
activesupport (>= 5.2)
|
915
915
|
sprockets (>= 3.0.0)
|
916
|
-
sqlite3 (1.
|
916
|
+
sqlite3 (1.7.2)
|
917
917
|
mini_portile2 (~> 2.8.0)
|
918
918
|
ssrf_filter (1.0.8)
|
919
919
|
sxp (1.2.4)
|
data/README.md
CHANGED
@@ -115,6 +115,10 @@ uv = createUV('#uv', {
|
|
115
115
|
## Configuration to enable IiifPrint features
|
116
116
|
**NOTE: WorkTypes and models are used synonymously here.**
|
117
117
|
|
118
|
+
### Persistence Layer Adapter
|
119
|
+
|
120
|
+
We created IiifPrint with an assumption of ActiveFedora. However, as Hyrax now supports Valkyrie, we need an alternate approach. We introduced `IiifPrint::Configuration#persistence_layer` as a configuration option. By default it will use `ActiveFedora` methods; but you can switch adapters to use Valkyrie instead. (See `IiifPrint::PersistenceLayer` for more details.)
|
121
|
+
|
118
122
|
### IIIF URL configuration
|
119
123
|
|
120
124
|
If you set EXTERNAL_IIIF_URL in your environment, then IiifPrint will use that URL as the root for your IIIF URLs. It will also switch from using the file set ID to using the SHA1 of the file as the identifier. This enables using serverless_iiif or Cantaloupe (referred to as the service) by pointing the service to the same S3 bucket that FCREPO writes the uploaded files to. By setting it up that way you do not need the service to connect to FCREPO or Hyrax at all, both natively support connecting to an S3 bucket to get their data.
|
@@ -48,7 +48,7 @@ module IiifPrint
|
|
48
48
|
# we destroy the children before the file_set, because we need the parent relationship
|
49
49
|
IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
|
50
50
|
file_set: file_set,
|
51
|
-
work: file_set
|
51
|
+
work: IiifPrint.parent_for(file_set)
|
52
52
|
)
|
53
53
|
# and now back to your regularly scheduled programming
|
54
54
|
super
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IiifPrint
|
4
|
+
module ChildWorkIndexer
|
5
|
+
def to_solr
|
6
|
+
super.tap do |index_document|
|
7
|
+
index_solr_doc(index_document)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def generate_solr_document
|
12
|
+
super.tap do |solr_doc|
|
13
|
+
index_solr_doc(solr_doc)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def index_solr_doc(solr_doc)
|
20
|
+
object ||= @object || resource
|
21
|
+
solr_doc['is_child_bsi'] ||= object.try(:is_child)
|
22
|
+
solr_doc['split_from_pdf_id_ssi'] ||= object.try(:split_from_pdf_id)
|
23
|
+
solr_doc['is_page_of_ssim'] = iiif_print_lineage_service.ancestor_ids_for(object)
|
24
|
+
solr_doc['member_ids_ssim'] = iiif_print_lineage_service.descendent_member_ids_for(object)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -2,39 +2,54 @@
|
|
2
2
|
|
3
3
|
module IiifPrint
|
4
4
|
module FileSetIndexer
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
# @param base [Class]
|
10
|
-
# @return [Class] the given base, now decorated in all of it's glory
|
11
|
-
def self.decorate(base)
|
12
|
-
base.prepend(self)
|
13
|
-
base.class_attribute :iiif_print_lineage_service, default: IiifPrint::LineageService
|
14
|
-
base
|
5
|
+
def to_solr
|
6
|
+
super.tap do |index_document|
|
7
|
+
index_solr_doc(index_document)
|
8
|
+
end
|
15
9
|
end
|
16
10
|
|
17
11
|
def generate_solr_document
|
18
12
|
super.tap do |solr_doc|
|
19
|
-
|
20
|
-
solr_doc['is_page_of_ssim'] = iiif_print_lineage_service.ancestor_ids_for(object) if object.mime_type&.match(/image/)
|
21
|
-
# index for full text search
|
22
|
-
solr_doc['all_text_timv'] = all_text
|
23
|
-
solr_doc['all_text_tsimv'] = all_text
|
24
|
-
solr_doc['digest_ssim'] = digest_from_content
|
13
|
+
index_solr_doc(solr_doc)
|
25
14
|
end
|
26
15
|
end
|
27
16
|
|
28
17
|
private
|
29
18
|
|
30
|
-
def
|
31
|
-
|
32
|
-
|
19
|
+
def index_solr_doc(solr_doc)
|
20
|
+
object ||= @object || resource
|
21
|
+
# only UV viewable images should have is_page_of, it is only used for iiif search
|
22
|
+
solr_doc['is_page_of_ssim'] = IiifPrint::LineageService.ancestor_ids_for(object) if image?(object)
|
23
|
+
# index for full text search
|
24
|
+
solr_doc['all_text_tsimv'] = solr_doc['all_text_timv'] = all_text(object)
|
25
|
+
solr_doc['digest_ssim'] = find_checksum(object)
|
26
|
+
end
|
27
|
+
|
28
|
+
def image?(object)
|
29
|
+
mime_type = object.try(:mime_type) || object.original_file.try(:mime_type)
|
30
|
+
mime_type&.match(/image/)
|
31
|
+
end
|
32
|
+
|
33
|
+
def find_checksum(object)
|
34
|
+
file = object.original_file
|
35
|
+
return unless file
|
36
|
+
|
37
|
+
digest ||= if file.is_a?(Hyrax::FileMetadata)
|
38
|
+
Array.wrap(file.checksum).first
|
39
|
+
else # file is a Hydra::PCDM::File (ActiveFedora)
|
40
|
+
file.digest.first
|
41
|
+
end
|
42
|
+
return unless digest
|
43
|
+
|
44
|
+
digest.to_s
|
33
45
|
end
|
34
46
|
|
35
|
-
def all_text
|
36
|
-
|
37
|
-
return
|
47
|
+
def all_text(object)
|
48
|
+
file = object.original_file
|
49
|
+
return unless file
|
50
|
+
|
51
|
+
text = IiifPrint.extract_text_for(file_set: object)
|
52
|
+
return text if text.blank?
|
38
53
|
|
39
54
|
text.tr("\n", ' ').squeeze(' ')
|
40
55
|
end
|
@@ -2,7 +2,8 @@ module IiifPrint
|
|
2
2
|
module Jobs
|
3
3
|
# TODO: Consider inheriting from ::Application job. That means we would have the upstreams
|
4
4
|
# based job behavior.
|
5
|
-
class ApplicationJob <
|
5
|
+
class ApplicationJob < ::ApplicationJob
|
6
|
+
queue_as ::IiifPrint.config.ingest_queue_name
|
6
7
|
end
|
7
8
|
end
|
8
9
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'iiif_print/jobs/application_job'
|
2
|
+
|
1
3
|
module IiifPrint
|
2
4
|
module Jobs
|
3
5
|
# @deprecated
|
@@ -10,7 +12,9 @@ module IiifPrint
|
|
10
12
|
# @param user: [User]
|
11
13
|
# @param admin_set_id: [<String>]
|
12
14
|
# rubocop:disable Metrics/MethodLength
|
13
|
-
def perform(
|
15
|
+
def perform(id, pdf_paths, user, admin_set_id, *)
|
16
|
+
candidate_for_parency = IiifPrint.find_by(id: id)
|
17
|
+
|
14
18
|
##
|
15
19
|
# We know that we have cases where parent_work is nil, this will definitely raise an
|
16
20
|
# exception; which is fine because we were going to do it later anyway.
|
@@ -29,7 +33,7 @@ module IiifPrint
|
|
29
33
|
# However, there seem to be cases where we still don't have the file when we get here, so to be sure, we
|
30
34
|
# re-do the same command that was previously used to prepare the file path. If the file is already here, it
|
31
35
|
# simply returns the path, but if not it will copy the file there, giving us one more chance to have what we need.
|
32
|
-
pdf_paths = [Hyrax::WorkingDirectory.find_or_retrieve(pdf_file_set.
|
36
|
+
pdf_paths = [Hyrax::WorkingDirectory.find_or_retrieve(pdf_file_set.original_file.id, pdf_file_set.id, pdf_paths.first)] if pdf_file_set
|
33
37
|
# handle each input pdf (when input is a file set, we will only have one).
|
34
38
|
pdf_paths.each do |original_pdf_path|
|
35
39
|
split_pdf(original_pdf_path, user, child_model, pdf_file_set)
|
@@ -42,7 +46,7 @@ module IiifPrint
|
|
42
46
|
# @param child_model: [<String>] child model
|
43
47
|
IiifPrint::Jobs::CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
|
44
48
|
user: user,
|
45
|
-
parent_id: @parent_work.id,
|
49
|
+
parent_id: @parent_work.id.to_s,
|
46
50
|
parent_model: @parent_work.class.to_s,
|
47
51
|
child_model: child_model.to_s
|
48
52
|
)
|
@@ -56,17 +60,18 @@ module IiifPrint
|
|
56
60
|
# rubocop:disable Metrics/ParameterLists
|
57
61
|
# rubocop:disable Metrics/MethodLength
|
58
62
|
def split_pdf(original_pdf_path, user, child_model, pdf_file_set)
|
59
|
-
|
63
|
+
user = User.find_by_user_key(user) unless user.is_a?(User)
|
64
|
+
image_files = @parent_work.iiif_print_config.pdf_splitter_service.call(original_pdf_path)
|
60
65
|
|
61
66
|
# give as much info as possible if we don't have image files to work with.
|
62
67
|
if image_files.blank?
|
63
|
-
raise "#{@parent_work.class} (ID=#{@parent_work.id} "
|
64
|
-
"to_param:#{@parent_work.to_param}) "
|
65
|
-
"original_pdf_path #{original_pdf_path.inspect} "
|
68
|
+
raise "#{@parent_work.class} (ID=#{@parent_work.id} " \
|
69
|
+
"to_param:#{@parent_work.to_param}) " \
|
70
|
+
"original_pdf_path #{original_pdf_path.inspect} " \
|
66
71
|
"pdf_file_set #{pdf_file_set.inspect}"
|
67
72
|
end
|
68
73
|
|
69
|
-
@split_from_pdf_id = pdf_file_set&.id
|
74
|
+
@split_from_pdf_id = pdf_file_set&.id.to_s
|
70
75
|
prepare_import_data(original_pdf_path, image_files, user)
|
71
76
|
|
72
77
|
# submit the job to create all the child works for one PDF
|
@@ -133,7 +138,7 @@ module IiifPrint
|
|
133
138
|
def create_uploaded_file(user, path)
|
134
139
|
# TODO: Could we create a remote path?
|
135
140
|
uf = Hyrax::UploadedFile.new
|
136
|
-
uf.user_id = user.id
|
141
|
+
uf.user_id = user.try(:id) || user
|
137
142
|
uf.file = CarrierWave::SanitizedFile.new(path)
|
138
143
|
uf.save!
|
139
144
|
uf.id
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# rubocop:disable Metrics/ClassLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
1
2
|
module IiifPrint
|
2
3
|
module Jobs
|
3
4
|
# Link newly created child works to the parent
|
@@ -34,12 +35,12 @@ module IiifPrint
|
|
34
35
|
@pending_children.each(&:destroy)
|
35
36
|
raise "CreateRelationshipsJob for parent id: #{@parent_id} " \
|
36
37
|
"added #{@number_of_successes} children, " \
|
37
|
-
"expected #{@pending_children} children."
|
38
|
+
"expected #{@pending_children.count} children."
|
38
39
|
else
|
39
40
|
# report failures & keep pending relationships
|
40
41
|
raise "CreateRelationshipsJob failed for parent id: #{@parent_id} " \
|
41
42
|
"had #{@number_of_successes} successes & #{@number_of_failures} failures, " \
|
42
|
-
"with errors: #{@errors}. Wanted #{@pending_children} children."
|
43
|
+
"with errors: #{@errors}. Wanted #{@pending_children.count} children."
|
43
44
|
end
|
44
45
|
else
|
45
46
|
# if we aren't ready yet, reschedule the job and end this one normally
|
@@ -61,7 +62,7 @@ module IiifPrint
|
|
61
62
|
# find child works (skip out if any haven't yet been created)
|
62
63
|
@pending_children.each do |child|
|
63
64
|
# find by title... if any aren't found, the child works are not yet ready
|
64
|
-
found_children =
|
65
|
+
found_children = IiifPrint.find_by_title_for(title: child.child_title, model: @child_model)
|
65
66
|
found_all_children = false if found_children.empty?
|
66
67
|
break unless found_all_children == true
|
67
68
|
@child_works += found_children
|
@@ -70,13 +71,8 @@ module IiifPrint
|
|
70
71
|
found_all_children
|
71
72
|
end
|
72
73
|
|
73
|
-
def find_children_by_title_for(title, model)
|
74
|
-
# We should only find one, but there is no guarantee of that and `:where` returns an array.
|
75
|
-
model.constantize.where(title: title)
|
76
|
-
end
|
77
|
-
|
78
74
|
def add_children_to_parent
|
79
|
-
parent_work =
|
75
|
+
parent_work = IiifPrint.find_by(id: @parent_id)
|
80
76
|
create_relationships(parent: parent_work, ordered_children: @child_works)
|
81
77
|
end
|
82
78
|
|
@@ -103,25 +99,19 @@ module IiifPrint
|
|
103
99
|
@errors << e
|
104
100
|
end
|
105
101
|
end
|
106
|
-
|
102
|
+
|
103
|
+
IiifPrint.save(object: parent) if @parent_record_members_added && @number_of_failures.zero?
|
107
104
|
end
|
108
105
|
|
109
106
|
# Bulkrax no longer reindexes file_sets, but IiifPrint needs both to add is_page_of_ssim for universal viewer.
|
110
107
|
# This is where child works need to be indexed (AFTER the parent save), as opposed to how Bulkrax does it.
|
111
|
-
|
112
|
-
child_work.update_index
|
113
|
-
child_work.file_sets.each(&:update_index) if child_work.respond_to?(:file_sets)
|
114
|
-
end
|
108
|
+
IiifPrint.index_works(objects: ordered_children)
|
115
109
|
end
|
116
110
|
|
117
111
|
def add_to_work(child_record:, parent_record:)
|
118
|
-
|
119
|
-
|
120
|
-
parent_record.ordered_members << child_record
|
121
|
-
@parent_record_members_added = true
|
122
|
-
# Bulkrax does child_record.save! here, but it makes no sense
|
123
|
-
# as there is nothing to save or index at this point.
|
112
|
+
@parent_record_members_added = IiifPrint.create_relationship_between(child_record: child_record, parent_record: parent_record)
|
124
113
|
end
|
125
114
|
end
|
126
115
|
end
|
116
|
+
# rubocop:enable Metrics/ClassLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
127
117
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
##
|
2
|
+
# @see https://github.com/samvera/hyrax/wiki/Hyrax's-Event-Bus-(Hyrax::Publisher)
|
3
|
+
# @see https://www.rubydoc.info/gems/hyrax/Hyrax/Publisher
|
4
|
+
# @see https://dry-rb.org/gems/dry-events
|
5
|
+
module IiifPrint
|
6
|
+
class Listener
|
7
|
+
##
|
8
|
+
# Responsible for conditionally enqueuing the creation of child works from a PDF.
|
9
|
+
#
|
10
|
+
# @param event [#[]] a hash like construct with keys :user and :file_set
|
11
|
+
# @param service [#conditionally_enqueue]
|
12
|
+
#
|
13
|
+
# @see Hyrax::WorkUploadsHandler
|
14
|
+
def on_file_characterized(event, service: IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService)
|
15
|
+
file_set = event[:file_set]
|
16
|
+
return false unless file_set
|
17
|
+
return false unless file_set.file_set?
|
18
|
+
return false unless file_set.original_file.pdf?
|
19
|
+
|
20
|
+
work = IiifPrint.parent_for(file_set)
|
21
|
+
# A short-circuit to avoid fetching the underlying file.
|
22
|
+
return false unless work
|
23
|
+
|
24
|
+
user = work.depositor
|
25
|
+
# TODO: Verify that this is the correct thing to be sending off for conditional enqueuing. That
|
26
|
+
# will require a more involved integration test.
|
27
|
+
file = file_set.original_file
|
28
|
+
service.conditionally_enqueue(file_set: file_set, work: work, file: file, user: user)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -39,16 +39,18 @@ module IiifPrint::Solr::Document
|
|
39
39
|
|
40
40
|
def method_missing(method_name, *args, &block)
|
41
41
|
super unless iiif_print_solr_field_names.include? method_name.to_s
|
42
|
-
self[
|
42
|
+
self[IiifPrint.solr_name(method_name.to_s)]
|
43
43
|
end
|
44
44
|
|
45
45
|
def respond_to_missing?(method_name, include_private = false)
|
46
46
|
iiif_print_solr_field_names.include?(method_name.to_s) || super
|
47
47
|
end
|
48
48
|
|
49
|
-
#
|
49
|
+
# @see https://github.com/samvera/hyrax/commit/7108409c619cd2ba4ae8c836b9f3b429a7e9837b
|
50
50
|
def file_set_ids
|
51
|
-
|
51
|
+
# Yes, this looks a little odd. But the truth is the prior key (e.g. `file_set_ids_ssim`) was
|
52
|
+
# an alias of `member_ids_ssim`.
|
53
|
+
self['member_ids_ssim']
|
52
54
|
end
|
53
55
|
|
54
56
|
def any_highlighting?
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IiifPrint
|
4
|
+
module FileSetPresenterDecorator
|
5
|
+
# uses Hyku's TenantConfig to determine whether to allow PDF splitting button
|
6
|
+
def show_split_button?
|
7
|
+
return parent.try(:split_pdfs?) if parent.respond_to?(:split_pdfs?)
|
8
|
+
true
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
@@ -14,7 +14,7 @@ module IiifPrint
|
|
14
14
|
presenter_class.for(solr_doc)
|
15
15
|
elsif Hyrax.config.curation_concerns.include?(solr_doc.hydra_model)
|
16
16
|
# look up file set ids and loop through those
|
17
|
-
file_set_docs = load_file_set_docs(solr_doc.try(:
|
17
|
+
file_set_docs = load_file_set_docs(solr_doc.try(:member_ids) || solr_doc.try(:[], 'member_ids_ssim'))
|
18
18
|
file_set_docs.map { |doc| presenter_class.for(doc) } if file_set_docs.length
|
19
19
|
end
|
20
20
|
end.flatten.compact
|
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
module IiifPrint
|
4
4
|
module WorkShowPresenterDecorator
|
5
|
-
delegate :
|
5
|
+
delegate :member_ids, to: :solr_document
|
6
|
+
alias file_set_ids member_ids
|
6
7
|
|
7
8
|
# OVERRIDE Hyrax 2.9.6 to remove check for representative_presenter.image?
|
8
9
|
# @return [Boolean] render a IIIF viewer
|
@@ -20,8 +21,10 @@ module IiifPrint
|
|
20
21
|
# overriding Hyrax to include file sets for both work and child works (file set ids include both)
|
21
22
|
# process each id, short-circuiting the loop once one true value is found. This speeds up the test
|
22
23
|
# by not loading more member_presenters than needed.
|
24
|
+
#
|
25
|
+
# @todo Review if this is necessary for Hyrax 5.
|
23
26
|
def members_include_viewable_image?
|
24
|
-
all_member_ids =
|
27
|
+
all_member_ids = solr_document.try(:member_ids) || solr_document.try(:[], 'member_ids_ssim')
|
25
28
|
Array.wrap(all_member_ids).each do |id|
|
26
29
|
return true if file_type_and_permissions_valid?(member_presenters_for([id]).first)
|
27
30
|
end
|
@@ -142,9 +142,11 @@ module IiifPrint
|
|
142
142
|
results = []
|
143
143
|
ids.each_slice(SOLR_QUERY_PAGE_SIZE) do |paged_ids|
|
144
144
|
query = "id:(#{paged_ids.join(' OR ')})"
|
145
|
-
results +=
|
145
|
+
results += IiifPrint.solr_query(
|
146
146
|
query,
|
147
|
-
|
147
|
+
fq: "-has_model_ssim:FileSet",
|
148
|
+
rows: paged_ids.size,
|
149
|
+
method: :post
|
148
150
|
)
|
149
151
|
end
|
150
152
|
results
|
@@ -27,7 +27,11 @@ class IiifPrint::PluggableDerivativeService
|
|
27
27
|
class_attribute :derivative_path_factory, default: Hyrax::DerivativePath
|
28
28
|
|
29
29
|
def initialize(file_set, plugins: plugins_for(file_set))
|
30
|
-
@file_set = file_set
|
30
|
+
@file_set = if file_set.is_a?(Hyrax::FileMetadata)
|
31
|
+
Hyrax.query_service.find_by(id: file_set.file_set_id)
|
32
|
+
else
|
33
|
+
file_set
|
34
|
+
end
|
31
35
|
@plugins = Array.wrap(plugins)
|
32
36
|
@valid_plugins = plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?)
|
33
37
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Hyrax
|
4
|
+
module Transactions
|
5
|
+
##
|
6
|
+
# This decorator does the following:
|
7
|
+
#
|
8
|
+
# - Prepend the {ConditionallyDestroyChildrenFromSplit} transaction to the "file_set.destroy"
|
9
|
+
# step. The prependment corresponds to the behavior for
|
10
|
+
# {IiifPrint::Actors::FileSetActorDecorator#destroy}
|
11
|
+
#
|
12
|
+
# For more information about adjusting transactions, see
|
13
|
+
# [Transitioning workshop solution for adding transaction](https://github.com/samvera-labs/transitioning-to-valkyrie-workshop/commit/bcab2bb8f65078e88395c68f72be00e7ffad57ec)
|
14
|
+
#
|
15
|
+
# @see https://github.com/samvera/hyrax/blob/f875d61dc87229cf1f05eb2bb6d414b5ef314616/lib/hyrax/transactions/container.rb
|
16
|
+
class IiifPrintContainerDecorator
|
17
|
+
extend Dry::Container::Mixin
|
18
|
+
|
19
|
+
namespace 'file_set' do |ops|
|
20
|
+
ops.register 'iiif_print_conditionally_destroy_spawned_children' do
|
21
|
+
Steps::ConditionallyDestroyChildrenFromSplit.new
|
22
|
+
end
|
23
|
+
ops.register 'destroy' do
|
24
|
+
Hyrax::Transactions::FileSetDestroy.new(
|
25
|
+
steps: (['file_set.iiif_print_conditionally_destroy_spawned_children'] +
|
26
|
+
Hyrax::Transactions::FileSetDestroy::DEFAULT_STEPS)
|
27
|
+
)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
"Hyrax::Transactions::Container".safe_constantize&.merge(Hyrax::Transactions::IiifPrintContainerDecorator)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Hyrax
|
2
|
+
module Transactions
|
3
|
+
module Steps
|
4
|
+
##
|
5
|
+
# For a FileSet that is a PDF, we need to delete any works and file_sets that are the result of
|
6
|
+
# splitting that PDF into constituent images of each page of the PDF. This is responsible for
|
7
|
+
# that work.
|
8
|
+
class ConditionallyDestroyChildrenFromSplit
|
9
|
+
include Dry::Monads[:result]
|
10
|
+
|
11
|
+
##
|
12
|
+
# @param resource [Hyrax::FileSet]
|
13
|
+
def call(resource, user: nil)
|
14
|
+
return Failure(:resource_not_persisted) unless resource.persisted?
|
15
|
+
|
16
|
+
parent = IiifPrint.persistence_adapter.parent_for(resource)
|
17
|
+
return Success(true) unless parent
|
18
|
+
|
19
|
+
# We do not care about the results of this call; as it is conditionally looking for things
|
20
|
+
# to destroy.
|
21
|
+
IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
|
22
|
+
file_set: resource,
|
23
|
+
work: parent,
|
24
|
+
user: user
|
25
|
+
)
|
26
|
+
|
27
|
+
Success(resource)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# OVERRIDE Hyrax 5.0.0rc2 to add file_set.iiif_print_conditionally_destroy_spawned_children with user args
|
4
|
+
|
5
|
+
module Hyrax
|
6
|
+
module Transactions
|
7
|
+
module Steps
|
8
|
+
module DeleteAllFileSetsDecorator
|
9
|
+
include Dry::Monads[:result]
|
10
|
+
|
11
|
+
##
|
12
|
+
# @param [Valkyrie::Resource] resource
|
13
|
+
# @param [::User] user the user responsible for the delete action
|
14
|
+
#
|
15
|
+
# @return [Dry::Monads::Result]
|
16
|
+
def call(resource, user: nil)
|
17
|
+
return Failure(:resource_not_persisted) unless resource.persisted?
|
18
|
+
|
19
|
+
@query_service.custom_queries.find_child_file_sets(resource: resource).each do |file_set|
|
20
|
+
return Failure[:failed_to_delete_file_set, file_set] unless
|
21
|
+
Hyrax::Transactions::Container['file_set.destroy']
|
22
|
+
.with_step_args('file_set.remove_from_work' => { user: user },
|
23
|
+
'file_set.delete' => { user: user },
|
24
|
+
'file_set.iiif_print_conditionally_destroy_spawned_children' => { user: user })
|
25
|
+
.call(file_set).success?
|
26
|
+
rescue ::Ldp::Gone
|
27
|
+
nil
|
28
|
+
end
|
29
|
+
|
30
|
+
Success(resource)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -14,7 +14,7 @@
|
|
14
14
|
method: :delete %>
|
15
15
|
<% end %>
|
16
16
|
|
17
|
-
<% if @presenter.editor? && @presenter.pdf? %>
|
17
|
+
<% if @presenter.show_split_button? && @presenter.editor? && @presenter.pdf? %>
|
18
18
|
<%= link_to t("iiif_print.file_set.split_this"), iiif_print.split_pdf_path(@presenter),
|
19
19
|
class: 'btn btn-default', data: { confirm: t("iiif_print.file_set.confirm_split_this") },
|
20
20
|
method: :post %>
|
@@ -0,0 +1 @@
|
|
1
|
+
"Hyrax::SimpleSchemaLoader".safe_constantize&.prepend(IiifPrint::SimpleSchemaLoaderDecorator)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
attributes:
|
2
|
+
is_child:
|
3
|
+
type: bool
|
4
|
+
multiple: false
|
5
|
+
index_keys:
|
6
|
+
- "is_child_bsi"
|
7
|
+
form:
|
8
|
+
required: false
|
9
|
+
primary: false
|
10
|
+
multiple: false
|
11
|
+
predicate: "http://id.loc.gov/vocabulary/identifiers/isChild"
|
12
|
+
split_from_pdf_id:
|
13
|
+
type: string
|
14
|
+
multiple: false
|
15
|
+
index_keys:
|
16
|
+
- "split_from_pdf_id_ssi"
|
17
|
+
form:
|
18
|
+
required: false
|
19
|
+
primary: false
|
20
|
+
multiple: false
|
21
|
+
predicate: "http://id.loc.gov/vocabulary/identifiers/splitFromPdfId"
|
@@ -1,12 +1,14 @@
|
|
1
1
|
class CreateIiifPrintDerivativeAttachments < ActiveRecord::Migration[5.0]
|
2
2
|
def change
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
unless table_exists?(:iiif_print_derivative_attachments)
|
4
|
+
create_table :iiif_print_derivative_attachments do |t|
|
5
|
+
t.string :fileset_id
|
6
|
+
t.string :path
|
7
|
+
t.string :destination_name
|
7
8
|
|
8
|
-
|
9
|
+
t.timestamps
|
10
|
+
end
|
11
|
+
add_index :iiif_print_derivative_attachments, :fileset_id
|
9
12
|
end
|
10
|
-
add_index :iiif_print_derivative_attachments, :fileset_id
|
11
13
|
end
|
12
14
|
end
|