iiif_print 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +102 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
- data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
- data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
- data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
- data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/app/listeners/iiif_print/listener.rb +31 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
- data/app/models/concerns/iiif_print/solr/document.rb +19 -3
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
- data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
- data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
- data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
- data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/initializers/simple_schema_loader.rb +1 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +11 -10
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +14 -2
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
- data/lib/iiif_print/catalog_search_builder.rb +7 -3
- data/lib/iiif_print/configuration.rb +205 -8
- data/lib/iiif_print/data/fileset_helper.rb +3 -3
- data/lib/iiif_print/data/work_derivatives.rb +4 -4
- data/lib/iiif_print/engine.rb +53 -15
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jp2_derivative_service.rb +4 -1
- data/lib/iiif_print/lineage_service.rb +47 -13
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/pdf_derivative_service.rb +3 -1
- data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
- data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
- data/lib/iiif_print/persistence_layer.rb +118 -0
- data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
- data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
- data/lib/iiif_print/tiff_derivative_service.rb +3 -1
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +210 -20
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +111 -196
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
- data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
- data/bin/rails +0 -13
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
- data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/.keep.txt +0 -1
- data/spec/factories/ability.rb +0 -6
- data/spec/factories/newspaper_issue.rb +0 -7
- data/spec/factories/newspaper_page.rb +0 -7
- data/spec/factories/newspaper_page_solr_document.rb +0 -12
- data/spec/factories/newspaper_title.rb +0 -8
- data/spec/factories/uploaded_pdf_file.rb +0 -9
- data/spec/factories/uploaded_txt_file.rb +0 -9
- data/spec/factories/user.rb +0 -13
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +0 -7
- data/spec/fixtures/files/alto-2-0.xsd +0 -714
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +0 -16
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +0 -31
- data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
- data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
- data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +0 -202
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
- data/spec/helpers/iiif_print_helper_spec.rb +0 -43
- data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
- data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
- data/spec/iiif_print/configuration_spec.rb +0 -67
- data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
- data/spec/iiif_print/data/work_file_spec.rb +0 -99
- data/spec/iiif_print/data/work_files_spec.rb +0 -237
- data/spec/iiif_print/image_tool_spec.rb +0 -109
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
- data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
- data/spec/iiif_print/lineage_service_spec.rb +0 -13
- data/spec/iiif_print/metadata_spec.rb +0 -115
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
- data/spec/iiif_print_spec.rb +0 -51
- data/spec/misc_shared.rb +0 -111
- data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
- data/spec/models/solr_document_spec.rb +0 -14
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
- data/spec/spec_helper.rb +0 -181
- data/spec/support/controller_level_helpers.rb +0 -28
- data/spec/support/iiif_print_models.rb +0 -127
- data/spec/test_app_templates/blacklight.yml +0 -9
- data/spec/test_app_templates/fedora.yml +0 -15
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
- data/spec/test_app_templates/redis.yml +0 -9
- data/spec/test_app_templates/solr/conf/schema.xml +0 -362
- data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
- data/spec/test_app_templates/solr.yml +0 -7
data/lib/iiif_print/metadata.rb
CHANGED
@@ -17,48 +17,43 @@ module IiifPrint
|
|
17
17
|
@base_url = base_url
|
18
18
|
end
|
19
19
|
|
20
|
-
attr_reader :work, :version, :fields
|
20
|
+
attr_reader :work, :version, :fields, :current_ability
|
21
21
|
|
22
22
|
def build_metadata
|
23
|
-
send("build_metadata_for_v#{version}")
|
24
|
-
end
|
25
|
-
|
26
|
-
private
|
27
|
-
|
28
|
-
def build_metadata_for_v2
|
29
23
|
fields.map do |field|
|
30
|
-
|
31
|
-
if field.name == :collection && member_of_collection?
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
next if field_is_empty?(field)
|
38
|
-
{ 'label' => label,
|
39
|
-
'value' => cast_to_value(field_name: field.name, options: field.options) }
|
24
|
+
values = values_for(field_name: field)
|
25
|
+
if field.name == :collection && member_of_collection? && viewable_collections.present?
|
26
|
+
{ 'label' => metadata_map(field, :label),
|
27
|
+
'value' => metadata_map(field, :collection) }
|
28
|
+
elsif values.present? && !empty_string?(values)
|
29
|
+
{ 'label' => metadata_map(field, :label),
|
30
|
+
'value' => metadata_map(field, :value) }
|
40
31
|
end
|
41
32
|
end.compact
|
42
33
|
end
|
43
34
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
}
|
57
|
-
|
35
|
+
private
|
36
|
+
|
37
|
+
def metadata_map(field, property)
|
38
|
+
if version == 2
|
39
|
+
case property
|
40
|
+
when :label then field.label
|
41
|
+
when :value then cast_to_value(field_name: field.name, options: field.options)
|
42
|
+
when :collection then make_collection_link(viewable_collections)
|
43
|
+
end
|
44
|
+
elsif version == 3
|
45
|
+
case property
|
46
|
+
when :label then { I18n.locale.to_s => [field.label] }
|
47
|
+
when :value then { 'none' => cast_to_value(field_name: field.name, options: field.options) }
|
48
|
+
when :collection then { 'none' => make_collection_link(viewable_collections) }
|
49
|
+
end
|
50
|
+
end
|
58
51
|
end
|
59
52
|
|
60
|
-
|
61
|
-
|
53
|
+
# Bulkrax imports values as [""] if there isn't a value but still a header,
|
54
|
+
# these fields should not show in the metadata pane
|
55
|
+
def empty_string?(values)
|
56
|
+
values.uniq.size == 1 ? values.first == "" : false
|
62
57
|
end
|
63
58
|
|
64
59
|
def member_of_collection?
|
@@ -71,21 +66,41 @@ module IiifPrint
|
|
71
66
|
|
72
67
|
def cast_to_value(field_name:, options:)
|
73
68
|
if options&.[](:render_as) == :faceted
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
"f[#{search_field}][]": value, locale: I18n.locale
|
78
|
-
)
|
79
|
-
path += '&include_child_works=true' if work["is_child_bsi"] == true
|
80
|
-
"<a href='#{File.join(@base_url, path)}'>#{value}</a>"
|
81
|
-
end
|
69
|
+
faceted_values_for(field_name: field_name)
|
70
|
+
elsif qa_field?(field_name: options&.dig(:render_as) || field_name)
|
71
|
+
authority_values_for(field_name: field_name)
|
82
72
|
else
|
83
73
|
make_link(values_for(field_name: field_name))
|
84
74
|
end
|
85
75
|
end
|
86
76
|
|
77
|
+
def faceted_values_for(field_name:)
|
78
|
+
values_for(field_name: field_name).map do |value|
|
79
|
+
search_field = field_name.to_s + "_sim"
|
80
|
+
path = Rails.application.routes.url_helpers.search_catalog_path(
|
81
|
+
"f[#{search_field}][]": value, locale: I18n.locale
|
82
|
+
)
|
83
|
+
path += '&include_child_works=true' if work["is_child_bsi"] == true
|
84
|
+
"<a href='#{File.join(@base_url, path)}'>#{value}</a>"
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def qa_field?(field_name:, questioning_authority_fields: IiifPrint.config.questioning_authority_fields)
|
89
|
+
questioning_authority_fields.include?(field_name.to_s)
|
90
|
+
end
|
91
|
+
|
92
|
+
def authority_values_for(field_name:)
|
93
|
+
authority = Qa::Authorities::Local.subauthority_for(field_name.to_s.pluralize)
|
94
|
+
values_for(field_name: field_name).map do |value|
|
95
|
+
id, term = authority.find(value).values_at('id', 'term')
|
96
|
+
"<a href='#{id}'>#{term}</a>"
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
87
100
|
def values_for(field_name:)
|
88
|
-
|
101
|
+
field_name = field_name.try(:name) || field_name
|
102
|
+
# TODO: we are assuming tesim or dtsi (for dates), might want to account for other suffixes in the future
|
103
|
+
Array(work["#{field_name}_tesim"] || work["#{field_name}_dtsi"]&.to_date.try(:to_formatted_s, :standard))
|
89
104
|
end
|
90
105
|
|
91
106
|
def make_collection_link(collection_documents)
|
@@ -94,11 +109,16 @@ module IiifPrint
|
|
94
109
|
end
|
95
110
|
end
|
96
111
|
|
97
|
-
|
112
|
+
def viewable_collections
|
113
|
+
Hyrax::CollectionMemberService.run(SolrDocument.find(work.id), current_ability)
|
114
|
+
end
|
115
|
+
|
116
|
+
# @note This method turns link-looking strings into links and assumes https if no protocol was given
|
98
117
|
def make_link(texts)
|
99
118
|
texts.map do |t|
|
100
119
|
t.to_s.gsub(MAKE_LINK_REGEX) do |url|
|
101
|
-
|
120
|
+
protocol = url.start_with?('www.') ? 'https://' : ''
|
121
|
+
"<a href='#{protocol}#{url}' target='_blank'>#{url}</a>"
|
102
122
|
end
|
103
123
|
end
|
104
124
|
end
|
@@ -106,10 +126,9 @@ module IiifPrint
|
|
106
126
|
MAKE_LINK_REGEX = %r{
|
107
127
|
\b
|
108
128
|
(
|
109
|
-
(?:
|
110
|
-
(?:
|
111
|
-
|
112
|
-
[a-z0-9.\-]+[.][a-z]{2,4}/
|
129
|
+
(?:
|
130
|
+
(?:https?://) |
|
131
|
+
(?:www\.)
|
113
132
|
)
|
114
133
|
(?:
|
115
134
|
[^\s()<>]+ | \(([^\s()<>]+|(\([^\s()<>]+\)))*\)
|
@@ -24,7 +24,9 @@ module IiifPrint
|
|
24
24
|
# JP2 source, and whether we have color or grayscale material.
|
25
25
|
def convert_cmd
|
26
26
|
template = use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD
|
27
|
-
format(template, source_file: @source_path, out_file: @dest_path)
|
27
|
+
data = format(template, source_file: @source_path, out_file: @dest_path)
|
28
|
+
IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
|
29
|
+
data
|
28
30
|
end
|
29
31
|
|
30
32
|
def create_derivatives(filename)
|
@@ -0,0 +1,189 @@
|
|
1
|
+
module IiifPrint
|
2
|
+
module PersistenceLayer
|
3
|
+
class ActiveFedoraAdapter < AbstractAdapter
|
4
|
+
##
|
5
|
+
# @param object [ActiveFedora::Base]
|
6
|
+
# @return [Array<SolrDocument>]
|
7
|
+
def self.object_in_works(object)
|
8
|
+
object.in_works
|
9
|
+
end
|
10
|
+
|
11
|
+
##
|
12
|
+
# @param object [ActiveFedora::Base]
|
13
|
+
# @return [Array<SolrDocument>]
|
14
|
+
def self.object_ordered_works(object)
|
15
|
+
object.ordered_works
|
16
|
+
end
|
17
|
+
|
18
|
+
##
|
19
|
+
# @param work_type [Class<ActiveFedora::Base>]
|
20
|
+
# @return indexer for the given :work_type
|
21
|
+
def self.decorate_with_adapter_logic(work_type:)
|
22
|
+
work_type.send(:include, IiifPrint::SetChildFlag) unless work_type.included_modules.include?(IiifPrint::SetChildFlag)
|
23
|
+
work_type.indexer
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# @param work_type [Class<ActiveFedora::Base>]
|
28
|
+
# @return indexer for the given :work_type
|
29
|
+
def self.decorate_form_with_adapter_logic(work_type:)
|
30
|
+
work_type.indexer
|
31
|
+
end
|
32
|
+
|
33
|
+
##
|
34
|
+
# Return the immediate parent of the given :file_set.
|
35
|
+
#
|
36
|
+
# @param file_set [FileSet]
|
37
|
+
# @return [#work?, Hydra::PCDM::Work]
|
38
|
+
# @return [NilClass] when no parent is found.
|
39
|
+
def self.parent_for(file_set)
|
40
|
+
# fallback to Fedora-stored relationships if work's aggregation of
|
41
|
+
# file set is not indexed in Solr
|
42
|
+
file_set.parent || file_set.member_of.find(&:work?)
|
43
|
+
end
|
44
|
+
|
45
|
+
##
|
46
|
+
# Return the parent's parent of the given :file_set.
|
47
|
+
#
|
48
|
+
# @param file_set [FileSet]
|
49
|
+
# @return [#work?, Hydra::PCDM::Work]
|
50
|
+
# @return [NilClass] when no grand parent is found.
|
51
|
+
def self.grandparent_for(file_set)
|
52
|
+
parent_of_file_set = parent_for(file_set)
|
53
|
+
# HACK: This is an assumption about the file_set structure, namely that an image page split from
|
54
|
+
# a PDF is part of a file set that is a child of a work that is a child of a single work. That
|
55
|
+
# is, it only has one grand parent. Which is a reasonable assumption for IIIF Print but is not
|
56
|
+
# valid when extended beyond IIIF Print. That is GenericWork does not have a parent method but
|
57
|
+
# does have a parents method.
|
58
|
+
parent_of_file_set.try(:parent_works).try(:first) ||
|
59
|
+
parent_of_file_set.try(:parents).try(:first) ||
|
60
|
+
parent_of_file_set&.member_of&.find(&:work?)
|
61
|
+
end
|
62
|
+
|
63
|
+
def self.solr_construct_query(*args)
|
64
|
+
if defined?(Hyrax::SolrQueryBuilderService)
|
65
|
+
Hyrax::SolrQueryBuilderService.construct_query(*args)
|
66
|
+
else
|
67
|
+
ActiveFedora::SolrQueryBuilder.construct_query(*args)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.clean_for_tests!
|
72
|
+
super do
|
73
|
+
ActiveFedora::Cleaner.clean!
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def self.solr_query(query, **args)
|
78
|
+
if defined?(ActiveFedora::SolrService)
|
79
|
+
ActiveFedora::SolrService.query(query, **args)
|
80
|
+
else
|
81
|
+
Hyrax::SolrService.query(query, **args)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def self.solr_name(field_name)
|
86
|
+
if defined?(Hyrax) && Hyrax.config.respond_to?(:index_field_mapper)
|
87
|
+
Hyrax.config.index_field_mapper.solr_name(field_name.to_s)
|
88
|
+
else
|
89
|
+
::ActiveFedora.index_field_mapper.solr_name(field_name.to_s)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
##
|
94
|
+
# @param file_set [Object]
|
95
|
+
# @param work [Object]
|
96
|
+
# @param model [Class] The class name for which we'll split children.
|
97
|
+
def self.destroy_children_split_from(file_set:, work:, model:, **_args)
|
98
|
+
# look first for children by the file set id they were split from
|
99
|
+
children = model.where(split_from_pdf_id: file_set.id)
|
100
|
+
if children.blank?
|
101
|
+
# find works where file name and work `to_param` are both in the title
|
102
|
+
children = model.where(title: file_set.label).where(title: work.to_param)
|
103
|
+
end
|
104
|
+
return if children.blank?
|
105
|
+
children.each do |rcd|
|
106
|
+
rcd.destroy(eradicate: true)
|
107
|
+
end
|
108
|
+
true
|
109
|
+
end
|
110
|
+
|
111
|
+
def self.pdf?(file_set)
|
112
|
+
file_set.class.pdf_mime_types.include?(file_set.mime_type)
|
113
|
+
end
|
114
|
+
|
115
|
+
##
|
116
|
+
# Add a child record as a member of a parent record
|
117
|
+
#
|
118
|
+
# @param child_record [ActiveFedora::Base] the record to add as a member
|
119
|
+
# @param parent_record [ActiveFedora::Base] the record that receives the new member
|
120
|
+
# @return [TrueClass]
|
121
|
+
def self.create_relationship_between(child_record:, parent_record:)
|
122
|
+
return true if parent_record.ordered_members.to_a.include?(child_record)
|
123
|
+
parent_record.ordered_members << child_record
|
124
|
+
true
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# find a work by title
|
129
|
+
# We should only find one, but there is no guarantee of that and `:where` returns an array.
|
130
|
+
#
|
131
|
+
# @param title [String]
|
132
|
+
# @param model [String] an ActiveFedora::Base model
|
133
|
+
def self.find_by_title_for(title:, model:)
|
134
|
+
work_type = model.constantize
|
135
|
+
|
136
|
+
work_type.where(title: title)
|
137
|
+
end
|
138
|
+
|
139
|
+
##
|
140
|
+
# find a work or file_set
|
141
|
+
#
|
142
|
+
# @param id [String]
|
143
|
+
# @return [ActiveFedora::Base]
|
144
|
+
def self.find_by(id:)
|
145
|
+
ActiveFedora::Base.find(id)
|
146
|
+
end
|
147
|
+
|
148
|
+
##
|
149
|
+
# save a work
|
150
|
+
#
|
151
|
+
# @param object [ActiveFedora::Base]
|
152
|
+
def self.save(object:)
|
153
|
+
object.save!
|
154
|
+
end
|
155
|
+
|
156
|
+
##
|
157
|
+
# reindex an array of works and their file_sets
|
158
|
+
#
|
159
|
+
# @param objects [Array<ActiveFedora::Base>]
|
160
|
+
# @return [TrueClass]
|
161
|
+
def self.index_works(objects:)
|
162
|
+
objects.each do |work|
|
163
|
+
work.update_index
|
164
|
+
work.file_sets.each(&:update_index) if work.respond_to?(:file_sets)
|
165
|
+
end
|
166
|
+
true
|
167
|
+
end
|
168
|
+
|
169
|
+
##
|
170
|
+
# does nothing for ActiveFedora;
|
171
|
+
# allows valkyrie works to have an extra step to create the Hyrax::Metadata objects.
|
172
|
+
#
|
173
|
+
# @param []
|
174
|
+
# @return [TrueClass]
|
175
|
+
def self.copy_derivatives_from_data_store(*)
|
176
|
+
true
|
177
|
+
end
|
178
|
+
|
179
|
+
##
|
180
|
+
# Extract text from the derivatives
|
181
|
+
#
|
182
|
+
# @param file_set [FileSet] an ActiveFedora fileset
|
183
|
+
# @return [String] Text from fileset's file
|
184
|
+
def self.extract_text_for(file_set:)
|
185
|
+
IiifPrint.config.all_text_generator_function.call(object: file_set) || ''
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
module IiifPrint
|
2
|
+
module PersistenceLayer
|
3
|
+
class ValkyrieAdapter < AbstractAdapter
|
4
|
+
##
|
5
|
+
# @param object [Valkyrie::Resource]
|
6
|
+
# @return [Array<Valkyrie::Resource>]
|
7
|
+
def self.object_in_works(object)
|
8
|
+
Array.wrap(Hyrax.custom_queries.find_parent_work(resource: object))
|
9
|
+
end
|
10
|
+
|
11
|
+
##
|
12
|
+
# @param object [Valkyrie::Resource]
|
13
|
+
# @return [Array<Valkyrie::Resource>]
|
14
|
+
def self.object_ordered_works(object)
|
15
|
+
child_file_sets = Hyrax.custom_queries.find_child_file_sets(resource: object).to_a
|
16
|
+
child_works = Hyrax.custom_queries.find_child_works(resource: object).to_a
|
17
|
+
child_works + child_file_sets
|
18
|
+
end
|
19
|
+
|
20
|
+
##
|
21
|
+
# @param work_type [Class<Valkyrie::Resource>]
|
22
|
+
# @return the indexer for the given :work_type
|
23
|
+
def self.decorate_with_adapter_logic(work_type:)
|
24
|
+
work_type.send(:include, Hyrax::Schema(:child_works_from_pdf_splitting)) unless work_type.included_modules.include?(Hyrax::Schema(:child_works_from_pdf_splitting))
|
25
|
+
# TODO: Use `Hyrax::ValkyrieIndexer.indexer_class_for` once changes are merged.
|
26
|
+
indexer = "#{work_type}Indexer".constantize
|
27
|
+
indexer.send(:include, Hyrax::Indexer(:child_works_from_pdf_splitting)) unless indexer.included_modules.include?(Hyrax::Indexer(:child_works_from_pdf_splitting))
|
28
|
+
indexer
|
29
|
+
end
|
30
|
+
|
31
|
+
##
|
32
|
+
# @param work_type [Class<Valkyrie::Resource>]
|
33
|
+
# @return form for the given :work_type
|
34
|
+
def self.decorate_form_with_adapter_logic(work_type:)
|
35
|
+
form = "#{work_type}Form".constantize
|
36
|
+
form.send(:include, Hyrax::FormFields(:child_works_from_pdf_splitting)) unless form.included_modules.include?(Hyrax::FormFields(:child_works_from_pdf_splitting))
|
37
|
+
form
|
38
|
+
end
|
39
|
+
|
40
|
+
##
|
41
|
+
# Return the immediate parent of the given :file_set.
|
42
|
+
#
|
43
|
+
# @param file_set [Hyrax::FileMetadata, FileSet]
|
44
|
+
# @return [#work?, Hydra::PCDM::Work]
|
45
|
+
# @return [NilClass] when no parent is found.
|
46
|
+
def self.parent_for(file_set)
|
47
|
+
file_set = Hyrax.query_service.find_by(id: file_set.file_set_id) if file_set.is_a?(Hyrax::FileMetadata)
|
48
|
+
Hyrax.query_service.find_parents(resource: file_set).first
|
49
|
+
end
|
50
|
+
|
51
|
+
##
|
52
|
+
# Return the parent's parent of the given :file_set.
|
53
|
+
#
|
54
|
+
# @param file_set [Hyrax::FileMetadata, FileSet]
|
55
|
+
# @return [#work?, Hydra::PCDM::Work]
|
56
|
+
# @return [NilClass] when no grand parent is found.
|
57
|
+
def self.grandparent_for(file_set)
|
58
|
+
parent = parent_for(file_set)
|
59
|
+
return nil unless parent
|
60
|
+
Hyrax.query_service.find_parents(resource: parent).first
|
61
|
+
end
|
62
|
+
|
63
|
+
def self.solr_construct_query(*args)
|
64
|
+
Hyrax::SolrQueryBuilderService.construct_query(*args)
|
65
|
+
end
|
66
|
+
|
67
|
+
def self.clean_for_tests!
|
68
|
+
# For Fedora backed repositories, we'll want to consider some cleaning mechanism. For
|
69
|
+
# database backed repositories, we can rely on the database_cleaner gem.
|
70
|
+
raise NotImplementedError
|
71
|
+
end
|
72
|
+
|
73
|
+
def self.solr_query(query, **args)
|
74
|
+
Hyrax::SolrService.query(query, **args)
|
75
|
+
end
|
76
|
+
|
77
|
+
def self.solr_name(field_name)
|
78
|
+
Hyrax.config.index_field_mapper.solr_name(field_name.to_s)
|
79
|
+
end
|
80
|
+
|
81
|
+
# rubocop:disable Lint/UnusedMethodArgument
|
82
|
+
def self.destroy_children_split_from(file_set:, work:, model:, user:)
|
83
|
+
# rubocop:enable Lint/UnusedMethodArgument
|
84
|
+
# look for child records by the file set id they were split from
|
85
|
+
Hyrax.query_service.find_inverse_references_by(resource: file_set, property: :split_from_pdf_id, model: model).each do |child|
|
86
|
+
Hyrax.persister.delete(resource: child)
|
87
|
+
Hyrax.indexing_service.delete(resource: child)
|
88
|
+
Hyrax.publisher.publish('object.deleted', object: child, user: user)
|
89
|
+
end
|
90
|
+
true
|
91
|
+
end
|
92
|
+
|
93
|
+
def self.pdf?(file_set)
|
94
|
+
file_set.original_file.pdf?
|
95
|
+
end
|
96
|
+
|
97
|
+
##
|
98
|
+
# Add a child record as a member of a parent record
|
99
|
+
#
|
100
|
+
# @param child_record [Valkyrie::Resource] the record to add as a member
|
101
|
+
# @param parent_record [Valkyrie::Resource] the record that receives the new member
|
102
|
+
# @return [TrueClass]
|
103
|
+
def self.create_relationship_between(child_record:, parent_record:)
|
104
|
+
return true if parent_record.member_ids.include?(child_record.id)
|
105
|
+
parent_record.member_ids << child_record.id
|
106
|
+
true
|
107
|
+
end
|
108
|
+
|
109
|
+
##
|
110
|
+
# find a work by title
|
111
|
+
# We should only find one, but there is no guarantee of that
|
112
|
+
# @param title [String]
|
113
|
+
# @param model [String] a Valkyrie::Resource model
|
114
|
+
# @return [Array<Valkyrie::Resource>]
|
115
|
+
def self.find_by_title_for(title:, model:)
|
116
|
+
work_type = model.constantize
|
117
|
+
# TODO: This creates a hard dependency on Bulkrax because that is where this custom query is defined
|
118
|
+
# Is this adequate?
|
119
|
+
Array.wrap(Hyrax.query_service.custom_query.find_by_model_and_property_value(model: work_type,
|
120
|
+
property: :title,
|
121
|
+
value: title))
|
122
|
+
end
|
123
|
+
|
124
|
+
##
|
125
|
+
# find a work or file_set
|
126
|
+
#
|
127
|
+
# @param id [String]
|
128
|
+
def self.find_by(id:)
|
129
|
+
Hyrax.query_service.find_by(id: id)
|
130
|
+
end
|
131
|
+
|
132
|
+
##
|
133
|
+
# save a work
|
134
|
+
#
|
135
|
+
# @param object [Valkyrie::Resource]
|
136
|
+
def self.save(object:)
|
137
|
+
Hyrax.persister.save(resource: object)
|
138
|
+
Hyrax.index_adapter.save(resource: object)
|
139
|
+
|
140
|
+
Hyrax.publisher.publish('object.membership.updated', object: object, user: object.depositor)
|
141
|
+
end
|
142
|
+
|
143
|
+
##
|
144
|
+
# reindex an array of works and their file_sets
|
145
|
+
#
|
146
|
+
# @param objects [Array<Valkyrie::Resource>]
|
147
|
+
# @return [TrueClass]
|
148
|
+
def self.index_works(objects:)
|
149
|
+
objects.each do |work|
|
150
|
+
Hyrax.index_adapter.save(resource: work)
|
151
|
+
Hyrax.custom_queries.find_child_file_sets(resource: work).each do |file_set|
|
152
|
+
Hyrax.index_adapter.save(resource: file_set)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
true
|
156
|
+
end
|
157
|
+
|
158
|
+
##
|
159
|
+
# Performs an extra step to create the Hyrax::Metadata objects
|
160
|
+
# for derivatives.
|
161
|
+
#
|
162
|
+
# @param []
|
163
|
+
# @return [TrueClass]
|
164
|
+
def self.copy_derivatives_from_data_store(stream:, directives:)
|
165
|
+
Hyrax::ValkyriePersistDerivatives.call(stream, directives)
|
166
|
+
end
|
167
|
+
|
168
|
+
##
|
169
|
+
# Extract text from the derivatives
|
170
|
+
#
|
171
|
+
# @param file_set [Hyrax::FileSet] a Valkyrie fileset
|
172
|
+
# @return [String, nil] Text from fileset's file; nil when no extracted text file is found
|
173
|
+
def self.extract_text_for(file_set:)
|
174
|
+
fm = Hyrax.custom_queries.find_many_file_metadata_by_use(resource: file_set,
|
175
|
+
use: Hyrax::FileMetadata::Use.uri_for(use: :extracted_file))
|
176
|
+
return if fm.empty?
|
177
|
+
text_fm = fm.find { |t| t.mime_type == Marcel::MimeType.for(extension: 'txt') }
|
178
|
+
return if text_fm.nil?
|
179
|
+
text_fm.content
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module IiifPrint
|
2
|
+
##
|
3
|
+
# The PersistenceLayer module provides the namespace for other adapters:
|
4
|
+
#
|
5
|
+
# - {IiifPrint::PersistenceLayer::ActiveFedoraAdapter}
|
6
|
+
# - {IiifPrint::PersistenceLayer::ValkyrieAdapter}
|
7
|
+
#
|
8
|
+
# And the defining interface in the {IiifPrint::PersistenceLayer::AbstractAdapter}
|
9
|
+
module PersistenceLayer
|
10
|
+
# @abstract
|
11
|
+
class AbstractAdapter
|
12
|
+
##
|
13
|
+
# @param object [Object]
|
14
|
+
# @return [Array<Object>]
|
15
|
+
def self.object_in_works(object)
|
16
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
17
|
+
end
|
18
|
+
|
19
|
+
##
|
20
|
+
# @param object [Object]
|
21
|
+
# @return [Array<Object>]
|
22
|
+
def self.object_ordered_works(object)
|
23
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# @param work_type [Class]
|
28
|
+
# @return the corresponding indexer for the work_type
|
29
|
+
def self.decorate_with_adapter_logic(work_type:)
|
30
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
31
|
+
end
|
32
|
+
|
33
|
+
##
|
34
|
+
# @param work_type [Class]
|
35
|
+
# @return the corresponding indexer for the work_type
|
36
|
+
def self.decorate_form_with_adapter_logic(work_type:)
|
37
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
38
|
+
end
|
39
|
+
|
40
|
+
##
|
41
|
+
# @param file_set [Object]
|
42
|
+
# @param work [Object]
|
43
|
+
# @param model [Class] The class name for which we'll split children.
|
44
|
+
def self.destroy_children_split_from(file_set:, work:, model:)
|
45
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
46
|
+
end
|
47
|
+
|
48
|
+
##
|
49
|
+
# @abstract
|
50
|
+
def self.parent_for(*)
|
51
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
52
|
+
end
|
53
|
+
|
54
|
+
##
|
55
|
+
# @abstract
|
56
|
+
def self.grandparent_for(*)
|
57
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
58
|
+
end
|
59
|
+
|
60
|
+
##
|
61
|
+
# @abstract
|
62
|
+
def self.solr_field_query(*)
|
63
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
64
|
+
end
|
65
|
+
|
66
|
+
##
|
67
|
+
# @abstract
|
68
|
+
def self.clean_for_tests!
|
69
|
+
return false unless Rails.env.test?
|
70
|
+
yield
|
71
|
+
end
|
72
|
+
|
73
|
+
##
|
74
|
+
# @abstract
|
75
|
+
def self.solr_query(*args)
|
76
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
77
|
+
end
|
78
|
+
|
79
|
+
##
|
80
|
+
# @abstract
|
81
|
+
def self.solr_name(*args)
|
82
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
83
|
+
end
|
84
|
+
|
85
|
+
def self.pdf?(_file_set)
|
86
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
87
|
+
end
|
88
|
+
|
89
|
+
def self.create_relationship_between(child_record:, parent_record:)
|
90
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
91
|
+
end
|
92
|
+
|
93
|
+
def self.find_by_title_for(title:, model:)
|
94
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
95
|
+
end
|
96
|
+
|
97
|
+
def self.find_by(id:)
|
98
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
99
|
+
end
|
100
|
+
|
101
|
+
def self.save(object:)
|
102
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
103
|
+
end
|
104
|
+
|
105
|
+
def self.index_works(objects:)
|
106
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
107
|
+
end
|
108
|
+
|
109
|
+
def self.copy_derivatives_from_data_store(stream:, directives:)
|
110
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
111
|
+
end
|
112
|
+
|
113
|
+
def self.extract_text_for(file_set:)
|
114
|
+
raise NotImplementedError, "#{self}.{__method__}"
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|