iiif_print 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (181) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +102 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
  19. data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
  20. data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
  21. data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
  22. data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  23. data/app/listeners/iiif_print/listener.rb +31 -0
  24. data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
  25. data/app/models/concerns/iiif_print/solr/document.rb +19 -3
  26. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  27. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  28. data/app/models/iiif_print/pending_relationship.rb +3 -0
  29. data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
  30. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  31. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  32. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
  33. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  34. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  35. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  36. data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
  37. data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
  38. data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
  39. data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
  40. data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
  41. data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
  42. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  43. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  44. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  45. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  46. data/config/initializers/simple_schema_loader.rb +1 -0
  47. data/config/locales/iiif_print.en.yml +4 -0
  48. data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
  49. data/config/routes.rb +3 -0
  50. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
  51. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
  52. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
  53. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  54. data/docker-compose.yml +2 -2
  55. data/iiif_print.gemspec +11 -10
  56. data/lib/generators/iiif_print/install_generator.rb +21 -1
  57. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  58. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  59. data/lib/iiif_print/base_derivative_service.rb +14 -2
  60. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
  61. data/lib/iiif_print/catalog_search_builder.rb +7 -3
  62. data/lib/iiif_print/configuration.rb +205 -8
  63. data/lib/iiif_print/data/fileset_helper.rb +3 -3
  64. data/lib/iiif_print/data/work_derivatives.rb +4 -4
  65. data/lib/iiif_print/engine.rb +53 -15
  66. data/lib/iiif_print/errors.rb +18 -0
  67. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  68. data/lib/iiif_print/image_tool.rb +12 -8
  69. data/lib/iiif_print/jp2_derivative_service.rb +4 -1
  70. data/lib/iiif_print/lineage_service.rb +47 -13
  71. data/lib/iiif_print/metadata.rb +67 -48
  72. data/lib/iiif_print/pdf_derivative_service.rb +3 -1
  73. data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
  74. data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
  75. data/lib/iiif_print/persistence_layer.rb +118 -0
  76. data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
  77. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
  78. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  79. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
  80. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  81. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  82. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  83. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  84. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  85. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  86. data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
  87. data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
  88. data/lib/iiif_print/tiff_derivative_service.rb +3 -1
  89. data/lib/iiif_print/version.rb +1 -1
  90. data/lib/iiif_print.rb +210 -20
  91. data/lib/samvera/derivatives/configuration.rb +83 -0
  92. data/lib/samvera/derivatives/hyrax.rb +129 -0
  93. data/lib/samvera/derivatives.rb +238 -0
  94. data/tasks/copy_authorities_to_test_app.rake +11 -0
  95. data/tasks/iiif_print_dev.rake +4 -4
  96. metadata +111 -196
  97. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  98. data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
  99. data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
  100. data/bin/rails +0 -13
  101. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
  102. data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
  103. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  104. data/spec/.keep.txt +0 -1
  105. data/spec/factories/ability.rb +0 -6
  106. data/spec/factories/newspaper_issue.rb +0 -7
  107. data/spec/factories/newspaper_page.rb +0 -7
  108. data/spec/factories/newspaper_page_solr_document.rb +0 -12
  109. data/spec/factories/newspaper_title.rb +0 -8
  110. data/spec/factories/uploaded_pdf_file.rb +0 -9
  111. data/spec/factories/uploaded_txt_file.rb +0 -9
  112. data/spec/factories/user.rb +0 -13
  113. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  114. data/spec/fixtures/files/4.1.07.tiff +0 -0
  115. data/spec/fixtures/files/README.md +0 -7
  116. data/spec/fixtures/files/alto-2-0.xsd +0 -714
  117. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  118. data/spec/fixtures/files/credits.md +0 -16
  119. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  120. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  121. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  122. data/spec/fixtures/files/minimal-alto.xml +0 -31
  123. data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
  124. data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
  125. data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
  126. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  127. data/spec/fixtures/files/ocr_alto.xml +0 -202
  128. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
  129. data/spec/fixtures/files/ocr_color.tiff +0 -0
  130. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  131. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  132. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  133. data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
  134. data/spec/fixtures/files/page1.tiff +0 -0
  135. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  136. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  137. data/spec/fixtures/files/thumbnail.jpg +0 -0
  138. data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
  139. data/spec/helpers/iiif_print_helper_spec.rb +0 -43
  140. data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
  141. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
  142. data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
  143. data/spec/iiif_print/configuration_spec.rb +0 -67
  144. data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
  145. data/spec/iiif_print/data/work_file_spec.rb +0 -99
  146. data/spec/iiif_print/data/work_files_spec.rb +0 -237
  147. data/spec/iiif_print/image_tool_spec.rb +0 -109
  148. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
  149. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
  150. data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
  151. data/spec/iiif_print/lineage_service_spec.rb +0 -13
  152. data/spec/iiif_print/metadata_spec.rb +0 -115
  153. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
  154. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
  155. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
  156. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
  157. data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
  158. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
  159. data/spec/iiif_print_spec.rb +0 -51
  160. data/spec/misc_shared.rb +0 -111
  161. data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
  162. data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
  163. data/spec/models/solr_document_spec.rb +0 -14
  164. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
  165. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
  166. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
  167. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
  168. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
  169. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
  170. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
  171. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
  172. data/spec/spec_helper.rb +0 -181
  173. data/spec/support/controller_level_helpers.rb +0 -28
  174. data/spec/support/iiif_print_models.rb +0 -127
  175. data/spec/test_app_templates/blacklight.yml +0 -9
  176. data/spec/test_app_templates/fedora.yml +0 -15
  177. data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
  178. data/spec/test_app_templates/redis.yml +0 -9
  179. data/spec/test_app_templates/solr/conf/schema.xml +0 -362
  180. data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
  181. data/spec/test_app_templates/solr.yml +0 -7
@@ -17,48 +17,43 @@ module IiifPrint
17
17
  @base_url = base_url
18
18
  end
19
19
 
20
- attr_reader :work, :version, :fields
20
+ attr_reader :work, :version, :fields, :current_ability
21
21
 
22
22
  def build_metadata
23
- send("build_metadata_for_v#{version}")
24
- end
25
-
26
- private
27
-
28
- def build_metadata_for_v2
29
23
  fields.map do |field|
30
- label = Hyrax::Renderers::AttributeRenderer.new(field.name, nil).label
31
- if field.name == :collection && member_of_collection?
32
- viewable_collections = Hyrax::CollectionMemberService.run(work, @current_ability)
33
- next if viewable_collections.empty?
34
- { 'label' => label,
35
- 'value' => make_collection_link(viewable_collections) }
36
- else
37
- next if field_is_empty?(field)
38
- { 'label' => label,
39
- 'value' => cast_to_value(field_name: field.name, options: field.options) }
24
+ values = values_for(field_name: field)
25
+ if field.name == :collection && member_of_collection? && viewable_collections.present?
26
+ { 'label' => metadata_map(field, :label),
27
+ 'value' => metadata_map(field, :collection) }
28
+ elsif values.present? && !empty_string?(values)
29
+ { 'label' => metadata_map(field, :label),
30
+ 'value' => metadata_map(field, :value) }
40
31
  end
41
32
  end.compact
42
33
  end
43
34
 
44
- def build_metadata_for_v3
45
- fields.map do |field|
46
- values = Array(work.try(field.name)).map { |value| scrub(value.to_s) }
47
- next if values.empty?
48
- {
49
- 'label' => {
50
- # Since we're using I18n to translate the field, we're setting the locale used in the translation.
51
- I18n.locale.to_s => [Hyrax::Renderers::AttributeRenderer.new(field.name, nil).label]
52
- },
53
- 'value' => {
54
- 'none' => values
55
- }
56
- }
57
- end.compact
35
+ private
36
+
37
+ def metadata_map(field, property)
38
+ if version == 2
39
+ case property
40
+ when :label then field.label
41
+ when :value then cast_to_value(field_name: field.name, options: field.options)
42
+ when :collection then make_collection_link(viewable_collections)
43
+ end
44
+ elsif version == 3
45
+ case property
46
+ when :label then { I18n.locale.to_s => [field.label] }
47
+ when :value then { 'none' => cast_to_value(field_name: field.name, options: field.options) }
48
+ when :collection then { 'none' => make_collection_link(viewable_collections) }
49
+ end
50
+ end
58
51
  end
59
52
 
60
- def field_is_empty?(field)
61
- Array(work.try(field.name)).empty?
53
+ # Bulkrax imports values as [""] if there isn't a value but still a header,
54
+ # these fields should not show in the metadata pane
55
+ def empty_string?(values)
56
+ values.uniq.size == 1 ? values.first == "" : false
62
57
  end
63
58
 
64
59
  def member_of_collection?
@@ -71,21 +66,41 @@ module IiifPrint
71
66
 
72
67
  def cast_to_value(field_name:, options:)
73
68
  if options&.[](:render_as) == :faceted
74
- values_for(field_name: field_name).map do |value|
75
- search_field = field_name.to_s + "_sim"
76
- path = Rails.application.routes.url_helpers.search_catalog_path(
77
- "f[#{search_field}][]": value, locale: I18n.locale
78
- )
79
- path += '&include_child_works=true' if work["is_child_bsi"] == true
80
- "<a href='#{File.join(@base_url, path)}'>#{value}</a>"
81
- end
69
+ faceted_values_for(field_name: field_name)
70
+ elsif qa_field?(field_name: options&.dig(:render_as) || field_name)
71
+ authority_values_for(field_name: field_name)
82
72
  else
83
73
  make_link(values_for(field_name: field_name))
84
74
  end
85
75
  end
86
76
 
77
+ def faceted_values_for(field_name:)
78
+ values_for(field_name: field_name).map do |value|
79
+ search_field = field_name.to_s + "_sim"
80
+ path = Rails.application.routes.url_helpers.search_catalog_path(
81
+ "f[#{search_field}][]": value, locale: I18n.locale
82
+ )
83
+ path += '&include_child_works=true' if work["is_child_bsi"] == true
84
+ "<a href='#{File.join(@base_url, path)}'>#{value}</a>"
85
+ end
86
+ end
87
+
88
+ def qa_field?(field_name:, questioning_authority_fields: IiifPrint.config.questioning_authority_fields)
89
+ questioning_authority_fields.include?(field_name.to_s)
90
+ end
91
+
92
+ def authority_values_for(field_name:)
93
+ authority = Qa::Authorities::Local.subauthority_for(field_name.to_s.pluralize)
94
+ values_for(field_name: field_name).map do |value|
95
+ id, term = authority.find(value).values_at('id', 'term')
96
+ "<a href='#{id}'>#{term}</a>"
97
+ end
98
+ end
99
+
87
100
  def values_for(field_name:)
88
- Array(work.send(field_name))
101
+ field_name = field_name.try(:name) || field_name
102
+ # TODO: we are assuming tesim or dtsi (for dates), might want to account for other suffixes in the future
103
+ Array(work["#{field_name}_tesim"] || work["#{field_name}_dtsi"]&.to_date.try(:to_formatted_s, :standard))
89
104
  end
90
105
 
91
106
  def make_collection_link(collection_documents)
@@ -94,11 +109,16 @@ module IiifPrint
94
109
  end
95
110
  end
96
111
 
97
- # @note This method turns link looking strings into links
112
+ def viewable_collections
113
+ Hyrax::CollectionMemberService.run(SolrDocument.find(work.id), current_ability)
114
+ end
115
+
116
+ # @note This method turns link-looking strings into links and assumes https if no protocol was given
98
117
  def make_link(texts)
99
118
  texts.map do |t|
100
119
  t.to_s.gsub(MAKE_LINK_REGEX) do |url|
101
- "<a href='#{url}' target='_blank'>#{url}</a>"
120
+ protocol = url.start_with?('www.') ? 'https://' : ''
121
+ "<a href='#{protocol}#{url}' target='_blank'>#{url}</a>"
102
122
  end
103
123
  end
104
124
  end
@@ -106,10 +126,9 @@ module IiifPrint
106
126
  MAKE_LINK_REGEX = %r{
107
127
  \b
108
128
  (
109
- (?: [a-z][\w-]+:
110
- (?: /{1,3} | [a-z0-9%] ) |
111
- www\d{0,3}[.] |
112
- [a-z0-9.\-]+[.][a-z]{2,4}/
129
+ (?:
130
+ (?:https?://) |
131
+ (?:www\.)
113
132
  )
114
133
  (?:
115
134
  [^\s()<>]+ | \(([^\s()<>]+|(\([^\s()<>]+\)))*\)
@@ -24,7 +24,9 @@ module IiifPrint
24
24
  # JP2 source, and whether we have color or grayscale material.
25
25
  def convert_cmd
26
26
  template = use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD
27
- format(template, source_file: @source_path, out_file: @dest_path)
27
+ data = format(template, source_file: @source_path, out_file: @dest_path)
28
+ IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
29
+ data
28
30
  end
29
31
 
30
32
  def create_derivatives(filename)
@@ -0,0 +1,189 @@
1
+ module IiifPrint
2
+ module PersistenceLayer
3
+ class ActiveFedoraAdapter < AbstractAdapter
4
+ ##
5
+ # @param object [ActiveFedora::Base]
6
+ # @return [Array<SolrDocument>]
7
+ def self.object_in_works(object)
8
+ object.in_works
9
+ end
10
+
11
+ ##
12
+ # @param object [ActiveFedora::Base]
13
+ # @return [Array<SolrDocument>]
14
+ def self.object_ordered_works(object)
15
+ object.ordered_works
16
+ end
17
+
18
+ ##
19
+ # @param work_type [Class<ActiveFedora::Base>] receives IiifPrint::SetChildFlag if it is not already included
20
+ # @return indexer for the given :work_type (the value of work_type.indexer)
21
+ def self.decorate_with_adapter_logic(work_type:)
22
+ work_type.send(:include, IiifPrint::SetChildFlag) unless work_type.included_modules.include?(IiifPrint::SetChildFlag)
23
+ work_type.indexer
24
+ end
25
+
26
+ ##
27
+ # @param work_type [Class<ActiveFedora::Base>]
28
+ # @return indexer for the given :work_type; this adapter does no form decoration and only returns work_type.indexer
29
+ def self.decorate_form_with_adapter_logic(work_type:)
30
+ work_type.indexer
31
+ end
32
+
33
+ ##
34
+ # Return the immediate parent of the given :file_set.
35
+ #
36
+ # @param file_set [FileSet]
37
+ # @return [#work?, Hydra::PCDM::Work]
38
+ # @return [NilClass] when no parent is found.
39
+ def self.parent_for(file_set)
40
+ # fallback to Fedora-stored relationships if work's aggregation of
41
+ # file set is not indexed in Solr
42
+ file_set.parent || file_set.member_of.find(&:work?)
43
+ end
44
+
45
+ ##
46
+ # Return the parent's parent of the given :file_set.
47
+ #
48
+ # @param file_set [FileSet]
49
+ # @return [#work?, Hydra::PCDM::Work]
50
+ # @return [NilClass] when no grand parent is found.
51
+ def self.grandparent_for(file_set)
52
+ parent_of_file_set = parent_for(file_set)
53
+ # HACK: This is an assumption about the file_set structure, namely that an image page split from
54
+ # a PDF is part of a file set that is a child of a work that is a child of a single work. That
55
+ # is, it only has one grand parent. Which is a reasonable assumption for IIIF Print but is not
56
+ # valid when extended beyond IIIF Print. That is GenericWork does not have a parent method but
57
+ # does have a parents method.
58
+ parent_of_file_set.try(:parent_works).try(:first) ||
59
+ parent_of_file_set.try(:parents).try(:first) ||
60
+ parent_of_file_set&.member_of&.find(&:work?)
61
+ end
62
+
63
+ def self.solr_construct_query(*args)
64
+ if defined?(Hyrax::SolrQueryBuilderService)
65
+ Hyrax::SolrQueryBuilderService.construct_query(*args)
66
+ else
67
+ ActiveFedora::SolrQueryBuilder.construct_query(*args)
68
+ end
69
+ end
70
+
71
+ def self.clean_for_tests!
72
+ super do
73
+ ActiveFedora::Cleaner.clean!
74
+ end
75
+ end
76
+
77
+ def self.solr_query(query, **args)
78
+ if defined?(ActiveFedora::SolrService)
79
+ ActiveFedora::SolrService.query(query, **args)
80
+ else
81
+ Hyrax::SolrService.query(query, **args)
82
+ end
83
+ end
84
+
85
+ def self.solr_name(field_name)
86
+ if defined?(Hyrax) && Hyrax.config.respond_to?(:index_field_mapper)
87
+ Hyrax.config.index_field_mapper.solr_name(field_name.to_s)
88
+ else
89
+ ::ActiveFedora.index_field_mapper.solr_name(field_name.to_s)
90
+ end
91
+ end
92
+
93
+ ##
94
+ # @param file_set [Object]
95
+ # @param work [Object]
96
+ # @param model [Class] The class name for which we'll split children. Returns true after destroying the children found, or nil when none are found.
97
+ def self.destroy_children_split_from(file_set:, work:, model:, **_args)
98
+ # look first for children by the file set id they were split from
99
+ children = model.where(split_from_pdf_id: file_set.id)
100
+ if children.blank?
101
+ # find works where file name and work `to_param` are both in the title
102
+ children = model.where(title: file_set.label).where(title: work.to_param)
103
+ end
104
+ return if children.blank?
105
+ children.each do |rcd|
106
+ rcd.destroy(eradicate: true)
107
+ end
108
+ true
109
+ end
110
+
111
+ def self.pdf?(file_set)
112
+ file_set.class.pdf_mime_types.include?(file_set.mime_type)
113
+ end
114
+
115
+ ##
116
+ # Add a child record as a member of a parent record
117
+ #
118
+ # @param child_record [ActiveFedora::Base] the record appended to the parent's ordered_members
119
+ # @param parent_record [ActiveFedora::Base] the record whose ordered_members is checked and appended to
120
+ # @return [TrueClass] also when the child is already an ordered member (nothing is appended then)
121
+ def self.create_relationship_between(child_record:, parent_record:)
122
+ return true if parent_record.ordered_members.to_a.include?(child_record)
123
+ parent_record.ordered_members << child_record
124
+ true
125
+ end
126
+
127
+ ##
128
+ # find a work by title
129
+ # We should only find one, but there is no guarantee of that and `:where` returns an array.
130
+ #
131
+ # @param title [String]
132
+ # @param model [String] name of an ActiveFedora::Base model class; it is constantized before querying
133
+ def self.find_by_title_for(title:, model:)
134
+ work_type = model.constantize
135
+
136
+ work_type.where(title: title)
137
+ end
138
+
139
+ ##
140
+ # find a work or file_set
141
+ #
142
+ # @param id [String]
143
+ # @return [ActiveFedora::Base] whatever ActiveFedora::Base.find(id) returns
144
+ def self.find_by(id:)
145
+ ActiveFedora::Base.find(id)
146
+ end
147
+
148
+ ##
149
+ # save a work
150
+ #
151
+ # @param object [ActiveFedora::Base] the record to persist; saved with save!
152
+ def self.save(object:)
153
+ object.save!
154
+ end
155
+
156
+ ##
157
+ # reindex an array of works and their file_sets
158
+ #
159
+ # @param objects [Array<ActiveFedora::Base>] file sets are reindexed only for objects that respond to :file_sets
160
+ # @return [TrueClass]
161
+ def self.index_works(objects:)
162
+ objects.each do |work|
163
+ work.update_index
164
+ work.file_sets.each(&:update_index) if work.respond_to?(:file_sets)
165
+ end
166
+ true
167
+ end
168
+
169
+ ##
170
+ # does nothing for ActiveFedora;
171
+ # allows valkyrie works to have an extra step to create the Hyrax::Metadata objects.
172
+ #
173
+ # @param [] any arguments are accepted and ignored
174
+ # @return [TrueClass]
175
+ def self.copy_derivatives_from_data_store(*)
176
+ true
177
+ end
178
+
179
+ ##
180
+ # Extract text from the derivatives
181
+ #
182
+ # @param file_set [FileSet] an ActiveFedora fileset, passed as object: to IiifPrint.config.all_text_generator_function
183
+ # @return [String] Text from fileset's file; '' when the generator function returns nil or false
184
+ def self.extract_text_for(file_set:)
185
+ IiifPrint.config.all_text_generator_function.call(object: file_set) || ''
186
+ end
187
+ end
188
+ end
189
+ end
@@ -0,0 +1,183 @@
1
+ module IiifPrint
2
+ module PersistenceLayer
3
+ class ValkyrieAdapter < AbstractAdapter
4
+ ##
5
+ # @param object [Valkyrie::Resource]
6
+ # @return [Array<Valkyrie::Resource>]
7
+ def self.object_in_works(object)
8
+ Array.wrap(Hyrax.custom_queries.find_parent_work(resource: object))
9
+ end
10
+
11
+ ##
12
+ # @param object [Valkyrie::Resource]
13
+ # @return [Array<Valkyrie::Resource>] child works first, followed by child file sets
14
+ def self.object_ordered_works(object)
15
+ child_file_sets = Hyrax.custom_queries.find_child_file_sets(resource: object).to_a
16
+ child_works = Hyrax.custom_queries.find_child_works(resource: object).to_a
17
+ child_works + child_file_sets
18
+ end
19
+
20
+ ##
21
+ # @param work_type [Class<Valkyrie::Resource>]
22
+ # @return the indexer for the given :work_type, looked up by constantizing "#{work_type}Indexer"
23
+ def self.decorate_with_adapter_logic(work_type:)
24
+ work_type.send(:include, Hyrax::Schema(:child_works_from_pdf_splitting)) unless work_type.included_modules.include?(Hyrax::Schema(:child_works_from_pdf_splitting))
25
+ # TODO: Use `Hyrax::ValkyrieIndexer.indexer_class_for` once changes are merged.
26
+ indexer = "#{work_type}Indexer".constantize
27
+ indexer.send(:include, Hyrax::Indexer(:child_works_from_pdf_splitting)) unless indexer.included_modules.include?(Hyrax::Indexer(:child_works_from_pdf_splitting))
28
+ indexer
29
+ end
30
+
31
+ ##
32
+ # @param work_type [Class<Valkyrie::Resource>]
33
+ # @return form for the given :work_type, looked up by constantizing "#{work_type}Form"
34
+ def self.decorate_form_with_adapter_logic(work_type:)
35
+ form = "#{work_type}Form".constantize
36
+ form.send(:include, Hyrax::FormFields(:child_works_from_pdf_splitting)) unless form.included_modules.include?(Hyrax::FormFields(:child_works_from_pdf_splitting))
37
+ form
38
+ end
39
+
40
+ ##
41
+ # Return the immediate parent of the given :file_set.
42
+ #
43
+ # @param file_set [Hyrax::FileMetadata or FileSet] a Hyrax::FileMetadata is first resolved to its file set via file_set_id
44
+ # @return [#work?, Hydra::PCDM::Work]
45
+ # @return [NilClass] when no parent is found.
46
+ def self.parent_for(file_set)
47
+ file_set = Hyrax.query_service.find_by(id: file_set.file_set_id) if file_set.is_a?(Hyrax::FileMetadata)
48
+ Hyrax.query_service.find_parents(resource: file_set).first
49
+ end
50
+
51
+ ##
52
+ # Return the parent's parent of the given :file_set.
53
+ #
54
+ # @param file_set [Hyrax::FileMetadata or FileSet]
55
+ # @return [#work?, Hydra::PCDM::Work]
56
+ # @return [NilClass] when no grand parent is found.
57
+ def self.grandparent_for(file_set)
58
+ parent = parent_for(file_set)
59
+ return nil unless parent
60
+ Hyrax.query_service.find_parents(resource: parent).first
61
+ end
62
+
63
+ def self.solr_construct_query(*args)
64
+ Hyrax::SolrQueryBuilderService.construct_query(*args)
65
+ end
66
+
67
+ def self.clean_for_tests!
68
+ # For Fedora backed repositories, we'll want to consider some cleaning mechanism. For
69
+ # database backed repositories, we can rely on the database_cleaner gem.
70
+ raise NotImplementedError
71
+ end
72
+
73
+ def self.solr_query(query, **args)
74
+ Hyrax::SolrService.query(query, **args)
75
+ end
76
+
77
+ def self.solr_name(field_name)
78
+ Hyrax.config.index_field_mapper.solr_name(field_name.to_s)
79
+ end
80
+
81
+ # rubocop:disable Lint/UnusedMethodArgument
82
+ def self.destroy_children_split_from(file_set:, work:, model:, user:)
83
+ # rubocop:enable Lint/UnusedMethodArgument
84
+ # look for child records by the file set id they were split from; each one is deleted, removed from the index, and announced via 'object.deleted'
85
+ Hyrax.query_service.find_inverse_references_by(resource: file_set, property: :split_from_pdf_id, model: model).each do |child|
86
+ Hyrax.persister.delete(resource: child)
87
+ Hyrax.indexing_service.delete(resource: child)
88
+ Hyrax.publisher.publish('object.deleted', object: child, user: user)
89
+ end
90
+ true
91
+ end
92
+
93
+ def self.pdf?(file_set)
94
+ file_set.original_file.pdf?
95
+ end
96
+
97
+ ##
98
+ # Add a child record as a member of a parent record
99
+ #
100
+ # @param child_record [Valkyrie::Resource] the record whose id is appended to the parent's member_ids
101
+ # @param parent_record [Valkyrie::Resource] the record whose member_ids is checked and appended to (it is not saved here)
102
+ # @return [TrueClass] also when the child id is already present (nothing is appended then)
103
+ def self.create_relationship_between(child_record:, parent_record:)
104
+ return true if parent_record.member_ids.include?(child_record.id)
105
+ parent_record.member_ids << child_record.id
106
+ true
107
+ end
108
+
109
+ ##
110
+ # find a work by title
111
+ # We should only find one, but there is no guarantee of that
112
+ # @param title [String]
113
+ # @param model [String] name of a Valkyrie::Resource model class; it is constantized before querying
114
+ # @return [Array<Valkyrie::Resource>]
115
+ def self.find_by_title_for(title:, model:)
116
+ work_type = model.constantize
117
+ # TODO: This creates a hard dependency on Bulkrax because that is where this custom query is defined
118
+ # Is this adequate?
119
+ Array.wrap(Hyrax.query_service.custom_query.find_by_model_and_property_value(model: work_type,
120
+ property: :title,
121
+ value: title))
122
+ end
123
+
124
+ ##
125
+ # find a work or file_set
126
+ #
127
+ # @param id [String]
128
+ def self.find_by(id:)
129
+ Hyrax.query_service.find_by(id: id)
130
+ end
131
+
132
+ ##
133
+ # save a work
134
+ #
135
+ # @param object [Valkyrie::Resource] persisted, indexed, then announced via 'object.membership.updated' with object.depositor as the user
136
+ def self.save(object:)
137
+ Hyrax.persister.save(resource: object)
138
+ Hyrax.index_adapter.save(resource: object)
139
+
140
+ Hyrax.publisher.publish('object.membership.updated', object: object, user: object.depositor)
141
+ end
142
+
143
+ ##
144
+ # reindex an array of works and their file_sets
145
+ #
146
+ # @param objects [Array<Valkyrie::Resource>]
147
+ # @return [TrueClass]
148
+ def self.index_works(objects:)
149
+ objects.each do |work|
150
+ Hyrax.index_adapter.save(resource: work)
151
+ Hyrax.custom_queries.find_child_file_sets(resource: work).each do |file_set|
152
+ Hyrax.index_adapter.save(resource: file_set)
153
+ end
154
+ end
155
+ true
156
+ end
157
+
158
+ ##
159
+ # Performs an extra step to create the Hyrax::Metadata objects
160
+ # for derivatives.
161
+ #
162
+ # @param [] stream: and directives: are passed positionally to Hyrax::ValkyriePersistDerivatives.call
163
+ # @return [TrueClass] presumably; this is whatever Hyrax::ValkyriePersistDerivatives.call returns (TODO confirm)
164
+ def self.copy_derivatives_from_data_store(stream:, directives:)
165
+ Hyrax::ValkyriePersistDerivatives.call(stream, directives)
166
+ end
167
+
168
+ ##
169
+ # Extract text from the derivatives
170
+ #
171
+ # @param file_set [Hyrax::FileSet] a Valkyrie fileset
172
+ # @return [String, NilClass] Text from fileset's file; nil when there is no extracted-file metadata or none of it has the txt mime type
173
+ def self.extract_text_for(file_set:)
174
+ fm = Hyrax.custom_queries.find_many_file_metadata_by_use(resource: file_set,
175
+ use: Hyrax::FileMetadata::Use.uri_for(use: :extracted_file))
176
+ return if fm.empty?
177
+ text_fm = fm.find { |t| t.mime_type == Marcel::MimeType.for(extension: 'txt') }
178
+ return if text_fm.nil?
179
+ text_fm.content
180
+ end
181
+ end
182
+ end
183
+ end
@@ -0,0 +1,118 @@
1
module IiifPrint
  ##
  # The PersistenceLayer module provides the namespace for other adapters:
  #
  # - {IiifPrint::PersistenceLayer::ActiveFedoraAdapter}
  # - {IiifPrint::PersistenceLayer::ValkyrieAdapter}
  #
  # And the defining interface in the {IiifPrint::PersistenceLayer::AbstractAdapter}
  module PersistenceLayer
    ##
    # Defines the class-level interface that every persistence adapter implements.
    #
    # Apart from {.clean_for_tests!}, every method here raises NotImplementedError
    # with a message of the form "<ReceiverClass>.<method_name>", so a missing
    # override in a subclass is reported by name.
    #
    # @abstract
    class AbstractAdapter
      ##
      # @param object [Object]
      # @return [Array<Object>]
      # @raise [NotImplementedError] always, in this abstract class
      def self.object_in_works(object)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @param object [Object]
      # @return [Array<Object>]
      # @raise [NotImplementedError] always, in this abstract class
      def self.object_ordered_works(object)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @param work_type [Class]
      # @return the corresponding indexer for the work_type
      # @raise [NotImplementedError] always, in this abstract class
      def self.decorate_with_adapter_logic(work_type:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @param work_type [Class]
      # @return the adapter's form-side counterpart for the work_type (the
      #   Valkyrie adapter returns a form class, the ActiveFedora adapter
      #   returns the indexer)
      # @raise [NotImplementedError] always, in this abstract class
      def self.decorate_form_with_adapter_logic(work_type:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @param file_set [Object]
      # @param work [Object]
      # @param model [Class] The class name for which we'll split children.
      # @param user [Object, nil] optional; accepted so callers may pass the
      #   same keywords to any adapter (the Valkyrie adapter requires it)
      # @raise [NotImplementedError] always, in this abstract class
      def self.destroy_children_split_from(file_set:, work:, model:, user: nil)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.parent_for(*)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.grandparent_for(*)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # Retained for backward compatibility with existing callers.
      #
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.solr_field_query(*)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # Declared here because the concrete adapters implement a method of this
      # name; without it the abstract interface would not list it.
      #
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.solr_construct_query(*)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # Guard for destructive test cleanup: yields to the given block only when
      # Rails.env.test? is true.
      #
      # @abstract
      # @yield the adapter-specific cleaning logic
      # @return [FalseClass] when not running in the test environment
      # @return the block's value otherwise
      def self.clean_for_tests!
        return false unless Rails.env.test?
        yield
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.solr_query(*args)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.solr_name(*args)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.pdf?(_file_set)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.create_relationship_between(child_record:, parent_record:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.find_by_title_for(title:, model:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.find_by(id:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.save(object:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.index_works(objects:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.copy_derivatives_from_data_store(stream:, directives:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end

      ##
      # @abstract
      # @raise [NotImplementedError] always, in this abstract class
      def self.extract_text_for(file_set:)
        raise NotImplementedError, "#{self}.#{__method__}"
      end
    end
  end
end