iiif_print 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (181) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +102 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
  19. data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
  20. data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
  21. data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
  22. data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  23. data/app/listeners/iiif_print/listener.rb +31 -0
  24. data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
  25. data/app/models/concerns/iiif_print/solr/document.rb +19 -3
  26. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  27. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  28. data/app/models/iiif_print/pending_relationship.rb +3 -0
  29. data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
  30. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  31. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  32. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
  33. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  34. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  35. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  36. data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
  37. data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
  38. data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
  39. data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
  40. data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
  41. data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
  42. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  43. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  44. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  45. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  46. data/config/initializers/simple_schema_loader.rb +1 -0
  47. data/config/locales/iiif_print.en.yml +4 -0
  48. data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
  49. data/config/routes.rb +3 -0
  50. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
  51. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
  52. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
  53. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  54. data/docker-compose.yml +2 -2
  55. data/iiif_print.gemspec +11 -10
  56. data/lib/generators/iiif_print/install_generator.rb +21 -1
  57. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  58. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  59. data/lib/iiif_print/base_derivative_service.rb +14 -2
  60. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
  61. data/lib/iiif_print/catalog_search_builder.rb +7 -3
  62. data/lib/iiif_print/configuration.rb +205 -8
  63. data/lib/iiif_print/data/fileset_helper.rb +3 -3
  64. data/lib/iiif_print/data/work_derivatives.rb +4 -4
  65. data/lib/iiif_print/engine.rb +53 -15
  66. data/lib/iiif_print/errors.rb +18 -0
  67. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  68. data/lib/iiif_print/image_tool.rb +12 -8
  69. data/lib/iiif_print/jp2_derivative_service.rb +4 -1
  70. data/lib/iiif_print/lineage_service.rb +47 -13
  71. data/lib/iiif_print/metadata.rb +67 -48
  72. data/lib/iiif_print/pdf_derivative_service.rb +3 -1
  73. data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
  74. data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
  75. data/lib/iiif_print/persistence_layer.rb +118 -0
  76. data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
  77. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
  78. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  79. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
  80. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  81. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  82. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  83. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  84. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  85. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  86. data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
  87. data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
  88. data/lib/iiif_print/tiff_derivative_service.rb +3 -1
  89. data/lib/iiif_print/version.rb +1 -1
  90. data/lib/iiif_print.rb +210 -20
  91. data/lib/samvera/derivatives/configuration.rb +83 -0
  92. data/lib/samvera/derivatives/hyrax.rb +129 -0
  93. data/lib/samvera/derivatives.rb +238 -0
  94. data/tasks/copy_authorities_to_test_app.rake +11 -0
  95. data/tasks/iiif_print_dev.rake +4 -4
  96. metadata +111 -196
  97. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  98. data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
  99. data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
  100. data/bin/rails +0 -13
  101. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
  102. data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
  103. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  104. data/spec/.keep.txt +0 -1
  105. data/spec/factories/ability.rb +0 -6
  106. data/spec/factories/newspaper_issue.rb +0 -7
  107. data/spec/factories/newspaper_page.rb +0 -7
  108. data/spec/factories/newspaper_page_solr_document.rb +0 -12
  109. data/spec/factories/newspaper_title.rb +0 -8
  110. data/spec/factories/uploaded_pdf_file.rb +0 -9
  111. data/spec/factories/uploaded_txt_file.rb +0 -9
  112. data/spec/factories/user.rb +0 -13
  113. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  114. data/spec/fixtures/files/4.1.07.tiff +0 -0
  115. data/spec/fixtures/files/README.md +0 -7
  116. data/spec/fixtures/files/alto-2-0.xsd +0 -714
  117. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  118. data/spec/fixtures/files/credits.md +0 -16
  119. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  120. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  121. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  122. data/spec/fixtures/files/minimal-alto.xml +0 -31
  123. data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
  124. data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
  125. data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
  126. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  127. data/spec/fixtures/files/ocr_alto.xml +0 -202
  128. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
  129. data/spec/fixtures/files/ocr_color.tiff +0 -0
  130. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  131. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  132. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  133. data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
  134. data/spec/fixtures/files/page1.tiff +0 -0
  135. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  136. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  137. data/spec/fixtures/files/thumbnail.jpg +0 -0
  138. data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
  139. data/spec/helpers/iiif_print_helper_spec.rb +0 -43
  140. data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
  141. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
  142. data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
  143. data/spec/iiif_print/configuration_spec.rb +0 -67
  144. data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
  145. data/spec/iiif_print/data/work_file_spec.rb +0 -99
  146. data/spec/iiif_print/data/work_files_spec.rb +0 -237
  147. data/spec/iiif_print/image_tool_spec.rb +0 -109
  148. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
  149. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
  150. data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
  151. data/spec/iiif_print/lineage_service_spec.rb +0 -13
  152. data/spec/iiif_print/metadata_spec.rb +0 -115
  153. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
  154. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
  155. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
  156. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
  157. data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
  158. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
  159. data/spec/iiif_print_spec.rb +0 -51
  160. data/spec/misc_shared.rb +0 -111
  161. data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
  162. data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
  163. data/spec/models/solr_document_spec.rb +0 -14
  164. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
  165. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
  166. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
  167. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
  168. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
  169. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
  170. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
  171. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
  172. data/spec/spec_helper.rb +0 -181
  173. data/spec/support/controller_level_helpers.rb +0 -28
  174. data/spec/support/iiif_print_models.rb +0 -127
  175. data/spec/test_app_templates/blacklight.yml +0 -9
  176. data/spec/test_app_templates/fedora.yml +0 -15
  177. data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
  178. data/spec/test_app_templates/redis.yml +0 -9
  179. data/spec/test_app_templates/solr/conf/schema.xml +0 -362
  180. data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
  181. data/spec/test_app_templates/solr.yml +0 -7
@@ -1,7 +1,38 @@
1
1
  module IiifPrint
2
+ # rubocop:disable Metrics/ClassLength
2
3
  class Configuration
3
4
  attr_writer :after_create_fileset_handler
4
5
 
6
+ attr_writer :ingest_queue_name
7
+ ##
8
+ # @return [Symbol, Proc]
9
+ def ingest_queue_name
10
+ return @ingest_queue_name if @ingest_queue_name.present?
11
+ if defined?(Hyrax)
12
+ Hyrax.config.ingest_queue_name
13
+ elsif defined?(Bulkrax) && Bulkrax.config.respond_to?(:ingest_queue_name)
14
+ Bulkrax.config.ingest_queue_name
15
+ else
16
+ :ingest
17
+ end
18
+ end
19
+
20
+ attr_writer :persistence_adapter
21
+ def persistence_adapter
22
+ @persistence_adapter || default_persistence_adapter
23
+ end
24
+
25
+ def default_persistence_adapter
26
+ # There's probably some configuration of Hyrax we could use to better refine this; but it's
27
+ # likely a reasonable guess. The main goal is to not break existing implementations and
28
+ # maintain an upgrade path.
29
+ if Gem::Version.new(Hyrax::VERSION) >= Gem::Version.new('6.0.0')
30
+ IiifPrint::PersistenceLayer::ValkyrieAdapter
31
+ else
32
+ IiifPrint::PersistenceLayer::ActiveFedoraAdapter
33
+ end
34
+ end
35
+
5
36
  # @param file_set [FileSet]
6
37
  # @param user [User]
7
38
  def handle_after_create_fileset(file_set, user)
@@ -12,6 +43,17 @@ module IiifPrint
12
43
  end
13
44
  end
14
45
 
46
+ attr_writer :ancestory_identifier_function
47
+ # The function, with arity 1, that receives a work and returns its identifier (as a string) for
48
+ # the purposes of object ancestry.
49
+ #
50
+ # @return [Proc]
51
+ def ancestory_identifier_function
52
+ # If the work.id is nil, keep it nil. Otherwise cast that id to a string; to deal with the
53
+ # `Valkyrie::ID`.
54
+ @ancestory_identifier_function ||= ->(work) { work.id&.to_s }
55
+ end
56
+
15
57
  attr_writer :excluded_model_name_solr_field_values
16
58
  # By default, this uses an array of human readable types
17
59
  # ex: ['Generic Work', 'Image']
@@ -21,6 +63,45 @@ module IiifPrint
21
63
  @excluded_model_name_solr_field_values = []
22
64
  end
23
65
 
66
+ def skip_splitting_pdf_files_that_end_with_these_texts=(values)
67
+ @skip_splitting_pdf_files_that_end_with_these_texts = Array.wrap(values).map(&:downcase)
68
+ end
69
+
70
+ ##
71
+ # @return [Array<String>] the file suffixes (e.g. [".reader.pdf"]) that we will skip. Per
72
+ # the implementation of {.split_for_path_suffix?}, these values are cast to
73
+ # downcase.
74
+ def skip_splitting_pdf_files_that_end_with_these_texts
75
+ @skip_splitting_pdf_files_that_end_with_these_texts || []
76
+ end
77
+
78
+ attr_writer :unique_child_title_generator_function
79
+
80
+ # The function, with keywords (though maybe you'll want to splat ignore a few), is responsible
81
+ # for generating the child work file title.
82
+ #
83
+ # The keyword parameters that will be passed to this function are:
84
+ #
85
+ # :original_pdf_path - The fully qualified pathname to the original PDF from which the images
86
+ # were split.
87
+ # :image_path - The fully qualified pathname for an image of the single page from the PDF.
88
+ # :parent_work - The object in which we're "attaching" the image.
89
+ # :page_number - The image is of the N-th page_number of the original PDF
90
+ # :page_padding - A helper number that indicates the number of significant digits of pages
91
+ # (e.g. 150 pages would have a padding of 3).
92
+ #
93
+ # @return [Proc]
94
+ # rubocop:disable Lint/UnusedBlockArgument
95
+ def unique_child_title_generator_function
96
+ @unique_child_title_generator_function ||= lambda { |original_pdf_path:, image_path:, parent_work:, page_number:, page_padding:|
97
+ identifier = parent_work.id
98
+ filename = File.basename(original_pdf_path)
99
+ page_suffix = "Page #{(page_number.to_i + 1).to_s.rjust(page_padding.to_i, '0')}"
100
+ "#{identifier} - #{filename} #{page_suffix}"
101
+ }
102
+ end
103
+ # rubocop:enable Lint/UnusedBlockArgument
104
+
24
105
  # This method wraps Hyrax's configuration so we can sniff out the correct method to use. The
25
106
  # {Hyrax::Configuration#whitelisted_ingest_dirs} is deprecated in favor of
26
107
  # {Hyrax::Configuration#registered_ingest_dirs}.
@@ -44,7 +125,7 @@ module IiifPrint
44
125
 
45
126
  attr_writer :default_iiif_manifest_version
46
127
  def default_iiif_manifest_version
47
- @default_iiif_manifest_version || 2
128
+ @default_iiif_manifest_version.presence || 2
48
129
  end
49
130
 
50
131
  attr_writer :metadata_fields
@@ -81,19 +162,135 @@ module IiifPrint
81
162
  end
82
163
  # rubocop:enable Metrics/MethodLength
83
164
 
165
+ attr_writer :additional_tesseract_options
166
+ ##
167
+ # The additional options to pass to the Tesseract configuration
168
+ #
169
+ # @see https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html
170
+ # @return [String]
171
+ def additional_tesseract_options
172
+ @additional_tesseract_options || ""
173
+ end
174
+
175
+ attr_writer :uv_config_path
176
+ ##
177
+ # According to https://github.com/samvera/hyrax/wiki/Hyrax-Management-Guide#universal-viewer-config
178
+ # the name of the UV config file should be /uv/uv_config.json (with an _)
179
+ # However, in most applications, it is /uv/uv-config.json (with a -)
180
+ def uv_config_path
181
+ @uv_config_path || "/uv/uv-config.json"
182
+ end
183
+
184
+ attr_writer :uv_base_path
185
+ ##
186
+ # While we're at it, we're going to go ahead and make the base path configurable as well
187
+ def uv_base_path
188
+ @uv_base_path || "/uv/uv.html"
189
+ end
190
+
191
+ attr_writer :child_work_attributes_function
192
+ ##
193
+ # Here we allow for customization of the child work attributes
194
+ # rubocop:disable Metrics/MethodLength, Metrics/BlockLength
195
+ def child_work_attributes_function
196
+ @child_work_attributes_function ||= lambda do |parent_work:, admin_set_id:|
197
+ embargo = parent_work.embargo
198
+ lease = parent_work.lease
199
+ embargo_params = {}
200
+ lease_params = {}
201
+ visibility_params = {}
202
+
203
+ if embargo
204
+ embargo_params = {
205
+ visibility: 'embargo',
206
+ visibility_after_embargo: embargo.visibility_after_embargo,
207
+ visibility_during_embargo: embargo.visibility_during_embargo,
208
+ embargo_release_date: embargo.embargo_release_date
209
+ }
210
+ elsif lease
211
+ lease_params = {
212
+ visibility: 'lease',
213
+ visibility_after_lease: lease.visibility_after_lease,
214
+ visibility_during_lease: lease.visibility_during_lease,
215
+ lease_release_date: lease.lease_release_date
216
+ }
217
+ else
218
+ visibility_params = { visibility: parent_work.visibility.to_s }
219
+ end
220
+
221
+ params = {
222
+ admin_set_id: admin_set_id.to_s,
223
+ creator: parent_work.creator.to_a,
224
+ rights_statement: parent_work.rights_statement.to_a,
225
+ is_child: true
226
+ }
227
+
228
+ params.merge!(embargo_params).merge!(lease_params).merge!(visibility_params)
229
+ end
230
+ end
231
+ # rubocop:enable Metrics/MethodLength, Metrics/BlockLength
232
+
84
233
  attr_writer :sort_iiif_manifest_canvases_by
234
+ ##
235
+ # Normally, the canvases are sorted by the `ordered_members` association.
236
+ # However, if you want it to be sorted by another property, you can set this
237
+ # configuration. Change `nil` to something like `:title` or `:identifier`.
238
+ #
239
+ # Should you want to sort by the filename of the image, you
240
+ # set `nil` to `:label`. This looks at the canvas label, which is typically set
241
+ # to the filename of the image.
85
242
  def sort_iiif_manifest_canvases_by
86
- @sort_iiif_manifest_canvases_by || :title
243
+ @sort_iiif_manifest_canvases_by || nil
87
244
  end
88
245
 
89
- attr_writer :additional_tessearct_options
246
+ attr_writer :ocr_coords_from_json_function
90
247
  ##
91
- # The additional options to pass to the Tesseract configuration
248
+ # This is used to determine where to pull the OCR coordinates from. By default, it will
249
+ # pull from the JSON file that is generated by the OCR engine. However, if you have a
250
+ # different source, you can set this configuration. Current implementation has access to
251
+ # the `file_set_id` and the `document` [SolrDocument].
92
252
  #
93
- # @see https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html
94
- # @return [String]
95
- def additional_tessearct_options
96
- @additional_tessearct_options || ""
253
+ # @see IiifPrint::BlacklightIiifSearch::AnnotationDecorator#fetch_and_parse_coords
254
+ def ocr_coords_from_json_function
255
+ @ocr_coords_from_json_function ||= lambda do |file_set_id:, **|
256
+ IiifPrint::Data::WorkDerivatives.data(from: file_set_id, of_type: 'json')
257
+ end
258
+ end
259
+
260
+ attr_writer :all_text_generator_function
261
+ ##
262
+ # This configuration determines where to pull the full text from. By default, it will
263
+ # pull from the TXT file that is generated by the OCR engine. However, if your
264
+ # application has its own implementation of generating the full text, then you can
265
+ # set your own configuration here.
266
+ def all_text_generator_function
267
+ @all_text_generator_function ||= lambda do |object:|
268
+ IiifPrint::Data::WorkDerivatives.data(from: object, of_type: 'txt')
269
+ end
270
+ end
271
+
272
+ attr_writer :iiif_metadata_field_presentation_order
273
+ ##
274
+ # This is the default sorter for the metadata. It will sort by the order of the keys specified.
275
+ # By default, this is turned off as it returns nil. If you want to turn it on, you can set this
276
+ # to an array of symbols naming the properties on the work.
277
+ #
278
+ # @example [:title, :description, :date_created]
279
+ # @return [Array<Symbol>]
280
+ def iiif_metadata_field_presentation_order
281
+ @iiif_metadata_field_presentation_order || nil
282
+ end
283
+
284
+ def questioning_authority_fields=(fields)
285
+ @questioning_authority_fields = Array.wrap(fields).map(&:to_s)
286
+ end
287
+
288
+ ##
289
+ # This is used to explicitly set which fields should be rendered as a Questioning Authority in the UV.
290
+ # By default, we render `rights_statement` and `license` as QA fields.
291
+ def questioning_authority_fields
292
+ @questioning_authority_fields ||= ['rights_statement', 'license']
97
293
  end
98
294
  end
295
+ # rubocop:enable Metrics/ClassLength
99
296
  end
@@ -7,7 +7,7 @@ module IiifPrint
7
7
  # if context is itself a string, presume it is a file set id
8
8
  return @work if @work.is_a? String
9
9
  # if context is not a String, presume a work or fileset context:
10
- fileset.nil? ? nil : fileset.id
10
+ fileset&.id
11
11
  end
12
12
 
13
13
  def first_fileset
@@ -15,9 +15,9 @@ module IiifPrint
15
15
  # get the fileset from that id
16
16
  return FileSet.find(@work) if @work.is_a?(String)
17
17
  # if "work" context is a FileSet, not actual work, return it
18
- return @work if @work.is_a? FileSet
18
+ return @work if @work.is_a?(Hyrax::FileSet) || @work.is_a?(FileSet)
19
19
  # in most cases, get from work's members:
20
- filesets = @work.members.select { |m| m.is_a? FileSet }
20
+ filesets = @work.members.select { |m| m.is_a?(Hyrax::FileSet) || m.is_a?(FileSet) }
21
21
  filesets.empty? ? nil : filesets[0]
22
22
  end
23
23
  end
@@ -42,16 +42,16 @@ module IiifPrint
42
42
  #
43
43
  # @return [String]
44
44
  def self.data(from:, of_type:)
45
- new(from).data(of_type)
45
+ new(work: from).data(of_type)
46
46
  end
47
47
 
48
48
  # alternate constructor spelling:
49
49
  def self.of(work, fileset = nil, parent = nil)
50
- new(work, fileset, parent)
50
+ new(work: work, fileset: fileset, parent: parent)
51
51
  end
52
52
 
53
53
  # Adapt work and either specific or first fileset
54
- def initialize(work, fileset = nil, parent = nil)
54
+ def initialize(work: nil, fileset: nil, parent: nil)
55
55
  # adapted context usually work, may be string id of FileSet
56
56
  @work = work
57
57
  @fileset = fileset.nil? ? first_fileset : fileset
@@ -239,7 +239,7 @@ module IiifPrint
239
239
  # of the first assigned file path for single-file work.
240
240
  work_file = parent
241
241
  return if work_file.nil?
242
- work_files = work_file.parent
242
+ work_files = IiifPrint.parent_for(work_file)
243
243
  return if work_files.nil?
244
244
  work_files.assigned[0]
245
245
  else
@@ -1,6 +1,7 @@
1
1
  require 'active_fedora'
2
2
  require 'hyrax'
3
3
  require 'blacklight_iiif_search'
4
+ require 'derivative_rodeo'
4
5
 
5
6
  module IiifPrint
6
7
  # module constants:
@@ -10,8 +11,18 @@ module IiifPrint
10
11
  class Engine < ::Rails::Engine
11
12
  isolate_namespace IiifPrint
12
13
 
14
+ config.eager_load_paths += %W[#{config.root}/app/transactions]
15
+
16
+ initializer 'requires' do
17
+ require 'hyrax/transactions/iiif_print_container_decorator'
18
+ require 'iiif_print/persistence_layer'
19
+ require 'iiif_print/persistence_layer/active_fedora_adapter' if defined?(ActiveFedora)
20
+ require 'iiif_print/persistence_layer/valkyrie_adapter' if defined?(Valkyrie)
21
+ end
22
+
13
23
  # rubocop:disable Metrics/BlockLength
14
24
  config.to_prepare do
25
+ require "iiif_print/jobs/create_relationships_job"
15
26
  # We don't have a hard requirement of Bulkrax but in our experience, lingering on earlier
16
27
  # versions can introduce bugs of both Bulkrax and some of the assumptions that we've resolved.
17
28
  # Very early versions of Bulkrax do not have VERSION defined
@@ -35,32 +46,57 @@ module IiifPrint
35
46
  IiifPrint::PluggableDerivativeService
36
47
  )
37
48
 
49
+ Hyrax.publisher.subscribe(IiifPrint::Listener.new) if Hyrax.respond_to?(:publisher)
50
+
38
51
  Hyrax::IiifManifestPresenter.prepend(IiifPrint::IiifManifestPresenterBehavior)
39
52
  Hyrax::IiifManifestPresenter::Factory.prepend(IiifPrint::IiifManifestPresenterFactoryBehavior)
40
53
  Hyrax::ManifestBuilderService.prepend(IiifPrint::ManifestBuilderServiceBehavior)
41
54
  Hyrax::Renderers::FacetedAttributeRenderer.prepend(Hyrax::Renderers::FacetedAttributeRendererDecorator)
42
55
  Hyrax::WorksControllerBehavior.prepend(IiifPrint::WorksControllerBehaviorDecorator)
56
+ "Hyrax::Transactions::Steps::DeleteAllFileSets".safe_constantize&.prepend(Hyrax::Transactions::Steps::DeleteAllFileSetsDecorator)
57
+ # Hyku::WorksControllerBehavior was introduced in Hyku v6.0.0+. Yes we don't depend on Hyku,
58
+ # but this allows us to do minimal Hyku antics with IiifPrint.
59
+ 'Hyku::WorksControllerBehavior'.safe_constantize&.prepend(IiifPrint::WorksControllerBehaviorDecorator)
60
+
61
+ Hyrax::FileSetPresenter.prepend(IiifPrint::FileSetPresenterDecorator)
43
62
  Hyrax::WorkShowPresenter.prepend(IiifPrint::WorkShowPresenterDecorator)
63
+ Hyrax::IiifHelper.prepend(IiifPrint::IiifHelperDecorator)
44
64
 
45
- IiifPrint::ChildIndexer.decorate_work_types!
46
- IiifPrint::FileSetIndexer.decorate(Hyrax::FileSetIndexer)
65
+ if ActiveModel::Type::Boolean.new.cast(ENV.fetch('HYRAX_VALKYRIE', false))
66
+ # Newer versions of Hyrax favor `Hyrax::Indexers::FileSetIndexer` and deprecate
67
+ # `Hyrax::ValkyrieFileSetIndexer`.
68
+ 'Hyrax::Indexers::FileSetIndexer'.safe_constantize&.prepend(IiifPrint::FileSetIndexer)
47
69
 
48
- ::BlacklightIiifSearch::IiifSearchResponse.prepend(IiifPrint::IiifSearchResponseDecorator)
49
- ::BlacklightIiifSearch::IiifSearchAnnotation.prepend(IiifPrint::BlacklightIiifSearch::AnnotationDecorator)
50
- Hyrax::Actors::FileSetActor.prepend(IiifPrint::Actors::FileSetActorDecorator)
70
+ # Versions 3.0+ of Hyrax have `Hyrax::ValkyrieFileSetIndexer` so we want to decorate that as
71
+ # well. We want to use the elsif construct because later on Hyrax::ValkyrieFileSetIndexer
72
+ # inherits from Hyrax::Indexers::FileSetIndexer and only implements:
73
+ # `def initialize(*args); super; end`
74
+ 'Hyrax::ValkyrieFileSetIndexer'.safe_constantize&.prepend(IiifPrint::FileSetIndexer)
51
75
 
52
- # Extending the presenter to the base url which includes the protocol.
53
- # We need the base url to render the facet links and normalize the interface.
54
- Hyrax::IiifManifestPresenter.send(:attr_accessor, :base_url)
55
- Hyrax::IiifManifestPresenter::DisplayImagePresenter.send(:attr_accessor, :base_url)
56
- # Extending this class because there is an #ability= but not #ability and this definition
57
- # mirrors the Hyrax::IiifManifestPresenter#ability.
58
- module Hyrax::IiifManifestPresenter::DisplayImagePresenterDecorator
59
- def ability
60
- @ability ||= NullAbility.new
76
+ # Newer versions of Hyrax favor `Hyrax::Indexers::PcdmObjectIndexer` and deprecate
77
+ # `Hyrax::ValkyrieWorkIndexer`
78
+ indexers = Hyrax.config.curation_concerns.map do |concern|
79
+ "#{concern}ResourceIndexer".safe_constantize
61
80
  end
81
+ indexers.each { |indexer| indexer.prepend(IiifPrint::ChildWorkIndexer) }
82
+
83
+ # Versions 3.0+ of Hyrax have `Hyrax::ValkyrieWorkIndexer` so we want to decorate that as
84
+ # well. We want to use the elsif construct because later on Hyrax::ValkyrieWorkIndexer
85
+ # inherits from Hyrax::Indexers::PcdmObjectIndexer and only implements:
86
+ # `def initialize(*args); super; end`
87
+ 'Hyrax::ValkyrieWorkIndexer'.safe_constantize&.prepend(IiifPrint::ChildWorkIndexer)
88
+ else
89
+ # The ActiveFedora::Base indexer for FileSets
90
+ Hyrax::FileSetIndexer.prepend(IiifPrint::FileSetIndexer)
91
+ # The ActiveFedora::Base indexer for Works
92
+ Hyrax::WorkIndexer.prepend(IiifPrint::ChildWorkIndexer)
62
93
  end
63
- Hyrax::IiifManifestPresenter::DisplayImagePresenter.prepend(Hyrax::IiifManifestPresenter::DisplayImagePresenterDecorator)
94
+
95
+ ::BlacklightIiifSearch::IiifSearchResponse.prepend(IiifPrint::IiifSearchResponseDecorator)
96
+ ::BlacklightIiifSearch::IiifSearchAnnotation.prepend(IiifPrint::BlacklightIiifSearch::AnnotationDecorator)
97
+ ::BlacklightIiifSearch::IiifSearch.prepend(IiifPrint::IiifSearchDecorator)
98
+ Hyrax::Actors::FileSetActor.prepend(IiifPrint::Actors::FileSetActorDecorator)
99
+ Hyrax::Actors::CleanupFileSetsActor.prepend(IiifPrint::Actors::CleanupFileSetsActorDecorator)
64
100
 
65
101
  Hyrax.config do |config|
66
102
  config.callback.set(:after_create_fileset) do |file_set, user|
@@ -71,6 +107,8 @@ module IiifPrint
71
107
 
72
108
  config.after_initialize do
73
109
  IiifPrint::Solr::Document.decorate(SolrDocument)
110
+ Hyrax::IiifManifestPresenter::DisplayImagePresenter
111
+ .prepend(IiifPrint::IiifManifestPresenterBehavior::DisplayImagePresenterBehavior)
74
112
  end
75
113
  # rubocop:enable Metrics/BlockLength
76
114
  end
@@ -6,4 +6,22 @@ module IiifPrint
6
6
  # Data transformation or read-error:
7
7
  class DataError < IiifPrintError
8
8
  end
9
+
10
+ class MissingFileError < IiifPrintError
11
+ end
12
+
13
+ class WorkNotConfiguredToSplitFileSetError < IiifPrintError
14
+ def initialize(file_set:, work:)
15
+ message = "Expected that we would be splitting #{file_set.class} ID=#{file_set&.id} #to_param=#{file_set&.to_param} " \
16
+ "for work #{work.class} ID=#{work&.id} #to_param=#{work&.to_param}. " \
17
+ "However it was not configured for PDF splitting."
18
+ super(message)
19
+ end
20
+ end
21
+
22
+ class UnexpectedMimeTypeError < IiifPrintError
23
+ def initialize(file_set:, mime_type:)
24
+ super "Unexpected mime_type #{mime_type} for #{file_set.class} ID=#{file_set.id.inspect}"
25
+ end
26
+ end
9
27
  end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Overrides Hyrax to add show_parents_only to processor chain
4
+ module IiifPrint
5
+ class HomepageSearchBuilder < Hyrax::HomepageSearchBuilder
6
+ self.default_processor_chain += [:show_parents_only]
7
+
8
+ def show_parents_only(solr_parameters)
9
+ query = if blacklight_params["include_child_works"] == 'true'
10
+ IiifPrint.solr_construct_query(is_child_bsi: 'true')
11
+ else
12
+ IiifPrint.solr_construct_query(is_child_bsi: nil)
13
+ end
14
+ solr_parameters[:fq] += [query]
15
+ end
16
+ end
17
+ end
@@ -3,11 +3,10 @@ require 'tmpdir'
3
3
 
4
4
  module IiifPrint
5
5
  class ImageTool
6
- attr_accessor :path, :ftype
6
+ attr_accessor :path
7
7
 
8
8
  def initialize(path)
9
9
  @path = path
10
- @ftype = magic
11
10
  @metadata = nil
12
11
  end
13
12
 
@@ -60,7 +59,7 @@ module IiifPrint
60
59
  end
61
60
 
62
61
  def im_line_select(lines, key)
63
- line = lines.find { |l| l.scrub.downcase.strip.start_with?(key) }
62
+ line = lines.find { |l| l.scrub.downcase.strip.start_with?(key.downcase) }
64
63
  # Given "key: value" line, return the value as String stripped of
65
64
  # leading and trailing whitespace
66
65
  return line if line.nil?
@@ -75,20 +74,25 @@ module IiifPrint
75
74
 
76
75
  # @return [Array<String>] lines of output from imagemagick `identify`
77
76
  def im_identify
78
- cmd = "identify -verbose #{path}"
77
+ cmd = "identify -format 'Geometry: %G\nDepth: %[bit-depth]\nColorspace: %[colorspace]\nAlpha: %A\nMIME type: %m\n' #{path}"
79
78
  `#{cmd}`.lines
80
79
  end
81
80
 
82
81
  def im_mime(lines)
83
82
  return 'application/pdf' if pdf? # workaround older imagemagick bug
84
- im_line_select(lines, 'mime type')
83
+
84
+ format = im_line_select(lines, 'mime type')
85
+ return if format.blank?
86
+
87
+ # `identify -format` with the `%m` switch only gives the format name, so we coerce it into an image mime type
88
+ Mime::Type.lookup_by_extension(format.downcase).to_s
85
89
  end
86
90
 
87
91
  def populate_im_color!(lines, result)
88
92
  bpc = im_line_select(lines, 'depth').split('-')[0].to_i # '1-bit' -> 1
89
93
  colorspace = im_line_select(lines, 'colorspace')
90
94
  color = colorspace == 'Gray' ? 'gray' : 'color'
91
- has_alpha = !im_line_select(lines, 'Alpha').nil?
95
+ has_alpha = !im_line_select(lines, 'alpha') == 'Undefined'
92
96
  result[:num_components] = (color == 'gray' ? 1 : 3) + (has_alpha ? 1 : 0)
93
97
  result[:color] = bpc == 1 ? 'monochrome' : color
94
98
  result[:bits_per_component] = bpc
@@ -105,11 +109,11 @@ module IiifPrint
105
109
  end
106
110
 
107
111
  def magic
108
- File.read(@path, 23, 0)
112
+ @magic ||= File.read(@path, 23, 0)
109
113
  end
110
114
 
111
115
  def jp2?
112
- @ftype.end_with?('ftypjp2')
116
+ magic.end_with?('ftypjp2')
113
117
  end
114
118
 
115
119
  def pdf?
@@ -53,7 +53,10 @@ module IiifPrint
53
53
  render_cmd = opj_command
54
54
 
55
55
  # Run the generated command to make derivative file at @dest_path
56
- `#{render_cmd}`
56
+ data = `#{render_cmd}`
57
+
58
+ # Create Hyrax::FileMetadata object for the derivatives (if Valkyrie)
59
+ IiifPrint.copy_derivatives_from_data_store(stream: data, directives: { url: file_set.id.to_s, container: 'service_file', mime_type: mime_type_for(target_extension) })
57
60
 
58
61
  # Clean up any intermediate files or symlinks used during creation
59
62
  cleanup_intermediate
@@ -2,7 +2,8 @@ module IiifPrint
2
2
  # The purpose of this module is to encode lineage related services:
3
3
  #
4
4
  # - {.ancestor_ids_for}
5
- # - {.descendent_file_set_ids_for}
5
+ # - {.descendent_member_ids_for}
6
+ # - {.ancestry_identifier_for}
6
7
  #
7
8
  # The ancestor and descendent_file_sets are useful for ensuring we index together related items.
8
9
  # For example, when I have a work that is a book, and one file set per page of that book, when I
@@ -15,27 +16,60 @@ module IiifPrint
15
16
  #
16
17
  # @param object [#in_works] An object that responds to #in_works
17
18
  # @return [Array<String>]
19
+ #
20
+ # @note For those implementing their own lineage service, verify that you are not returning
21
+ # an array of
18
22
  def self.ancestor_ids_for(object)
19
23
  ancestor_ids ||= []
20
- object.in_works.each do |work|
21
- ancestor_ids << work.id
22
- ancestor_ids += ancestor_ids_for(work) if work.is_child
24
+ # Yes, we're fetching the works, then compressing those into identifiers. Because in the case
25
+ # of slugs, we need not the identifier, but the slug as the id.
26
+ IiifPrint.object_in_works(object).each do |work|
27
+ ancestor_ids << ancestry_identifier_for(work)
28
+ ancestor_ids += ancestor_ids_for(work) if work.respond_to?(:is_child) && work.is_child
23
29
  end
24
- ancestor_ids.flatten.compact.uniq
30
+ # We must convert these to strings as Valkyrie's identifiers will be cast to hashes when we
31
+ # attempt to write the SolrDocument. Also, per documentation we return an Array of strings, not
32
+ # an Array that might include Valkyrie::ID objects.
33
+ ancestor_ids.flatten.compact.uniq.map(&:to_s)
34
+ end
35
+
36
+ ##
37
+ # @api public
38
+ #
39
+ # Given the :work, return its identifier
40
+ #
41
+ # @param [Object]
42
+ # @return [String]
43
+ def self.ancestry_identifier_for(work)
44
+ IiifPrint.config.ancestory_identifier_function.call(work)
25
45
  end
26
46
 
27
47
  ##
28
48
  # @param object [#ordered_works, #file_sets, #member_ids]
29
- # @return [Array<String>] the ids of associated file sets
30
- def self.descendent_file_set_ids_for(object)
49
+ # @return [Array<String>] the ids of associated file sets and child works
50
+ #
51
+ # @see
52
+ # https://github.com/samvera/hyrax/blob/2b807fe101176d594129ef8a8fe466d3d03a372b/app/indexers/hyrax/work_indexer.rb#L15-L18
53
+ # for "clarification" of the comingling of file_set_ids and member_ids
54
+ def self.descendent_member_ids_for(object)
55
+ return unless object.respond_to?(:member_ids)
56
+
31
57
  # enables us to return parents when searching for child OCR
32
- file_set_ids = object.file_sets.map(&:id)
33
- object.ordered_works&.each do |child|
34
- file_set_ids += descendent_file_set_ids_for(child)
58
+ #
59
+ # https://github.com/samvera/hydra-works/blob/c9b9dd0cf11de671920ba0a7161db68ccf9b7f6d/lib/hydra/works/models/concerns/work_behavior.rb#L90-L92
60
+ #
61
+ # The Hydra::Works implementation of file_set_ids is "members.select(&:file_set?).map(&:id)";
62
+ # so no sense doing `object.file_set_ids + object.member_ids`
63
+ file_set_ids = object.member_ids
64
+ IiifPrint.object_ordered_works(object)&.each do |child|
65
+ file_set_ids += Array.wrap(descendent_member_ids_for(child))
35
66
  end
36
- # enables us to return parents when searching for child metadata
37
- file_set_ids += object.member_ids
38
- file_set_ids.flatten.uniq.compact
67
+ # We must convert these to strings as Valkyrie's identifiers will be cast to hashes when we
68
+ # attempt to write the SolrDocument.
69
+ file_set_ids.flatten.uniq.compact.map(&:to_s)
70
+ end
71
+ class << self
72
+ alias descendent_file_set_ids_for descendent_member_ids_for
39
73
  end
40
74
  end
41
75
  end