iiif_print 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +98 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
  19. data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
  20. data/app/models/concerns/iiif_print/solr/document.rb +14 -0
  21. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  22. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  23. data/app/models/iiif_print/pending_relationship.rb +3 -0
  24. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  25. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  26. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
  27. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  28. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  29. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  30. data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
  31. data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
  32. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  33. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  34. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  35. data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
  36. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  37. data/config/locales/iiif_print.en.yml +4 -0
  38. data/config/routes.rb +3 -0
  39. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  40. data/docker-compose.yml +2 -2
  41. data/iiif_print.gemspec +10 -9
  42. data/lib/generators/iiif_print/install_generator.rb +21 -1
  43. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  44. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  45. data/lib/iiif_print/base_derivative_service.rb +2 -1
  46. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
  47. data/lib/iiif_print/catalog_search_builder.rb +5 -1
  48. data/lib/iiif_print/configuration.rb +145 -8
  49. data/lib/iiif_print/data/fileset_helper.rb +1 -1
  50. data/lib/iiif_print/data/work_derivatives.rb +3 -3
  51. data/lib/iiif_print/engine.rb +7 -13
  52. data/lib/iiif_print/errors.rb +18 -0
  53. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  54. data/lib/iiif_print/image_tool.rb +12 -8
  55. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
  56. data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
  57. data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  58. data/lib/iiif_print/lineage_service.rb +29 -8
  59. data/lib/iiif_print/metadata.rb +67 -48
  60. data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
  61. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
  62. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  63. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
  64. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  65. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  66. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  67. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  68. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  69. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  70. data/lib/iiif_print/version.rb +1 -1
  71. data/lib/iiif_print.rb +167 -12
  72. data/lib/samvera/derivatives/configuration.rb +83 -0
  73. data/lib/samvera/derivatives/hyrax.rb +129 -0
  74. data/lib/samvera/derivatives.rb +238 -0
  75. data/spec/factories/newspaper_page_solr_document.rb +9 -1
  76. data/spec/fixtures/authorities/licenses.yml +4 -0
  77. data/spec/fixtures/authorities/rights_statements.yml +4 -0
  78. data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
  79. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
  80. data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
  81. data/spec/iiif_print/configuration_spec.rb +141 -15
  82. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
  83. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
  84. data/spec/iiif_print/lineage_service_spec.rb +1 -1
  85. data/spec/iiif_print/metadata_spec.rb +157 -23
  86. data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
  87. data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
  88. data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
  89. data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
  90. data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
  91. data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
  92. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
  93. data/spec/iiif_print_spec.rb +125 -5
  94. data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
  95. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
  96. data/spec/samvera/derivatives/configuration_spec.rb +41 -0
  97. data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
  98. data/spec/samvera/derivatives_spec.rb +54 -0
  99. data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
  100. data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
  101. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
  102. data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
  103. data/tasks/copy_authorities_to_test_app.rake +11 -0
  104. data/tasks/iiif_print_dev.rake +4 -4
  105. metadata +123 -35
  106. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  107. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  108. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
@@ -7,7 +7,7 @@ RSpec.describe IiifPrint::LineageService do
7
7
  xit 'works'
8
8
  end
9
9
 
10
- describe '.descendent_file_set_ids_for' do
10
+ describe '.descendent_member_ids_for' do
11
11
  xit 'works'
12
12
  end
13
13
  end
@@ -2,16 +2,8 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe IiifPrint::Metadata do
4
4
  let(:base_url) { "https://my.dev.test" }
5
- let(:solr_document) { SolrDocument.new(attributes) }
6
- let(:fields) do
7
- metadata_fields.map do |field|
8
- SampleField.new(
9
- name: field.first,
10
- label: Hyrax::Renderers::AttributeRenderer.new(field, nil).label,
11
- options: field.last
12
- )
13
- end
14
- end
5
+ let(:solr_hit) { SolrHit.new(attributes) }
6
+ let(:fields) { IiifPrint.default_fields(fields: metadata_fields) }
15
7
  let(:metadata_fields) do
16
8
  {
17
9
  title: {},
@@ -20,12 +12,10 @@ RSpec.describe IiifPrint::Metadata do
20
12
  }
21
13
  end
22
14
 
23
- SampleField = Struct.new(:name, :label, :options, keyword_init: true)
24
-
25
15
  describe ".build_metadata_for" do
26
16
  subject(:manifest_metadata) do
27
17
  described_class.build_metadata_for(
28
- work: solr_document,
18
+ work: solr_hit,
29
19
  version: version,
30
20
  fields: fields,
31
21
  current_ability: double(Ability),
@@ -73,7 +63,7 @@ RSpec.describe IiifPrint::Metadata do
73
63
  let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
74
64
 
75
65
  it "adds a link to the faceted search" do
76
- expect(manifest_metadata). to eq [
66
+ expect(manifest_metadata).to eq [
77
67
  { "label" => "Creator",
78
68
  "value" =>
79
69
  ["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
@@ -81,6 +71,34 @@ RSpec.describe IiifPrint::Metadata do
81
71
  end
82
72
  end
83
73
 
74
+ context "with an authority option" do
75
+ context "rights statement" do
76
+ let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
77
+ let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
78
+
79
+ it "renders a link and displays a term" do
80
+ expect(manifest_metadata).to eq [
81
+ { "label" => "Rights statement",
82
+ "value" => ["<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"] }
83
+ ]
84
+ end
85
+ end
86
+
87
+ context "license" do
88
+ let(:metadata_fields) { { license: { render_as: :license } } }
89
+ let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
90
+
91
+ it "renders a link and displays a term" do
92
+ expect(manifest_metadata).to eq [
93
+ { "label" => "License",
94
+ "value" => [
95
+ "<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
96
+ ] }
97
+ ]
98
+ end
99
+ end
100
+ end
101
+
84
102
  context "when the work is apart of a collection" do
85
103
  let(:metadata_fields) { { collection: {} } }
86
104
  let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
@@ -88,6 +106,7 @@ RSpec.describe IiifPrint::Metadata do
88
106
  let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
89
107
 
90
108
  it "renders a link to the collection" do
109
+ allow(SolrDocument).to receive(:find)
91
110
  allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
92
111
  expect(manifest_metadata).to eq [
93
112
  { "label" => "Collection",
@@ -95,20 +114,135 @@ RSpec.describe IiifPrint::Metadata do
95
114
  ]
96
115
  end
97
116
  end
117
+
118
+ context "when the value has an empty string" do
119
+ let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
120
+
121
+ it "does not map the field with an empty string" do
122
+ expect(manifest_metadata.flat_map(&:values)).not_to include([""])
123
+ expect(manifest_metadata).to eq [{ "label" => "Title", "value" => ["This is a title."] }]
124
+ end
125
+ end
126
+
127
+ context "when the value is an empty string" do
128
+ let(:attributes) { { "description_tesim" => [""] } }
129
+
130
+ it "returns and empty array" do
131
+ expect(manifest_metadata).to eq []
132
+ end
133
+ end
98
134
  end
99
135
 
100
- context "for version 3 of the IIIF spec", skip: "version 3 metadata not implemented yet" do
136
+ context "for version 3 of the IIIF spec" do
101
137
  let(:version) { 3 }
102
138
 
103
- it "maps the metadata accordingly" do
139
+ context "with a field that has some plain text" do
140
+ let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
141
+
104
142
  # NOTE: this assumes the I18n.locale is set as :en
105
- expect(manifest_metadata).to eq [
106
- { "label" => { "en" => ["Title"] }, "value" => { "none" => ["My Awesome Title"] } },
107
- { "label" => { "en" => ["Description"] },
108
- "value" => { "none" => ["This is and awesome description"] } },
109
- { "label" => { "en" => ["Date modified"] }, "value" => { "none" => ["2011-11-11"] } },
110
- { "label" => { "en" => ["Creator"] }, "value" => { "none" => ["McAuthor, Arthur"] } }
111
- ]
143
+ it "maps the metadata accordingly" do
144
+ expect(manifest_metadata).to eq [{ "label" => { "en" => ["Title"] },
145
+ "value" => { "none" => ["My Awesome Title"] } }]
146
+ end
147
+ end
148
+
149
+ context "with a field that contains a url string" do
150
+ let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
151
+
152
+ it "creates a link for the url string" do
153
+ expect(manifest_metadata).to eq [
154
+ { "label" => { "en" => ["Description"] },
155
+ "value" => { "none" =>
156
+ ["A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"] } }
157
+ ]
158
+ end
159
+ end
160
+
161
+ context "with a date" do
162
+ let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
163
+
164
+ it "displays it just the date" do
165
+ expect(manifest_metadata).to eq [{ "label" => { "en" => ["Date modified"] },
166
+ "value" => { "none" => ["2011-11-11"] } }]
167
+ end
168
+ end
169
+
170
+ context "with a faceted option" do
171
+ let(:metadata_fields) { { creator: { render_as: :faceted } } }
172
+ let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
173
+
174
+ it "adds a link to the faceted search" do
175
+ expect(manifest_metadata). to eq [
176
+ { "label" => { "en" => ["Creator"] },
177
+ "value" => { "none" =>
178
+ ["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] } }
179
+ ]
180
+ end
181
+ end
182
+
183
+ context "with an authority option" do
184
+ context "rights statement" do
185
+ let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
186
+ let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
187
+
188
+ it "renders a link and displays a term" do
189
+ expect(manifest_metadata).to eq [
190
+ { "label" => { "en" => ["Rights statement"] },
191
+ "value" => { "none" => [
192
+ "<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"
193
+ ] } }
194
+ ]
195
+ end
196
+ end
197
+
198
+ context "license" do
199
+ let(:metadata_fields) { { license: { render_as: :license } } }
200
+ let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
201
+
202
+ it "renders a link and displays a term" do
203
+ expect(manifest_metadata).to eq [
204
+ { "label" => { "en" => ["License"] },
205
+ "value" => { "none" => [
206
+ "<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
207
+ ] } }
208
+ ]
209
+ end
210
+ end
211
+ end
212
+
213
+ context "when the work is apart of a collection" do
214
+ let(:metadata_fields) { { collection: {} } }
215
+ let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
216
+ let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
217
+ let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
218
+
219
+ it "renders a link to the collection" do
220
+ allow(SolrDocument).to receive(:find)
221
+ allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
222
+ expect(manifest_metadata).to eq [
223
+ { "label" => { "en" => ["Collection"] },
224
+ "value" => { "none" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] } }
225
+ ]
226
+ end
227
+ end
228
+
229
+ context "when the value has an empty string" do
230
+ let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
231
+
232
+ it "does not map the field with an empty string" do
233
+ expect(manifest_metadata.flat_map(&:values)).not_to include({ "none" => [""] })
234
+ expect(manifest_metadata).to eq [
235
+ { "label" => { "en" => ["Title"] }, "value" => { "none" => ["This is a title."] } }
236
+ ]
237
+ end
238
+ end
239
+
240
+ context "when the value is an empty string" do
241
+ let(:attributes) { { "description_tesim" => [""] } }
242
+
243
+ it "returns and empty array" do
244
+ expect(manifest_metadata).to eq []
245
+ end
112
246
  end
113
247
  end
114
248
  end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::SplitPdfs::BaseSplitter do
4
+ let(:path) { __FILE__ }
5
+ let(:splitter) { described_class.new(path) }
6
+ subject { described_class }
7
+
8
+ it { is_expected.to respond_to(:call) }
9
+
10
+ describe "instance" do
11
+ subject { splitter }
12
+
13
+ it { is_expected.to respond_to :compression }
14
+ it { is_expected.to respond_to :compression? }
15
+ it { is_expected.to respond_to :image_extension }
16
+ it { is_expected.to respond_to :quality }
17
+ end
18
+
19
+ describe '#compression' do
20
+ it 'can be changed within the instance' do
21
+ expect do
22
+ splitter.compression = 'squishy'
23
+ end.not_to change(splitter.class, :compression)
24
+ expect(splitter.compression).to eq('squishy')
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe IiifPrint::SplitPdfs::DerivativeRodeoSplitter do
6
+ let(:filename) { __FILE__ }
7
+ let(:work) { double(MyWork, id: 'id-12345', aark_id: '12345') }
8
+ let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
9
+ let(:location_stub) { double(DerivativeRodeo::StorageLocations::BaseLocation, exist?: true) }
10
+
11
+ before do
12
+ allow(DerivativeRodeo::StorageLocations::BaseLocation).to receive(:from_uri).and_return(location_stub)
13
+ end
14
+
15
+ describe 'class' do
16
+ subject { described_class }
17
+
18
+ it { is_expected.to respond_to(:call) }
19
+ end
20
+
21
+ subject(:instance) { described_class.new(filename, file_set: file_set, output_tmp_dir: Dir.tmpdir) }
22
+ let(:generator) { double(DerivativeRodeo::Generators::PdfSplitGenerator, generated_files: []) }
23
+
24
+ before do
25
+ allow(file_set).to receive(:parent).and_return(work)
26
+ # TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but
27
+ # this part is only one piece of the over all integration.
28
+ allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__))
29
+ end
30
+
31
+ it { is_expected.to respond_to :split_files }
32
+
33
+ it 'uses the rodeo to split' do
34
+ expect(DerivativeRodeo::Generators::PdfSplitGenerator).to receive(:new).and_return(generator)
35
+ described_class.call(filename, file_set: file_set)
36
+ end
37
+
38
+ describe '#preprocessed_location_template' do
39
+ let(:derivative_rodeo_preprocessed_file) { IiifPrint::DerivativeRodeoService.derivative_rodeo_uri(file_set: file_set, filename: filename) }
40
+ let(:import_url) { "https://somewhere.com/that/exists.pdf" }
41
+ subject { instance.preprocessed_location_template }
42
+
43
+ context 'when the s3 file exists in the rodeo' do
44
+ it 'is that file' do
45
+ is_expected.to eq(derivative_rodeo_preprocessed_file)
46
+ end
47
+ end
48
+
49
+ context 'when the s3 file does not exist in the rodeo and we have the local file' do
50
+ it 'is the import_url' do
51
+ expect_any_instance_of(DerivativeRodeo::Generators::CopyGenerator).not_to receive(:generated_uris)
52
+ file_set.import_url = import_url
53
+ expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
54
+ expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(true)
55
+ expect(subject).to eq(instance.input_uri)
56
+ end
57
+ end
58
+
59
+ context 'when the s3 file does not exist and we do not have the input URI nor the given import url does NOT exist' do
60
+ let(:generator) { double(DerivativeRodeo::Generators::CopyGenerator, generated_uris: ["file:///generated/uri"]) }
61
+ it 'will invoke the DerivativeRodeo::Generators::CopyGenerator to bring the file locally' do
62
+ allow(DerivativeRodeo::Generators::CopyGenerator).to receive(:new).and_return(generator)
63
+ file_set.import_url = import_url
64
+ expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
65
+ expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(false)
66
+
67
+ expect(subject).to eq(generator.generated_uris.first)
68
+ end
69
+ end
70
+
71
+ context "when the s3 file does not exist and we don't have a remote_url" do
72
+ it 'will use the given filename' do
73
+ file_set.import_url = nil
74
+ expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
75
+
76
+ expect(subject).to eq(nil)
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe IiifPrint::SplitPdfs::DestroyPdfChildWorksService do
6
+ let(:subject) { described_class.conditionally_destroy_spawned_children_of(file_set: fileset, work: work) }
7
+
8
+ let(:work) { WorkWithIiifPrintConfig.new(title: ['required title'], id: '123') }
9
+ let(:fileset) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
10
+ let(:child_work) { WorkWithIiifPrintConfig.new(title: ["Child of #{work.id} file.pdf page 01"], id: '456', is_child: true) }
11
+ let(:pending_rel1) do
12
+ IiifPrint::PendingRelationship.new(
13
+ parent_id: work.id,
14
+ child_title: "Child of #{work.id} file.pdf page 01",
15
+ child_order: "Child of #{work.id} file.pdf page 01",
16
+ parent_model: WorkWithIiifPrintConfig,
17
+ child_model: WorkWithIiifPrintConfig,
18
+ file_id: fileset.id
19
+ )
20
+ end
21
+ let(:pending_rel2) do
22
+ IiifPrint::PendingRelationship.new(
23
+ parent_id: work.id,
24
+ child_title: "Child of #{work.id} another.pdf page 01",
25
+ child_order: "Child of #{work.id} another.pdf page 01",
26
+ parent_model: WorkWithIiifPrintConfig,
27
+ child_model: WorkWithIiifPrintConfig,
28
+ file_id: 'another'
29
+ )
30
+ end
31
+ # let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
32
+ # let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
33
+
34
+ before do
35
+ allow(fileset).to receive(:parent).and_return(work)
36
+ allow(fileset).to receive(:label).and_return('file.pdf')
37
+ allow(fileset).to receive(:mime_type).and_return('application/pdf')
38
+ end
39
+
40
+ describe 'class' do
41
+ subject { described_class }
42
+
43
+ it { is_expected.to respond_to(:conditionally_destroy_spawned_children_of) }
44
+ it { is_expected.not_to respond_to(:destroy_spawned_children) }
45
+ end
46
+
47
+ describe '#conditionally_destroy_spawned_children_of' do
48
+ context 'with child works by fileset id' do
49
+ before do
50
+ allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([child_work])
51
+ end
52
+
53
+ it 'destroys the child works' do
54
+ expect(child_work).to receive(:destroy)
55
+ subject
56
+ end
57
+ end
58
+
59
+ context 'with child works by title' do
60
+ before do
61
+ allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([])
62
+ allow(WorkWithIiifPrintConfig).to receive(:where).and_return([child_work])
63
+ end
64
+
65
+ it 'destroys the child works' do
66
+ expect(child_work).to receive(:destroy)
67
+ subject
68
+ end
69
+ end
70
+
71
+ context 'when fileset is not a PDF mimetype' do
72
+ before do
73
+ allow(fileset).to receive(:mime_type).and_return('not_pdf')
74
+ end
75
+
76
+ it 'returns with no changes' do
77
+ expect(IiifPrint::PendingRelationship).not_to receive(:where)
78
+ end
79
+ end
80
+
81
+ context 'when IiifPrint::PendingRelationship records exist' do
82
+ before do
83
+ pending_rel1.save
84
+ pending_rel2.save
85
+ end
86
+
87
+ it 'deletes only records associated with the specific fileset PDF file' do
88
+ expect { subject }.to change(IiifPrint::PendingRelationship, :count).by(-1)
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::SplitPdfs::PagesToJpgsSplitter do
5
+ let(:path) { __FILE__ }
6
+ let(:splitter) { described_class.new(path) }
7
+
8
+ describe '#quality' do
9
+ subject { splitter.quality }
10
+ it { is_expected.to eq(described_class.quality) }
11
+ end
12
+
13
+ describe '#quality?' do
14
+ subject { splitter.quality? }
15
+ it { is_expected.to be_truthy }
16
+ end
17
+
18
+ describe '#image_extension' do
19
+ subject { splitter.image_extension }
20
+ it { is_expected.to eq('jpg') }
21
+ end
22
+ end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::SplitPdfs::PagesToPngsSplitter do
4
+ describe '.compression' do
5
+ subject { described_class.compression }
6
+ it { is_expected.to be_nil }
7
+ end
8
+
9
+ describe '.compression?' do
10
+ subject { described_class.compression? }
11
+ it { is_expected.to be_falsey }
12
+ end
13
+
14
+ describe '.image_extension' do
15
+ subject { described_class.image_extension }
16
+ it { is_expected.to eq('png') }
17
+ end
18
+ end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::SplitPdfs::PagesToTiffsSplitter do
5
+ describe '.compression' do
6
+ subject { described_class.compression }
7
+ it { is_expected.to eq(described_class::DEFAULT_COMPRESSION) }
8
+ end
9
+
10
+ describe '.compression?' do
11
+ subject { described_class.compression? }
12
+ it { is_expected.to be_truthy }
13
+ end
14
+
15
+ describe '.image_extension' do
16
+ subject { described_class.image_extension }
17
+ it { is_expected.to eq('tiff') }
18
+ end
19
+ end
@@ -32,9 +32,9 @@ RSpec.describe IiifPrint::TextExtraction::HOCRReader do
32
32
  describe "outputs text derivative formats" do
33
33
  it "outputs plain text" do
34
34
  plain_text = reader_minimal.text
35
- expect(plain_text.slice(0, 40)).to eq "_A FEARFUL ADVENTURE.\n‘The Missouri. "
35
+ expect(plain_text.slice(0, 40)).to eq "_A FEARFUL ADVENTURE.\n‘The Missouri. Rep"
36
36
  expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
37
- expect(reader_minimal.text.size).to eq 831
37
+ expect(reader_minimal.text.size).to eq 723
38
38
  end
39
39
 
40
40
  it "passes args to WordCoordsBuilder and receives output" do
@@ -1,6 +1,11 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe IiifPrint do
4
+ describe '.skip_splitting_pdf_files_that_end_with_these_texts' do
5
+ subject { described_class }
6
+ it { is_expected.to respond_to :skip_splitting_pdf_files_that_end_with_these_texts }
7
+ end
8
+
4
9
  describe ".manifest_metadata_for" do
5
10
  let(:attributes) do
6
11
  { "id" => "abc123",
@@ -35,17 +40,132 @@ RSpec.describe IiifPrint do
35
40
  end
36
41
 
37
42
  it "has a #pdf_splitter_service" do
38
- expect(record.iiif_print_config.pdf_splitter_service).to be(IiifPrint::SplitPdfs::PagesIntoImagesService)
43
+ expect(record.iiif_print_config.pdf_splitter_service).to be(IiifPrint::SplitPdfs::PagesToJpgsSplitter)
39
44
  end
40
45
 
41
46
  it "has #derivative_service_plugins" do
42
47
  expect(record.iiif_print_config.derivative_service_plugins).to eq(
43
- [IiifPrint::JP2DerivativeService,
44
- IiifPrint::PDFDerivativeService,
45
- IiifPrint::TextExtractionDerivativeService,
46
- IiifPrint::TIFFDerivativeService]
48
+ [IiifPrint::TextExtractionDerivativeService]
49
+ )
50
+ end
51
+ end
52
+ end
53
+
54
+ describe ".fields_for_allinson_flex" do
55
+ subject { described_class.fields_for_allinson_flex(fields: fields, sort_order: sort_order) }
56
+ let(:sort_order) { [] }
57
+
58
+ context "when the fields include an admin only indexing property" do
59
+ let(:fields) do
60
+ [
61
+ IiifPrint::CollectionFieldShim.new(name: :title, value: "My Title"),
62
+ IiifPrint::CollectionFieldShim.new(name: :creator, value: "Hyrax, Sam", indexing: ["admin_only"])
63
+ ]
64
+ end
65
+
66
+ it "does not include the admin only field" do
67
+ # We are mapping from one data structure to another
68
+ expect(subject.map(&:name)).to eq([fields.first.name])
69
+ end
70
+ end
71
+
72
+ context "when the fields include duplicate name properties" do
73
+ let(:fields) do
74
+ [
75
+ IiifPrint::CollectionFieldShim.new(name: :title, value: "My Title"),
76
+ IiifPrint::CollectionFieldShim.new(name: :title, value: "My Other Title")
77
+ ]
78
+ end
79
+
80
+ it "does not include later duplicates" do
81
+ expect(subject.map(&:label)).to eq([fields.first.value])
82
+ end
83
+ end
84
+
85
+ context "when we provide a fields sort order" do
86
+ let(:fields) do
87
+ [
88
+ IiifPrint::CollectionFieldShim.new(name: :title, value: "My Title"),
89
+ IiifPrint::CollectionFieldShim.new(name: :creator, value: "Hyrax, Sam"),
90
+ IiifPrint::CollectionFieldShim.new(name: :date_created, value: "2023-05-02")
91
+ ]
92
+ end
93
+ let(:sort_order) { [:creator, :title] }
94
+
95
+ it "returns the fields in the order specified and puts unspecified fields last" do
96
+ expect(subject.map(&:name)).to eq([:creator, :title, :date_created])
97
+ end
98
+ end
99
+ end
100
+
101
+ describe ".sort_af_fields!" do
102
+ let(:fields) { [:title, :creator, :date_created].map { |name| IiifPrint::Field.new(name: name) } }
103
+ subject(:sort_af_fields) { described_class.sort_af_fields!(fields, sort_order: sort_order) }
104
+
105
+ context "when the sort order is an empty array" do
106
+ let(:sort_order) { [] }
107
+
108
+ it "returns the fields in the order they were given" do
109
+ expect(sort_af_fields).to eq(fields)
110
+ end
111
+ end
112
+
113
+ context "when the sort order specifies some of the fields" do
114
+ let(:sort_order) { [:date_created, :title] }
115
+
116
+ it "returns the fields in the order specified and puts unspecified fields last" do
117
+ expect(sort_af_fields).to eq([:date_created, :title, :creator].map { |name| IiifPrint::Field.new(name: name) })
118
+ end
119
+ end
120
+ end
121
+
122
+ describe '.conditionally_submit_split_for' do
123
+ context 'when the file suffix is one that we skip' do
124
+ subject do
125
+ described_class.conditionally_submit_split_for(
126
+ work: double,
127
+ file_set: double,
128
+ locations: ['hello.reader.pdf'],
129
+ skip_these_endings: ['.reader.pdf'],
130
+ user: double
47
131
  )
48
132
  end
133
+
134
+ it { is_expected.to eq(:no_pdfs_for_splitting) }
135
+ end
136
+ end
137
+
138
+ describe '.split_for_path_suffix?' do
139
+ context 'with default .skip_splitting_pdf_files_that_end_with_these_texts' do
140
+ subject { described_class.split_for_path_suffix?(path) }
141
+ [
142
+ ["hello.pdf", true],
143
+ ["hello.PDF", true],
144
+ ["hello.reader.pdf", true],
145
+ ["hello.png", false],
146
+ ["hello.pdf.png", false]
147
+ ].each do |given_path, expected_value|
148
+ context "with #{given_path.inspect}" do
149
+ let(:path) { given_path }
150
+ it { is_expected.to eq(expected_value) }
151
+ end
152
+ end
153
+ end
154
+
155
+ context 'with customized .skip_splitting_pdf_files_that_end_with_these_texts' do
156
+ subject { described_class.split_for_path_suffix?(path, skip_these_endings: ['.READER.pdf']) }
157
+ [
158
+ ["hello.pdf", true],
159
+ ["hello.PDF", true],
160
+ ["hello.reader.pdf", false],
161
+ ["hello.png", false],
162
+ ["hello.pdf.png", false]
163
+ ].each do |given_path, expected_value|
164
+ context "with #{given_path.inspect}" do
165
+ let(:path) { given_path }
166
+ it { is_expected.to eq(expected_value) }
167
+ end
168
+ end
49
169
  end
50
170
  end
51
171
  end