iiif_print 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +98 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
- data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
- data/app/models/concerns/iiif_print/solr/document.rb +14 -0
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +10 -9
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +2 -1
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
- data/lib/iiif_print/catalog_search_builder.rb +5 -1
- data/lib/iiif_print/configuration.rb +145 -8
- data/lib/iiif_print/data/fileset_helper.rb +1 -1
- data/lib/iiif_print/data/work_derivatives.rb +3 -3
- data/lib/iiif_print/engine.rb +7 -13
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
- data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
- data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/lib/iiif_print/lineage_service.rb +29 -8
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +167 -12
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/spec/factories/newspaper_page_solr_document.rb +9 -1
- data/spec/fixtures/authorities/licenses.yml +4 -0
- data/spec/fixtures/authorities/rights_statements.yml +4 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
- data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
- data/spec/iiif_print/configuration_spec.rb +141 -15
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
- data/spec/iiif_print/lineage_service_spec.rb +1 -1
- data/spec/iiif_print/metadata_spec.rb +157 -23
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
- data/spec/iiif_print_spec.rb +125 -5
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
- data/spec/samvera/derivatives/configuration_spec.rb +41 -0
- data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
- data/spec/samvera/derivatives_spec.rb +54 -0
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +123 -35
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
@@ -2,16 +2,8 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
RSpec.describe IiifPrint::Metadata do
|
4
4
|
let(:base_url) { "https://my.dev.test" }
|
5
|
-
let(:
|
6
|
-
let(:fields)
|
7
|
-
metadata_fields.map do |field|
|
8
|
-
SampleField.new(
|
9
|
-
name: field.first,
|
10
|
-
label: Hyrax::Renderers::AttributeRenderer.new(field, nil).label,
|
11
|
-
options: field.last
|
12
|
-
)
|
13
|
-
end
|
14
|
-
end
|
5
|
+
let(:solr_hit) { SolrHit.new(attributes) }
|
6
|
+
let(:fields) { IiifPrint.default_fields(fields: metadata_fields) }
|
15
7
|
let(:metadata_fields) do
|
16
8
|
{
|
17
9
|
title: {},
|
@@ -20,12 +12,10 @@ RSpec.describe IiifPrint::Metadata do
|
|
20
12
|
}
|
21
13
|
end
|
22
14
|
|
23
|
-
SampleField = Struct.new(:name, :label, :options, keyword_init: true)
|
24
|
-
|
25
15
|
describe ".build_metadata_for" do
|
26
16
|
subject(:manifest_metadata) do
|
27
17
|
described_class.build_metadata_for(
|
28
|
-
work:
|
18
|
+
work: solr_hit,
|
29
19
|
version: version,
|
30
20
|
fields: fields,
|
31
21
|
current_ability: double(Ability),
|
@@ -73,7 +63,7 @@ RSpec.describe IiifPrint::Metadata do
|
|
73
63
|
let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
|
74
64
|
|
75
65
|
it "adds a link to the faceted search" do
|
76
|
-
expect(manifest_metadata).
|
66
|
+
expect(manifest_metadata).to eq [
|
77
67
|
{ "label" => "Creator",
|
78
68
|
"value" =>
|
79
69
|
["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
|
@@ -81,6 +71,34 @@ RSpec.describe IiifPrint::Metadata do
|
|
81
71
|
end
|
82
72
|
end
|
83
73
|
|
74
|
+
context "with an authority option" do
|
75
|
+
context "rights statement" do
|
76
|
+
let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
|
77
|
+
let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
|
78
|
+
|
79
|
+
it "renders a link and displays a term" do
|
80
|
+
expect(manifest_metadata).to eq [
|
81
|
+
{ "label" => "Rights statement",
|
82
|
+
"value" => ["<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"] }
|
83
|
+
]
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context "license" do
|
88
|
+
let(:metadata_fields) { { license: { render_as: :license } } }
|
89
|
+
let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
|
90
|
+
|
91
|
+
it "renders a link and displays a term" do
|
92
|
+
expect(manifest_metadata).to eq [
|
93
|
+
{ "label" => "License",
|
94
|
+
"value" => [
|
95
|
+
"<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
|
96
|
+
] }
|
97
|
+
]
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
84
102
|
context "when the work is apart of a collection" do
|
85
103
|
let(:metadata_fields) { { collection: {} } }
|
86
104
|
let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
|
@@ -88,6 +106,7 @@ RSpec.describe IiifPrint::Metadata do
|
|
88
106
|
let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
|
89
107
|
|
90
108
|
it "renders a link to the collection" do
|
109
|
+
allow(SolrDocument).to receive(:find)
|
91
110
|
allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
|
92
111
|
expect(manifest_metadata).to eq [
|
93
112
|
{ "label" => "Collection",
|
@@ -95,20 +114,135 @@ RSpec.describe IiifPrint::Metadata do
|
|
95
114
|
]
|
96
115
|
end
|
97
116
|
end
|
117
|
+
|
118
|
+
context "when the value has an empty string" do
|
119
|
+
let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
|
120
|
+
|
121
|
+
it "does not map the field with an empty string" do
|
122
|
+
expect(manifest_metadata.flat_map(&:values)).not_to include([""])
|
123
|
+
expect(manifest_metadata).to eq [{ "label" => "Title", "value" => ["This is a title."] }]
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context "when the value is an empty string" do
|
128
|
+
let(:attributes) { { "description_tesim" => [""] } }
|
129
|
+
|
130
|
+
it "returns and empty array" do
|
131
|
+
expect(manifest_metadata).to eq []
|
132
|
+
end
|
133
|
+
end
|
98
134
|
end
|
99
135
|
|
100
|
-
context "for version 3 of the IIIF spec"
|
136
|
+
context "for version 3 of the IIIF spec" do
|
101
137
|
let(:version) { 3 }
|
102
138
|
|
103
|
-
|
139
|
+
context "with a field that has some plain text" do
|
140
|
+
let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
|
141
|
+
|
104
142
|
# NOTE: this assumes the I18n.locale is set as :en
|
105
|
-
|
106
|
-
{ "label" => { "en" => ["Title"] },
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
143
|
+
it "maps the metadata accordingly" do
|
144
|
+
expect(manifest_metadata).to eq [{ "label" => { "en" => ["Title"] },
|
145
|
+
"value" => { "none" => ["My Awesome Title"] } }]
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
context "with a field that contains a url string" do
|
150
|
+
let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
|
151
|
+
|
152
|
+
it "creates a link for the url string" do
|
153
|
+
expect(manifest_metadata).to eq [
|
154
|
+
{ "label" => { "en" => ["Description"] },
|
155
|
+
"value" => { "none" =>
|
156
|
+
["A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"] } }
|
157
|
+
]
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
context "with a date" do
|
162
|
+
let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
|
163
|
+
|
164
|
+
it "displays it just the date" do
|
165
|
+
expect(manifest_metadata).to eq [{ "label" => { "en" => ["Date modified"] },
|
166
|
+
"value" => { "none" => ["2011-11-11"] } }]
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
context "with a faceted option" do
|
171
|
+
let(:metadata_fields) { { creator: { render_as: :faceted } } }
|
172
|
+
let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
|
173
|
+
|
174
|
+
it "adds a link to the faceted search" do
|
175
|
+
expect(manifest_metadata). to eq [
|
176
|
+
{ "label" => { "en" => ["Creator"] },
|
177
|
+
"value" => { "none" =>
|
178
|
+
["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] } }
|
179
|
+
]
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
context "with an authority option" do
|
184
|
+
context "rights statement" do
|
185
|
+
let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
|
186
|
+
let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
|
187
|
+
|
188
|
+
it "renders a link and displays a term" do
|
189
|
+
expect(manifest_metadata).to eq [
|
190
|
+
{ "label" => { "en" => ["Rights statement"] },
|
191
|
+
"value" => { "none" => [
|
192
|
+
"<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"
|
193
|
+
] } }
|
194
|
+
]
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
context "license" do
|
199
|
+
let(:metadata_fields) { { license: { render_as: :license } } }
|
200
|
+
let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
|
201
|
+
|
202
|
+
it "renders a link and displays a term" do
|
203
|
+
expect(manifest_metadata).to eq [
|
204
|
+
{ "label" => { "en" => ["License"] },
|
205
|
+
"value" => { "none" => [
|
206
|
+
"<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
|
207
|
+
] } }
|
208
|
+
]
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
context "when the work is apart of a collection" do
|
214
|
+
let(:metadata_fields) { { collection: {} } }
|
215
|
+
let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
|
216
|
+
let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
|
217
|
+
let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
|
218
|
+
|
219
|
+
it "renders a link to the collection" do
|
220
|
+
allow(SolrDocument).to receive(:find)
|
221
|
+
allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
|
222
|
+
expect(manifest_metadata).to eq [
|
223
|
+
{ "label" => { "en" => ["Collection"] },
|
224
|
+
"value" => { "none" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] } }
|
225
|
+
]
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
context "when the value has an empty string" do
|
230
|
+
let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
|
231
|
+
|
232
|
+
it "does not map the field with an empty string" do
|
233
|
+
expect(manifest_metadata.flat_map(&:values)).not_to include({ "none" => [""] })
|
234
|
+
expect(manifest_metadata).to eq [
|
235
|
+
{ "label" => { "en" => ["Title"] }, "value" => { "none" => ["This is a title."] } }
|
236
|
+
]
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
context "when the value is an empty string" do
|
241
|
+
let(:attributes) { { "description_tesim" => [""] } }
|
242
|
+
|
243
|
+
it "returns and empty array" do
|
244
|
+
expect(manifest_metadata).to eq []
|
245
|
+
end
|
112
246
|
end
|
113
247
|
end
|
114
248
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe IiifPrint::SplitPdfs::BaseSplitter do
|
4
|
+
let(:path) { __FILE__ }
|
5
|
+
let(:splitter) { described_class.new(path) }
|
6
|
+
subject { described_class }
|
7
|
+
|
8
|
+
it { is_expected.to respond_to(:call) }
|
9
|
+
|
10
|
+
describe "instance" do
|
11
|
+
subject { splitter }
|
12
|
+
|
13
|
+
it { is_expected.to respond_to :compression }
|
14
|
+
it { is_expected.to respond_to :compression? }
|
15
|
+
it { is_expected.to respond_to :image_extension }
|
16
|
+
it { is_expected.to respond_to :quality }
|
17
|
+
end
|
18
|
+
|
19
|
+
describe '#compression' do
|
20
|
+
it 'can be changed within the instance' do
|
21
|
+
expect do
|
22
|
+
splitter.compression = 'squishy'
|
23
|
+
end.not_to change(splitter.class, :compression)
|
24
|
+
expect(splitter.compression).to eq('squishy')
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe IiifPrint::SplitPdfs::DerivativeRodeoSplitter do
|
6
|
+
let(:filename) { __FILE__ }
|
7
|
+
let(:work) { double(MyWork, id: 'id-12345', aark_id: '12345') }
|
8
|
+
let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
|
9
|
+
let(:location_stub) { double(DerivativeRodeo::StorageLocations::BaseLocation, exist?: true) }
|
10
|
+
|
11
|
+
before do
|
12
|
+
allow(DerivativeRodeo::StorageLocations::BaseLocation).to receive(:from_uri).and_return(location_stub)
|
13
|
+
end
|
14
|
+
|
15
|
+
describe 'class' do
|
16
|
+
subject { described_class }
|
17
|
+
|
18
|
+
it { is_expected.to respond_to(:call) }
|
19
|
+
end
|
20
|
+
|
21
|
+
subject(:instance) { described_class.new(filename, file_set: file_set, output_tmp_dir: Dir.tmpdir) }
|
22
|
+
let(:generator) { double(DerivativeRodeo::Generators::PdfSplitGenerator, generated_files: []) }
|
23
|
+
|
24
|
+
before do
|
25
|
+
allow(file_set).to receive(:parent).and_return(work)
|
26
|
+
# TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but
|
27
|
+
# this part is only one piece of the over all integration.
|
28
|
+
allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__))
|
29
|
+
end
|
30
|
+
|
31
|
+
it { is_expected.to respond_to :split_files }
|
32
|
+
|
33
|
+
it 'uses the rodeo to split' do
|
34
|
+
expect(DerivativeRodeo::Generators::PdfSplitGenerator).to receive(:new).and_return(generator)
|
35
|
+
described_class.call(filename, file_set: file_set)
|
36
|
+
end
|
37
|
+
|
38
|
+
describe '#preprocessed_location_template' do
|
39
|
+
let(:derivative_rodeo_preprocessed_file) { IiifPrint::DerivativeRodeoService.derivative_rodeo_uri(file_set: file_set, filename: filename) }
|
40
|
+
let(:import_url) { "https://somewhere.com/that/exists.pdf" }
|
41
|
+
subject { instance.preprocessed_location_template }
|
42
|
+
|
43
|
+
context 'when the s3 file exists in the rodeo' do
|
44
|
+
it 'is that file' do
|
45
|
+
is_expected.to eq(derivative_rodeo_preprocessed_file)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
context 'when the s3 file does not exist in the rodeo and we have the local file' do
|
50
|
+
it 'is the import_url' do
|
51
|
+
expect_any_instance_of(DerivativeRodeo::Generators::CopyGenerator).not_to receive(:generated_uris)
|
52
|
+
file_set.import_url = import_url
|
53
|
+
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
|
54
|
+
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(true)
|
55
|
+
expect(subject).to eq(instance.input_uri)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
context 'when the s3 file does not exist and we do not have the input URI nor the given import url does NOT exist' do
|
60
|
+
let(:generator) { double(DerivativeRodeo::Generators::CopyGenerator, generated_uris: ["file:///generated/uri"]) }
|
61
|
+
it 'will invoke the DerivativeRodeo::Generators::CopyGenerator to bring the file locally' do
|
62
|
+
allow(DerivativeRodeo::Generators::CopyGenerator).to receive(:new).and_return(generator)
|
63
|
+
file_set.import_url = import_url
|
64
|
+
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
|
65
|
+
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(false)
|
66
|
+
|
67
|
+
expect(subject).to eq(generator.generated_uris.first)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
context "when the s3 file does not exist and we don't have a remote_url" do
|
72
|
+
it 'will use the given filename' do
|
73
|
+
file_set.import_url = nil
|
74
|
+
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
|
75
|
+
|
76
|
+
expect(subject).to eq(nil)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe IiifPrint::SplitPdfs::DestroyPdfChildWorksService do
|
6
|
+
let(:subject) { described_class.conditionally_destroy_spawned_children_of(file_set: fileset, work: work) }
|
7
|
+
|
8
|
+
let(:work) { WorkWithIiifPrintConfig.new(title: ['required title'], id: '123') }
|
9
|
+
let(:fileset) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
|
10
|
+
let(:child_work) { WorkWithIiifPrintConfig.new(title: ["Child of #{work.id} file.pdf page 01"], id: '456', is_child: true) }
|
11
|
+
let(:pending_rel1) do
|
12
|
+
IiifPrint::PendingRelationship.new(
|
13
|
+
parent_id: work.id,
|
14
|
+
child_title: "Child of #{work.id} file.pdf page 01",
|
15
|
+
child_order: "Child of #{work.id} file.pdf page 01",
|
16
|
+
parent_model: WorkWithIiifPrintConfig,
|
17
|
+
child_model: WorkWithIiifPrintConfig,
|
18
|
+
file_id: fileset.id
|
19
|
+
)
|
20
|
+
end
|
21
|
+
let(:pending_rel2) do
|
22
|
+
IiifPrint::PendingRelationship.new(
|
23
|
+
parent_id: work.id,
|
24
|
+
child_title: "Child of #{work.id} another.pdf page 01",
|
25
|
+
child_order: "Child of #{work.id} another.pdf page 01",
|
26
|
+
parent_model: WorkWithIiifPrintConfig,
|
27
|
+
child_model: WorkWithIiifPrintConfig,
|
28
|
+
file_id: 'another'
|
29
|
+
)
|
30
|
+
end
|
31
|
+
# let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
|
32
|
+
# let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
|
33
|
+
|
34
|
+
before do
|
35
|
+
allow(fileset).to receive(:parent).and_return(work)
|
36
|
+
allow(fileset).to receive(:label).and_return('file.pdf')
|
37
|
+
allow(fileset).to receive(:mime_type).and_return('application/pdf')
|
38
|
+
end
|
39
|
+
|
40
|
+
describe 'class' do
|
41
|
+
subject { described_class }
|
42
|
+
|
43
|
+
it { is_expected.to respond_to(:conditionally_destroy_spawned_children_of) }
|
44
|
+
it { is_expected.not_to respond_to(:destroy_spawned_children) }
|
45
|
+
end
|
46
|
+
|
47
|
+
describe '#conditionally_destroy_spawned_children_of' do
|
48
|
+
context 'with child works by fileset id' do
|
49
|
+
before do
|
50
|
+
allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([child_work])
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'destroys the child works' do
|
54
|
+
expect(child_work).to receive(:destroy)
|
55
|
+
subject
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
context 'with child works by title' do
|
60
|
+
before do
|
61
|
+
allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([])
|
62
|
+
allow(WorkWithIiifPrintConfig).to receive(:where).and_return([child_work])
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'destroys the child works' do
|
66
|
+
expect(child_work).to receive(:destroy)
|
67
|
+
subject
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
context 'when fileset is not a PDF mimetype' do
|
72
|
+
before do
|
73
|
+
allow(fileset).to receive(:mime_type).and_return('not_pdf')
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'returns with no changes' do
|
77
|
+
expect(IiifPrint::PendingRelationship).not_to receive(:where)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
context 'when IiifPrint::PendingRelationship records exist' do
|
82
|
+
before do
|
83
|
+
pending_rel1.save
|
84
|
+
pending_rel2.save
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'deletes only records associated with the specific fileset PDF file' do
|
88
|
+
expect { subject }.to change(IiifPrint::PendingRelationship, :count).by(-1)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'misc_shared'
|
3
|
+
|
4
|
+
RSpec.describe IiifPrint::SplitPdfs::PagesToJpgsSplitter do
|
5
|
+
let(:path) { __FILE__ }
|
6
|
+
let(:splitter) { described_class.new(path) }
|
7
|
+
|
8
|
+
describe '#quality' do
|
9
|
+
subject { splitter.quality }
|
10
|
+
it { is_expected.to eq(described_class.quality) }
|
11
|
+
end
|
12
|
+
|
13
|
+
describe '#quality?' do
|
14
|
+
subject { splitter.quality? }
|
15
|
+
it { is_expected.to be_truthy }
|
16
|
+
end
|
17
|
+
|
18
|
+
describe '#image_extension' do
|
19
|
+
subject { splitter.image_extension }
|
20
|
+
it { is_expected.to eq('jpg') }
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe IiifPrint::SplitPdfs::PagesToPngsSplitter do
|
4
|
+
describe '.compression' do
|
5
|
+
subject { described_class.compression }
|
6
|
+
it { is_expected.to be_nil }
|
7
|
+
end
|
8
|
+
|
9
|
+
describe '.compression?' do
|
10
|
+
subject { described_class.compression? }
|
11
|
+
it { is_expected.to be_falsey }
|
12
|
+
end
|
13
|
+
|
14
|
+
describe '.image_extension' do
|
15
|
+
subject { described_class.image_extension }
|
16
|
+
it { is_expected.to eq('png') }
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'misc_shared'
|
3
|
+
|
4
|
+
RSpec.describe IiifPrint::SplitPdfs::PagesToTiffsSplitter do
|
5
|
+
describe '.compression' do
|
6
|
+
subject { described_class.compression }
|
7
|
+
it { is_expected.to eq(described_class::DEFAULT_COMPRESSION) }
|
8
|
+
end
|
9
|
+
|
10
|
+
describe '.compression?' do
|
11
|
+
subject { described_class.compression? }
|
12
|
+
it { is_expected.to be_truthy }
|
13
|
+
end
|
14
|
+
|
15
|
+
describe '.image_extension' do
|
16
|
+
subject { described_class.image_extension }
|
17
|
+
it { is_expected.to eq('tiff') }
|
18
|
+
end
|
19
|
+
end
|
@@ -32,9 +32,9 @@ RSpec.describe IiifPrint::TextExtraction::HOCRReader do
|
|
32
32
|
describe "outputs text derivative formats" do
|
33
33
|
it "outputs plain text" do
|
34
34
|
plain_text = reader_minimal.text
|
35
|
-
expect(plain_text.slice(0, 40)).to eq "_A
|
35
|
+
expect(plain_text.slice(0, 40)).to eq "_A FEARFUL ADVENTURE.\n‘The Missouri. Rep"
|
36
36
|
expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
|
37
|
-
expect(reader_minimal.text.size).to eq
|
37
|
+
expect(reader_minimal.text.size).to eq 723
|
38
38
|
end
|
39
39
|
|
40
40
|
it "passes args to WordCoordsBuilder and receives output" do
|
data/spec/iiif_print_spec.rb
CHANGED
@@ -1,6 +1,11 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe IiifPrint do
|
4
|
+
describe '.skip_splitting_pdf_files_that_end_with_these_texts' do
|
5
|
+
subject { described_class }
|
6
|
+
it { is_expected.to respond_to :skip_splitting_pdf_files_that_end_with_these_texts }
|
7
|
+
end
|
8
|
+
|
4
9
|
describe ".manifest_metadata_for" do
|
5
10
|
let(:attributes) do
|
6
11
|
{ "id" => "abc123",
|
@@ -35,17 +40,132 @@ RSpec.describe IiifPrint do
|
|
35
40
|
end
|
36
41
|
|
37
42
|
it "has a #pdf_splitter_service" do
|
38
|
-
expect(record.iiif_print_config.pdf_splitter_service).to be(IiifPrint::SplitPdfs::
|
43
|
+
expect(record.iiif_print_config.pdf_splitter_service).to be(IiifPrint::SplitPdfs::PagesToJpgsSplitter)
|
39
44
|
end
|
40
45
|
|
41
46
|
it "has #derivative_service_plugins" do
|
42
47
|
expect(record.iiif_print_config.derivative_service_plugins).to eq(
|
43
|
-
[IiifPrint::
|
44
|
-
|
45
|
-
|
46
|
-
|
48
|
+
[IiifPrint::TextExtractionDerivativeService]
|
49
|
+
)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe ".fields_for_allinson_flex" do
|
55
|
+
subject { described_class.fields_for_allinson_flex(fields: fields, sort_order: sort_order) }
|
56
|
+
let(:sort_order) { [] }
|
57
|
+
|
58
|
+
context "when the fields include an admin only indexing property" do
|
59
|
+
let(:fields) do
|
60
|
+
[
|
61
|
+
IiifPrint::CollectionFieldShim.new(name: :title, value: "My Title"),
|
62
|
+
IiifPrint::CollectionFieldShim.new(name: :creator, value: "Hyrax, Sam", indexing: ["admin_only"])
|
63
|
+
]
|
64
|
+
end
|
65
|
+
|
66
|
+
it "does not include the admin only field" do
|
67
|
+
# We are mapping from one data structure to another
|
68
|
+
expect(subject.map(&:name)).to eq([fields.first.name])
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
context "when the fields include duplicate name properties" do
|
73
|
+
let(:fields) do
|
74
|
+
[
|
75
|
+
IiifPrint::CollectionFieldShim.new(name: :title, value: "My Title"),
|
76
|
+
IiifPrint::CollectionFieldShim.new(name: :title, value: "My Other Title")
|
77
|
+
]
|
78
|
+
end
|
79
|
+
|
80
|
+
it "does not include later duplicates" do
|
81
|
+
expect(subject.map(&:label)).to eq([fields.first.value])
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
context "when we provide a fields sort order" do
|
86
|
+
let(:fields) do
|
87
|
+
[
|
88
|
+
IiifPrint::CollectionFieldShim.new(name: :title, value: "My Title"),
|
89
|
+
IiifPrint::CollectionFieldShim.new(name: :creator, value: "Hyrax, Sam"),
|
90
|
+
IiifPrint::CollectionFieldShim.new(name: :date_created, value: "2023-05-02")
|
91
|
+
]
|
92
|
+
end
|
93
|
+
let(:sort_order) { [:creator, :title] }
|
94
|
+
|
95
|
+
it "returns the fields in the order specified and puts unspecified fields last" do
|
96
|
+
expect(subject.map(&:name)).to eq([:creator, :title, :date_created])
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe ".sort_af_fields!" do
|
102
|
+
let(:fields) { [:title, :creator, :date_created].map { |name| IiifPrint::Field.new(name: name) } }
|
103
|
+
subject(:sort_af_fields) { described_class.sort_af_fields!(fields, sort_order: sort_order) }
|
104
|
+
|
105
|
+
context "when the sort order is an empty array" do
|
106
|
+
let(:sort_order) { [] }
|
107
|
+
|
108
|
+
it "returns the fields in the order they were given" do
|
109
|
+
expect(sort_af_fields).to eq(fields)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
context "when the sort order specifies some of the fields" do
|
114
|
+
let(:sort_order) { [:date_created, :title] }
|
115
|
+
|
116
|
+
it "returns the fields in the order specified and puts unspecified fields last" do
|
117
|
+
expect(sort_af_fields).to eq([:date_created, :title, :creator].map { |name| IiifPrint::Field.new(name: name) })
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
describe '.conditionally_submit_split_for' do
|
123
|
+
context 'when the file suffix is one that we skip' do
|
124
|
+
subject do
|
125
|
+
described_class.conditionally_submit_split_for(
|
126
|
+
work: double,
|
127
|
+
file_set: double,
|
128
|
+
locations: ['hello.reader.pdf'],
|
129
|
+
skip_these_endings: ['.reader.pdf'],
|
130
|
+
user: double
|
47
131
|
)
|
48
132
|
end
|
133
|
+
|
134
|
+
it { is_expected.to eq(:no_pdfs_for_splitting) }
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
describe '.split_for_path_suffix?' do
|
139
|
+
context 'with default .skip_splitting_pdf_files_that_end_with_these_texts' do
|
140
|
+
subject { described_class.split_for_path_suffix?(path) }
|
141
|
+
[
|
142
|
+
["hello.pdf", true],
|
143
|
+
["hello.PDF", true],
|
144
|
+
["hello.reader.pdf", true],
|
145
|
+
["hello.png", false],
|
146
|
+
["hello.pdf.png", false]
|
147
|
+
].each do |given_path, expected_value|
|
148
|
+
context "with #{given_path.inspect}" do
|
149
|
+
let(:path) { given_path }
|
150
|
+
it { is_expected.to eq(expected_value) }
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
context 'with customized .skip_splitting_pdf_files_that_end_with_these_texts' do
|
156
|
+
subject { described_class.split_for_path_suffix?(path, skip_these_endings: ['.READER.pdf']) }
|
157
|
+
[
|
158
|
+
["hello.pdf", true],
|
159
|
+
["hello.PDF", true],
|
160
|
+
["hello.reader.pdf", false],
|
161
|
+
["hello.png", false],
|
162
|
+
["hello.pdf.png", false]
|
163
|
+
].each do |given_path, expected_value|
|
164
|
+
context "with #{given_path.inspect}" do
|
165
|
+
let(:path) { given_path }
|
166
|
+
it { is_expected.to eq(expected_value) }
|
167
|
+
end
|
168
|
+
end
|
49
169
|
end
|
50
170
|
end
|
51
171
|
end
|