iiif_print 1.1.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile.lock +2 -2
- data/README.md +4 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +1 -1
- data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +37 -22
- data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
- data/{lib → app/jobs}/iiif_print/jobs/child_works_from_pdf_job.rb +14 -9
- data/{lib → app/jobs}/iiif_print/jobs/create_relationships_job.rb +10 -20
- data/app/listeners/iiif_print/listener.rb +31 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +1 -1
- data/app/models/concerns/iiif_print/solr/document.rb +5 -3
- data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +5 -2
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +4 -2
- data/app/services/iiif_print/pluggable_derivative_service.rb +5 -1
- data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
- data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
- data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
- data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
- data/app/views/hyrax/file_sets/_show_actions.html.erb +1 -1
- data/config/initializers/simple_schema_loader.rb +1 -0
- data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +3 -3
- data/iiif_print.gemspec +1 -1
- data/lib/iiif_print/base_derivative_service.rb +13 -2
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +2 -2
- data/lib/iiif_print/catalog_search_builder.rb +2 -2
- data/lib/iiif_print/configuration.rb +65 -5
- data/lib/iiif_print/data/fileset_helper.rb +2 -2
- data/lib/iiif_print/data/work_derivatives.rb +1 -1
- data/lib/iiif_print/engine.rb +46 -2
- data/lib/iiif_print/homepage_search_builder.rb +2 -2
- data/lib/iiif_print/jp2_derivative_service.rb +4 -1
- data/lib/iiif_print/lineage_service.rb +19 -6
- data/lib/iiif_print/pdf_derivative_service.rb +3 -1
- data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
- data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
- data/lib/iiif_print/persistence_layer.rb +118 -0
- data/lib/iiif_print/split_pdfs/base_splitter.rb +11 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +19 -9
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +5 -16
- data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
- data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
- data/lib/iiif_print/tiff_derivative_service.rb +3 -1
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +79 -44
- metadata +18 -191
- data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -40
- data/app/views/hyrax/file_sets/_actions.html.erb +0 -46
- data/bin/rails +0 -13
- data/spec/.keep.txt +0 -1
- data/spec/factories/ability.rb +0 -6
- data/spec/factories/newspaper_issue.rb +0 -7
- data/spec/factories/newspaper_page.rb +0 -7
- data/spec/factories/newspaper_page_solr_document.rb +0 -20
- data/spec/factories/newspaper_title.rb +0 -8
- data/spec/factories/uploaded_pdf_file.rb +0 -9
- data/spec/factories/uploaded_txt_file.rb +0 -9
- data/spec/factories/user.rb +0 -13
- data/spec/fixtures/authorities/licenses.yml +0 -4
- data/spec/fixtures/authorities/rights_statements.yml +0 -4
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +0 -7
- data/spec/fixtures/files/alto-2-0.xsd +0 -714
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +0 -16
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +0 -31
- data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
- data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
- data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +0 -202
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
- data/spec/helpers/iiif_print_helper_spec.rb +0 -43
- data/spec/iiif_print/base_derivative_service_spec.rb +0 -28
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -59
- data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
- data/spec/iiif_print/configuration_spec.rb +0 -193
- data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
- data/spec/iiif_print/data/work_file_spec.rb +0 -99
- data/spec/iiif_print/data/work_files_spec.rb +0 -237
- data/spec/iiif_print/image_tool_spec.rb +0 -109
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -35
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -118
- data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
- data/spec/iiif_print/lineage_service_spec.rb +0 -13
- data/spec/iiif_print/metadata_spec.rb +0 -249
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +0 -27
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +0 -80
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +0 -92
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +0 -22
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +0 -18
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +0 -19
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
- data/spec/iiif_print_spec.rb +0 -171
- data/spec/misc_shared.rb +0 -111
- data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +0 -27
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
- data/spec/models/solr_document_spec.rb +0 -14
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -70
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
- data/spec/samvera/derivatives/configuration_spec.rb +0 -41
- data/spec/samvera/derivatives/hyrax_spec.rb +0 -62
- data/spec/samvera/derivatives_spec.rb +0 -54
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +0 -103
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +0 -20
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -175
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
- data/spec/spec_helper.rb +0 -181
- data/spec/support/controller_level_helpers.rb +0 -28
- data/spec/support/iiif_print_models.rb +0 -127
- data/spec/test_app_templates/blacklight.yml +0 -9
- data/spec/test_app_templates/fedora.yml +0 -15
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
- data/spec/test_app_templates/redis.yml +0 -9
- data/spec/test_app_templates/solr/conf/schema.xml +0 -362
- data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
- data/spec/test_app_templates/solr.yml +0 -7
- /data/{lib → app/jobs}/iiif_print/jobs/request_split_pdf_job.rb +0 -0
@@ -1,249 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe IiifPrint::Metadata do
|
4
|
-
let(:base_url) { "https://my.dev.test" }
|
5
|
-
let(:solr_hit) { SolrHit.new(attributes) }
|
6
|
-
let(:fields) { IiifPrint.default_fields(fields: metadata_fields) }
|
7
|
-
let(:metadata_fields) do
|
8
|
-
{
|
9
|
-
title: {},
|
10
|
-
description: {},
|
11
|
-
date_modified: {}
|
12
|
-
}
|
13
|
-
end
|
14
|
-
|
15
|
-
describe ".build_metadata_for" do
|
16
|
-
subject(:manifest_metadata) do
|
17
|
-
described_class.build_metadata_for(
|
18
|
-
work: solr_hit,
|
19
|
-
version: version,
|
20
|
-
fields: fields,
|
21
|
-
current_ability: double(Ability),
|
22
|
-
base_url: base_url
|
23
|
-
)
|
24
|
-
end
|
25
|
-
|
26
|
-
context "for version 2 of the IIIF spec" do
|
27
|
-
let(:version) { 2 }
|
28
|
-
|
29
|
-
context "with a field that has some plain text" do
|
30
|
-
let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
|
31
|
-
|
32
|
-
it "maps the metadata accordingly" do
|
33
|
-
expect(manifest_metadata).to eq [
|
34
|
-
{ "label" => "Title", "value" => ["My Awesome Title"] }
|
35
|
-
]
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
context "with a field that contains a url string" do
|
40
|
-
let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
|
41
|
-
|
42
|
-
it "creates a link for the url string" do
|
43
|
-
expect(manifest_metadata).to eq [
|
44
|
-
{ "label" => "Description",
|
45
|
-
"value" =>
|
46
|
-
[
|
47
|
-
"A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"
|
48
|
-
] }
|
49
|
-
]
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
context "with a date" do
|
54
|
-
let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
|
55
|
-
|
56
|
-
it "displays it just the date" do
|
57
|
-
expect(manifest_metadata).to eq [{ "label" => "Date modified", "value" => ["2011-11-11"] }]
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
context "with a faceted option" do
|
62
|
-
let(:metadata_fields) { { creator: { render_as: :faceted } } }
|
63
|
-
let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
|
64
|
-
|
65
|
-
it "adds a link to the faceted search" do
|
66
|
-
expect(manifest_metadata).to eq [
|
67
|
-
{ "label" => "Creator",
|
68
|
-
"value" =>
|
69
|
-
["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
|
70
|
-
]
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
context "with an authority option" do
|
75
|
-
context "rights statement" do
|
76
|
-
let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
|
77
|
-
let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
|
78
|
-
|
79
|
-
it "renders a link and displays a term" do
|
80
|
-
expect(manifest_metadata).to eq [
|
81
|
-
{ "label" => "Rights statement",
|
82
|
-
"value" => ["<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"] }
|
83
|
-
]
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
context "license" do
|
88
|
-
let(:metadata_fields) { { license: { render_as: :license } } }
|
89
|
-
let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
|
90
|
-
|
91
|
-
it "renders a link and displays a term" do
|
92
|
-
expect(manifest_metadata).to eq [
|
93
|
-
{ "label" => "License",
|
94
|
-
"value" => [
|
95
|
-
"<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
|
96
|
-
] }
|
97
|
-
]
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
context "when the work is apart of a collection" do
|
103
|
-
let(:metadata_fields) { { collection: {} } }
|
104
|
-
let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
|
105
|
-
let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
|
106
|
-
let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
|
107
|
-
|
108
|
-
it "renders a link to the collection" do
|
109
|
-
allow(SolrDocument).to receive(:find)
|
110
|
-
allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
|
111
|
-
expect(manifest_metadata).to eq [
|
112
|
-
{ "label" => "Collection",
|
113
|
-
"value" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] }
|
114
|
-
]
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
context "when the value has an empty string" do
|
119
|
-
let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
|
120
|
-
|
121
|
-
it "does not map the field with an empty string" do
|
122
|
-
expect(manifest_metadata.flat_map(&:values)).not_to include([""])
|
123
|
-
expect(manifest_metadata).to eq [{ "label" => "Title", "value" => ["This is a title."] }]
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
context "when the value is an empty string" do
|
128
|
-
let(:attributes) { { "description_tesim" => [""] } }
|
129
|
-
|
130
|
-
it "returns and empty array" do
|
131
|
-
expect(manifest_metadata).to eq []
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
context "for version 3 of the IIIF spec" do
|
137
|
-
let(:version) { 3 }
|
138
|
-
|
139
|
-
context "with a field that has some plain text" do
|
140
|
-
let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
|
141
|
-
|
142
|
-
# NOTE: this assumes the I18n.locale is set as :en
|
143
|
-
it "maps the metadata accordingly" do
|
144
|
-
expect(manifest_metadata).to eq [{ "label" => { "en" => ["Title"] },
|
145
|
-
"value" => { "none" => ["My Awesome Title"] } }]
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
context "with a field that contains a url string" do
|
150
|
-
let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
|
151
|
-
|
152
|
-
it "creates a link for the url string" do
|
153
|
-
expect(manifest_metadata).to eq [
|
154
|
-
{ "label" => { "en" => ["Description"] },
|
155
|
-
"value" => { "none" =>
|
156
|
-
["A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"] } }
|
157
|
-
]
|
158
|
-
end
|
159
|
-
end
|
160
|
-
|
161
|
-
context "with a date" do
|
162
|
-
let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
|
163
|
-
|
164
|
-
it "displays it just the date" do
|
165
|
-
expect(manifest_metadata).to eq [{ "label" => { "en" => ["Date modified"] },
|
166
|
-
"value" => { "none" => ["2011-11-11"] } }]
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
|
-
context "with a faceted option" do
|
171
|
-
let(:metadata_fields) { { creator: { render_as: :faceted } } }
|
172
|
-
let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
|
173
|
-
|
174
|
-
it "adds a link to the faceted search" do
|
175
|
-
expect(manifest_metadata). to eq [
|
176
|
-
{ "label" => { "en" => ["Creator"] },
|
177
|
-
"value" => { "none" =>
|
178
|
-
["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] } }
|
179
|
-
]
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
|
-
context "with an authority option" do
|
184
|
-
context "rights statement" do
|
185
|
-
let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
|
186
|
-
let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
|
187
|
-
|
188
|
-
it "renders a link and displays a term" do
|
189
|
-
expect(manifest_metadata).to eq [
|
190
|
-
{ "label" => { "en" => ["Rights statement"] },
|
191
|
-
"value" => { "none" => [
|
192
|
-
"<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"
|
193
|
-
] } }
|
194
|
-
]
|
195
|
-
end
|
196
|
-
end
|
197
|
-
|
198
|
-
context "license" do
|
199
|
-
let(:metadata_fields) { { license: { render_as: :license } } }
|
200
|
-
let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
|
201
|
-
|
202
|
-
it "renders a link and displays a term" do
|
203
|
-
expect(manifest_metadata).to eq [
|
204
|
-
{ "label" => { "en" => ["License"] },
|
205
|
-
"value" => { "none" => [
|
206
|
-
"<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
|
207
|
-
] } }
|
208
|
-
]
|
209
|
-
end
|
210
|
-
end
|
211
|
-
end
|
212
|
-
|
213
|
-
context "when the work is apart of a collection" do
|
214
|
-
let(:metadata_fields) { { collection: {} } }
|
215
|
-
let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
|
216
|
-
let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
|
217
|
-
let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
|
218
|
-
|
219
|
-
it "renders a link to the collection" do
|
220
|
-
allow(SolrDocument).to receive(:find)
|
221
|
-
allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
|
222
|
-
expect(manifest_metadata).to eq [
|
223
|
-
{ "label" => { "en" => ["Collection"] },
|
224
|
-
"value" => { "none" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] } }
|
225
|
-
]
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
|
-
context "when the value has an empty string" do
|
230
|
-
let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
|
231
|
-
|
232
|
-
it "does not map the field with an empty string" do
|
233
|
-
expect(manifest_metadata.flat_map(&:values)).not_to include({ "none" => [""] })
|
234
|
-
expect(manifest_metadata).to eq [
|
235
|
-
{ "label" => { "en" => ["Title"] }, "value" => { "none" => ["This is a title."] } }
|
236
|
-
]
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
context "when the value is an empty string" do
|
241
|
-
let(:attributes) { { "description_tesim" => [""] } }
|
242
|
-
|
243
|
-
it "returns and empty array" do
|
244
|
-
expect(manifest_metadata).to eq []
|
245
|
-
end
|
246
|
-
end
|
247
|
-
end
|
248
|
-
end
|
249
|
-
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe IiifPrint::SplitPdfs::BaseSplitter do
|
4
|
-
let(:path) { __FILE__ }
|
5
|
-
let(:splitter) { described_class.new(path) }
|
6
|
-
subject { described_class }
|
7
|
-
|
8
|
-
it { is_expected.to respond_to(:call) }
|
9
|
-
|
10
|
-
describe "instance" do
|
11
|
-
subject { splitter }
|
12
|
-
|
13
|
-
it { is_expected.to respond_to :compression }
|
14
|
-
it { is_expected.to respond_to :compression? }
|
15
|
-
it { is_expected.to respond_to :image_extension }
|
16
|
-
it { is_expected.to respond_to :quality }
|
17
|
-
end
|
18
|
-
|
19
|
-
describe '#compression' do
|
20
|
-
it 'can be changed within the instance' do
|
21
|
-
expect do
|
22
|
-
splitter.compression = 'squishy'
|
23
|
-
end.not_to change(splitter.class, :compression)
|
24
|
-
expect(splitter.compression).to eq('squishy')
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,80 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
|
-
RSpec.describe IiifPrint::SplitPdfs::DerivativeRodeoSplitter do
|
6
|
-
let(:filename) { __FILE__ }
|
7
|
-
let(:work) { double(MyWork, id: 'id-12345', aark_id: '12345') }
|
8
|
-
let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
|
9
|
-
let(:location_stub) { double(DerivativeRodeo::StorageLocations::BaseLocation, exist?: true) }
|
10
|
-
|
11
|
-
before do
|
12
|
-
allow(DerivativeRodeo::StorageLocations::BaseLocation).to receive(:from_uri).and_return(location_stub)
|
13
|
-
end
|
14
|
-
|
15
|
-
describe 'class' do
|
16
|
-
subject { described_class }
|
17
|
-
|
18
|
-
it { is_expected.to respond_to(:call) }
|
19
|
-
end
|
20
|
-
|
21
|
-
subject(:instance) { described_class.new(filename, file_set: file_set, output_tmp_dir: Dir.tmpdir) }
|
22
|
-
let(:generator) { double(DerivativeRodeo::Generators::PdfSplitGenerator, generated_files: []) }
|
23
|
-
|
24
|
-
before do
|
25
|
-
allow(file_set).to receive(:parent).and_return(work)
|
26
|
-
# TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but
|
27
|
-
# this part is only one piece of the over all integration.
|
28
|
-
allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__))
|
29
|
-
end
|
30
|
-
|
31
|
-
it { is_expected.to respond_to :split_files }
|
32
|
-
|
33
|
-
it 'uses the rodeo to split' do
|
34
|
-
expect(DerivativeRodeo::Generators::PdfSplitGenerator).to receive(:new).and_return(generator)
|
35
|
-
described_class.call(filename, file_set: file_set)
|
36
|
-
end
|
37
|
-
|
38
|
-
describe '#preprocessed_location_template' do
|
39
|
-
let(:derivative_rodeo_preprocessed_file) { IiifPrint::DerivativeRodeoService.derivative_rodeo_uri(file_set: file_set, filename: filename) }
|
40
|
-
let(:import_url) { "https://somewhere.com/that/exists.pdf" }
|
41
|
-
subject { instance.preprocessed_location_template }
|
42
|
-
|
43
|
-
context 'when the s3 file exists in the rodeo' do
|
44
|
-
it 'is that file' do
|
45
|
-
is_expected.to eq(derivative_rodeo_preprocessed_file)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
context 'when the s3 file does not exist in the rodeo and we have the local file' do
|
50
|
-
it 'is the import_url' do
|
51
|
-
expect_any_instance_of(DerivativeRodeo::Generators::CopyGenerator).not_to receive(:generated_uris)
|
52
|
-
file_set.import_url = import_url
|
53
|
-
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
|
54
|
-
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(true)
|
55
|
-
expect(subject).to eq(instance.input_uri)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
context 'when the s3 file does not exist and we do not have the input URI nor the given import url does NOT exist' do
|
60
|
-
let(:generator) { double(DerivativeRodeo::Generators::CopyGenerator, generated_uris: ["file:///generated/uri"]) }
|
61
|
-
it 'will invoke the DerivativeRodeo::Generators::CopyGenerator to bring the file locally' do
|
62
|
-
allow(DerivativeRodeo::Generators::CopyGenerator).to receive(:new).and_return(generator)
|
63
|
-
file_set.import_url = import_url
|
64
|
-
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
|
65
|
-
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(false)
|
66
|
-
|
67
|
-
expect(subject).to eq(generator.generated_uris.first)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
context "when the s3 file does not exist and we don't have a remote_url" do
|
72
|
-
it 'will use the given filename' do
|
73
|
-
file_set.import_url = nil
|
74
|
-
expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
|
75
|
-
|
76
|
-
expect(subject).to eq(nil)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
@@ -1,92 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
|
-
RSpec.describe IiifPrint::SplitPdfs::DestroyPdfChildWorksService do
|
6
|
-
let(:subject) { described_class.conditionally_destroy_spawned_children_of(file_set: fileset, work: work) }
|
7
|
-
|
8
|
-
let(:work) { WorkWithIiifPrintConfig.new(title: ['required title'], id: '123') }
|
9
|
-
let(:fileset) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
|
10
|
-
let(:child_work) { WorkWithIiifPrintConfig.new(title: ["Child of #{work.id} file.pdf page 01"], id: '456', is_child: true) }
|
11
|
-
let(:pending_rel1) do
|
12
|
-
IiifPrint::PendingRelationship.new(
|
13
|
-
parent_id: work.id,
|
14
|
-
child_title: "Child of #{work.id} file.pdf page 01",
|
15
|
-
child_order: "Child of #{work.id} file.pdf page 01",
|
16
|
-
parent_model: WorkWithIiifPrintConfig,
|
17
|
-
child_model: WorkWithIiifPrintConfig,
|
18
|
-
file_id: fileset.id
|
19
|
-
)
|
20
|
-
end
|
21
|
-
let(:pending_rel2) do
|
22
|
-
IiifPrint::PendingRelationship.new(
|
23
|
-
parent_id: work.id,
|
24
|
-
child_title: "Child of #{work.id} another.pdf page 01",
|
25
|
-
child_order: "Child of #{work.id} another.pdf page 01",
|
26
|
-
parent_model: WorkWithIiifPrintConfig,
|
27
|
-
child_model: WorkWithIiifPrintConfig,
|
28
|
-
file_id: 'another'
|
29
|
-
)
|
30
|
-
end
|
31
|
-
# let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
|
32
|
-
# let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
|
33
|
-
|
34
|
-
before do
|
35
|
-
allow(fileset).to receive(:parent).and_return(work)
|
36
|
-
allow(fileset).to receive(:label).and_return('file.pdf')
|
37
|
-
allow(fileset).to receive(:mime_type).and_return('application/pdf')
|
38
|
-
end
|
39
|
-
|
40
|
-
describe 'class' do
|
41
|
-
subject { described_class }
|
42
|
-
|
43
|
-
it { is_expected.to respond_to(:conditionally_destroy_spawned_children_of) }
|
44
|
-
it { is_expected.not_to respond_to(:destroy_spawned_children) }
|
45
|
-
end
|
46
|
-
|
47
|
-
describe '#conditionally_destroy_spawned_children_of' do
|
48
|
-
context 'with child works by fileset id' do
|
49
|
-
before do
|
50
|
-
allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([child_work])
|
51
|
-
end
|
52
|
-
|
53
|
-
it 'destroys the child works' do
|
54
|
-
expect(child_work).to receive(:destroy)
|
55
|
-
subject
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
context 'with child works by title' do
|
60
|
-
before do
|
61
|
-
allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([])
|
62
|
-
allow(WorkWithIiifPrintConfig).to receive(:where).and_return([child_work])
|
63
|
-
end
|
64
|
-
|
65
|
-
it 'destroys the child works' do
|
66
|
-
expect(child_work).to receive(:destroy)
|
67
|
-
subject
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
context 'when fileset is not a PDF mimetype' do
|
72
|
-
before do
|
73
|
-
allow(fileset).to receive(:mime_type).and_return('not_pdf')
|
74
|
-
end
|
75
|
-
|
76
|
-
it 'returns with no changes' do
|
77
|
-
expect(IiifPrint::PendingRelationship).not_to receive(:where)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
context 'when IiifPrint::PendingRelationship records exist' do
|
82
|
-
before do
|
83
|
-
pending_rel1.save
|
84
|
-
pending_rel2.save
|
85
|
-
end
|
86
|
-
|
87
|
-
it 'deletes only records associated with the specific fileset PDF file' do
|
88
|
-
expect { subject }.to change(IiifPrint::PendingRelationship, :count).by(-1)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'misc_shared'
|
3
|
-
|
4
|
-
RSpec.describe IiifPrint::SplitPdfs::PagesToJpgsSplitter do
|
5
|
-
let(:path) { __FILE__ }
|
6
|
-
let(:splitter) { described_class.new(path) }
|
7
|
-
|
8
|
-
describe '#quality' do
|
9
|
-
subject { splitter.quality }
|
10
|
-
it { is_expected.to eq(described_class.quality) }
|
11
|
-
end
|
12
|
-
|
13
|
-
describe '#quality?' do
|
14
|
-
subject { splitter.quality? }
|
15
|
-
it { is_expected.to be_truthy }
|
16
|
-
end
|
17
|
-
|
18
|
-
describe '#image_extension' do
|
19
|
-
subject { splitter.image_extension }
|
20
|
-
it { is_expected.to eq('jpg') }
|
21
|
-
end
|
22
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe IiifPrint::SplitPdfs::PagesToPngsSplitter do
|
4
|
-
describe '.compression' do
|
5
|
-
subject { described_class.compression }
|
6
|
-
it { is_expected.to be_nil }
|
7
|
-
end
|
8
|
-
|
9
|
-
describe '.compression?' do
|
10
|
-
subject { described_class.compression? }
|
11
|
-
it { is_expected.to be_falsey }
|
12
|
-
end
|
13
|
-
|
14
|
-
describe '.image_extension' do
|
15
|
-
subject { described_class.image_extension }
|
16
|
-
it { is_expected.to eq('png') }
|
17
|
-
end
|
18
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
require 'misc_shared'
|
3
|
-
|
4
|
-
RSpec.describe IiifPrint::SplitPdfs::PagesToTiffsSplitter do
|
5
|
-
describe '.compression' do
|
6
|
-
subject { described_class.compression }
|
7
|
-
it { is_expected.to eq(described_class::DEFAULT_COMPRESSION) }
|
8
|
-
end
|
9
|
-
|
10
|
-
describe '.compression?' do
|
11
|
-
subject { described_class.compression? }
|
12
|
-
it { is_expected.to be_truthy }
|
13
|
-
end
|
14
|
-
|
15
|
-
describe '.image_extension' do
|
16
|
-
subject { described_class.image_extension }
|
17
|
-
it { is_expected.to eq('tiff') }
|
18
|
-
end
|
19
|
-
end
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
RSpec.describe IiifPrint::TextExtraction::AltoReader do
|
5
|
-
let(:fixture_path) do
|
6
|
-
File.join(
|
7
|
-
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
|
8
|
-
)
|
9
|
-
end
|
10
|
-
|
11
|
-
let(:minimal_path) { File.join(fixture_path, 'minimal-alto.xml') }
|
12
|
-
let(:ndnp_alto_path) { File.join(fixture_path, 'ndnp-alto-sample.xml') }
|
13
|
-
let(:minimal) { File.read(minimal_path) }
|
14
|
-
|
15
|
-
let(:reader_minimal) { described_class.new(minimal) }
|
16
|
-
let(:reader_minimal_path) { described_class.new(minimal_path) }
|
17
|
-
let(:reader_ndnp) { described_class.new(ndnp_alto_path) }
|
18
|
-
|
19
|
-
describe "reads alto" do
|
20
|
-
it "loads ALTO source" do
|
21
|
-
expect(reader_minimal_path.source).to eq reader_minimal.source
|
22
|
-
expect(reader_minimal_path.source.size).to eq 1383
|
23
|
-
expect(reader_ndnp.source.size).to eq 1_050_876
|
24
|
-
end
|
25
|
-
|
26
|
-
it "loads document stream" do
|
27
|
-
expect(reader_minimal_path.doc_stream).to be_kind_of Nokogiri::XML::SAX::Document
|
28
|
-
expect(reader_minimal_path.doc_stream).to respond_to :text
|
29
|
-
expect(reader_minimal_path.doc_stream).to respond_to :words
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
describe "outputs text derivative formats" do
|
34
|
-
it "outputs plain text" do
|
35
|
-
# try simple flat text input
|
36
|
-
expect(reader_minimal.text).to eq "This is only a test."
|
37
|
-
expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
|
38
|
-
# try more complex input
|
39
|
-
expect(reader_ndnp.text.size).to eq 30_519
|
40
|
-
end
|
41
|
-
|
42
|
-
it "passes args to WordCoordsBuilder and receives output" do
|
43
|
-
parsed = JSON.parse(reader_minimal.json)
|
44
|
-
expect(parsed['coords'].length).to be > 1
|
45
|
-
parsed = JSON.parse(reader_ndnp.json)
|
46
|
-
expect(parsed['coords'].size).to eq 2_125
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
require 'nokogiri'
|
3
|
-
require 'spec_helper'
|
4
|
-
|
5
|
-
RSpec.describe IiifPrint::TextExtraction::HOCRReader do
|
6
|
-
let(:fixture_path) do
|
7
|
-
File.join(
|
8
|
-
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
|
9
|
-
)
|
10
|
-
end
|
11
|
-
|
12
|
-
let(:minimal_path) { File.join(fixture_path, 'ocr_mono_text_hocr.html') }
|
13
|
-
let(:minimal) { File.read(minimal_path) }
|
14
|
-
|
15
|
-
let(:reader_minimal) { described_class.new(minimal) }
|
16
|
-
let(:reader_minimal_path) { described_class.new(minimal_path) }
|
17
|
-
|
18
|
-
describe "reads hOCR" do
|
19
|
-
it "loads hOCR either from path or source text" do
|
20
|
-
expect(reader_minimal_path.source).to eq reader_minimal.source
|
21
|
-
# size here is in Unicode characters, not bytes:
|
22
|
-
expect(reader_minimal_path.source.size).to eq 16_590
|
23
|
-
end
|
24
|
-
|
25
|
-
it "loads document stream" do
|
26
|
-
expect(reader_minimal_path.doc_stream).to be_kind_of Nokogiri::XML::SAX::Document
|
27
|
-
expect(reader_minimal_path.doc_stream).to respond_to :text
|
28
|
-
expect(reader_minimal_path.doc_stream).to respond_to :words
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
describe "outputs text derivative formats" do
|
33
|
-
it "outputs plain text" do
|
34
|
-
plain_text = reader_minimal.text
|
35
|
-
expect(plain_text.slice(0, 40)).to eq "_A FEARFUL ADVENTURE.\n‘The Missouri. Rep"
|
36
|
-
expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
|
37
|
-
expect(reader_minimal.text.size).to eq 723
|
38
|
-
end
|
39
|
-
|
40
|
-
it "passes args to WordCoordsBuilder and receives output" do
|
41
|
-
parsed = JSON.parse(reader_minimal.json)
|
42
|
-
expect(parsed['coords'].length).to be > 1
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|