iiif_print 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (148) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/Gemfile.lock +2 -2
  4. data/README.md +4 -0
  5. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +1 -1
  6. data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
  7. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +37 -22
  8. data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
  9. data/{lib → app/jobs}/iiif_print/jobs/child_works_from_pdf_job.rb +14 -9
  10. data/{lib → app/jobs}/iiif_print/jobs/create_relationships_job.rb +10 -20
  11. data/app/listeners/iiif_print/listener.rb +31 -0
  12. data/app/models/concerns/iiif_print/set_child_flag.rb +1 -1
  13. data/app/models/concerns/iiif_print/solr/document.rb +5 -3
  14. data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
  15. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  16. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +5 -2
  17. data/app/services/iiif_print/manifest_builder_service_behavior.rb +4 -2
  18. data/app/services/iiif_print/pluggable_derivative_service.rb +5 -1
  19. data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
  20. data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
  21. data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
  22. data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
  23. data/app/views/hyrax/file_sets/_show_actions.html.erb +1 -1
  24. data/config/initializers/simple_schema_loader.rb +1 -0
  25. data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
  26. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
  27. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
  28. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
  29. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +3 -3
  30. data/iiif_print.gemspec +1 -1
  31. data/lib/iiif_print/base_derivative_service.rb +13 -2
  32. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +2 -2
  33. data/lib/iiif_print/catalog_search_builder.rb +2 -2
  34. data/lib/iiif_print/configuration.rb +65 -5
  35. data/lib/iiif_print/data/fileset_helper.rb +2 -2
  36. data/lib/iiif_print/data/work_derivatives.rb +1 -1
  37. data/lib/iiif_print/engine.rb +46 -2
  38. data/lib/iiif_print/homepage_search_builder.rb +2 -2
  39. data/lib/iiif_print/jp2_derivative_service.rb +4 -1
  40. data/lib/iiif_print/lineage_service.rb +19 -6
  41. data/lib/iiif_print/pdf_derivative_service.rb +3 -1
  42. data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
  43. data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
  44. data/lib/iiif_print/persistence_layer.rb +118 -0
  45. data/lib/iiif_print/split_pdfs/base_splitter.rb +11 -0
  46. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +19 -9
  47. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +5 -16
  48. data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
  49. data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
  50. data/lib/iiif_print/tiff_derivative_service.rb +3 -1
  51. data/lib/iiif_print/version.rb +1 -1
  52. data/lib/iiif_print.rb +79 -44
  53. metadata +18 -191
  54. data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -40
  55. data/app/views/hyrax/file_sets/_actions.html.erb +0 -46
  56. data/bin/rails +0 -13
  57. data/spec/.keep.txt +0 -1
  58. data/spec/factories/ability.rb +0 -6
  59. data/spec/factories/newspaper_issue.rb +0 -7
  60. data/spec/factories/newspaper_page.rb +0 -7
  61. data/spec/factories/newspaper_page_solr_document.rb +0 -20
  62. data/spec/factories/newspaper_title.rb +0 -8
  63. data/spec/factories/uploaded_pdf_file.rb +0 -9
  64. data/spec/factories/uploaded_txt_file.rb +0 -9
  65. data/spec/factories/user.rb +0 -13
  66. data/spec/fixtures/authorities/licenses.yml +0 -4
  67. data/spec/fixtures/authorities/rights_statements.yml +0 -4
  68. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  69. data/spec/fixtures/files/4.1.07.tiff +0 -0
  70. data/spec/fixtures/files/README.md +0 -7
  71. data/spec/fixtures/files/alto-2-0.xsd +0 -714
  72. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  73. data/spec/fixtures/files/credits.md +0 -16
  74. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  75. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  76. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  77. data/spec/fixtures/files/minimal-alto.xml +0 -31
  78. data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
  79. data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
  80. data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
  81. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  82. data/spec/fixtures/files/ocr_alto.xml +0 -202
  83. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
  84. data/spec/fixtures/files/ocr_color.tiff +0 -0
  85. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  86. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  87. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  88. data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
  89. data/spec/fixtures/files/page1.tiff +0 -0
  90. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  91. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  92. data/spec/fixtures/files/thumbnail.jpg +0 -0
  93. data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
  94. data/spec/helpers/iiif_print_helper_spec.rb +0 -43
  95. data/spec/iiif_print/base_derivative_service_spec.rb +0 -28
  96. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -59
  97. data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
  98. data/spec/iiif_print/configuration_spec.rb +0 -193
  99. data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
  100. data/spec/iiif_print/data/work_file_spec.rb +0 -99
  101. data/spec/iiif_print/data/work_files_spec.rb +0 -237
  102. data/spec/iiif_print/image_tool_spec.rb +0 -109
  103. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -35
  104. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -118
  105. data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
  106. data/spec/iiif_print/lineage_service_spec.rb +0 -13
  107. data/spec/iiif_print/metadata_spec.rb +0 -249
  108. data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +0 -27
  109. data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +0 -80
  110. data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +0 -92
  111. data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +0 -22
  112. data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +0 -18
  113. data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +0 -19
  114. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
  115. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
  116. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
  117. data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
  118. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
  119. data/spec/iiif_print_spec.rb +0 -171
  120. data/spec/misc_shared.rb +0 -111
  121. data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
  122. data/spec/models/iiif_print/iiif_search_decorator_spec.rb +0 -27
  123. data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
  124. data/spec/models/solr_document_spec.rb +0 -14
  125. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -70
  126. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
  127. data/spec/samvera/derivatives/configuration_spec.rb +0 -41
  128. data/spec/samvera/derivatives/hyrax_spec.rb +0 -62
  129. data/spec/samvera/derivatives_spec.rb +0 -54
  130. data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +0 -103
  131. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
  132. data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +0 -20
  133. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
  134. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -175
  135. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
  136. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
  137. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
  138. data/spec/spec_helper.rb +0 -181
  139. data/spec/support/controller_level_helpers.rb +0 -28
  140. data/spec/support/iiif_print_models.rb +0 -127
  141. data/spec/test_app_templates/blacklight.yml +0 -9
  142. data/spec/test_app_templates/fedora.yml +0 -15
  143. data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
  144. data/spec/test_app_templates/redis.yml +0 -9
  145. data/spec/test_app_templates/solr/conf/schema.xml +0 -362
  146. data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
  147. data/spec/test_app_templates/solr.yml +0 -7
  148. /data/{lib → app/jobs}/iiif_print/jobs/request_split_pdf_job.rb +0 -0
@@ -1,249 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe IiifPrint::Metadata do
4
- let(:base_url) { "https://my.dev.test" }
5
- let(:solr_hit) { SolrHit.new(attributes) }
6
- let(:fields) { IiifPrint.default_fields(fields: metadata_fields) }
7
- let(:metadata_fields) do
8
- {
9
- title: {},
10
- description: {},
11
- date_modified: {}
12
- }
13
- end
14
-
15
- describe ".build_metadata_for" do
16
- subject(:manifest_metadata) do
17
- described_class.build_metadata_for(
18
- work: solr_hit,
19
- version: version,
20
- fields: fields,
21
- current_ability: double(Ability),
22
- base_url: base_url
23
- )
24
- end
25
-
26
- context "for version 2 of the IIIF spec" do
27
- let(:version) { 2 }
28
-
29
- context "with a field that has some plain text" do
30
- let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
31
-
32
- it "maps the metadata accordingly" do
33
- expect(manifest_metadata).to eq [
34
- { "label" => "Title", "value" => ["My Awesome Title"] }
35
- ]
36
- end
37
- end
38
-
39
- context "with a field that contains a url string" do
40
- let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
41
-
42
- it "creates a link for the url string" do
43
- expect(manifest_metadata).to eq [
44
- { "label" => "Description",
45
- "value" =>
46
- [
47
- "A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"
48
- ] }
49
- ]
50
- end
51
- end
52
-
53
- context "with a date" do
54
- let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
55
-
56
- it "displays it just the date" do
57
- expect(manifest_metadata).to eq [{ "label" => "Date modified", "value" => ["2011-11-11"] }]
58
- end
59
- end
60
-
61
- context "with a faceted option" do
62
- let(:metadata_fields) { { creator: { render_as: :faceted } } }
63
- let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
64
-
65
- it "adds a link to the faceted search" do
66
- expect(manifest_metadata).to eq [
67
- { "label" => "Creator",
68
- "value" =>
69
- ["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
70
- ]
71
- end
72
- end
73
-
74
- context "with an authority option" do
75
- context "rights statement" do
76
- let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
77
- let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
78
-
79
- it "renders a link and displays a term" do
80
- expect(manifest_metadata).to eq [
81
- { "label" => "Rights statement",
82
- "value" => ["<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"] }
83
- ]
84
- end
85
- end
86
-
87
- context "license" do
88
- let(:metadata_fields) { { license: { render_as: :license } } }
89
- let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
90
-
91
- it "renders a link and displays a term" do
92
- expect(manifest_metadata).to eq [
93
- { "label" => "License",
94
- "value" => [
95
- "<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
96
- ] }
97
- ]
98
- end
99
- end
100
- end
101
-
102
- context "when the work is apart of a collection" do
103
- let(:metadata_fields) { { collection: {} } }
104
- let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
105
- let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
106
- let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
107
-
108
- it "renders a link to the collection" do
109
- allow(SolrDocument).to receive(:find)
110
- allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
111
- expect(manifest_metadata).to eq [
112
- { "label" => "Collection",
113
- "value" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] }
114
- ]
115
- end
116
- end
117
-
118
- context "when the value has an empty string" do
119
- let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
120
-
121
- it "does not map the field with an empty string" do
122
- expect(manifest_metadata.flat_map(&:values)).not_to include([""])
123
- expect(manifest_metadata).to eq [{ "label" => "Title", "value" => ["This is a title."] }]
124
- end
125
- end
126
-
127
- context "when the value is an empty string" do
128
- let(:attributes) { { "description_tesim" => [""] } }
129
-
130
- it "returns and empty array" do
131
- expect(manifest_metadata).to eq []
132
- end
133
- end
134
- end
135
-
136
- context "for version 3 of the IIIF spec" do
137
- let(:version) { 3 }
138
-
139
- context "with a field that has some plain text" do
140
- let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
141
-
142
- # NOTE: this assumes the I18n.locale is set as :en
143
- it "maps the metadata accordingly" do
144
- expect(manifest_metadata).to eq [{ "label" => { "en" => ["Title"] },
145
- "value" => { "none" => ["My Awesome Title"] } }]
146
- end
147
- end
148
-
149
- context "with a field that contains a url string" do
150
- let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
151
-
152
- it "creates a link for the url string" do
153
- expect(manifest_metadata).to eq [
154
- { "label" => { "en" => ["Description"] },
155
- "value" => { "none" =>
156
- ["A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"] } }
157
- ]
158
- end
159
- end
160
-
161
- context "with a date" do
162
- let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
163
-
164
- it "displays it just the date" do
165
- expect(manifest_metadata).to eq [{ "label" => { "en" => ["Date modified"] },
166
- "value" => { "none" => ["2011-11-11"] } }]
167
- end
168
- end
169
-
170
- context "with a faceted option" do
171
- let(:metadata_fields) { { creator: { render_as: :faceted } } }
172
- let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
173
-
174
- it "adds a link to the faceted search" do
175
- expect(manifest_metadata). to eq [
176
- { "label" => { "en" => ["Creator"] },
177
- "value" => { "none" =>
178
- ["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] } }
179
- ]
180
- end
181
- end
182
-
183
- context "with an authority option" do
184
- context "rights statement" do
185
- let(:metadata_fields) { { rights_statement: { render_as: :rights_statement } } }
186
- let(:attributes) { { "rights_statement_tesim" => ["http://rightsstatements.org/vocab/InC-OW-EU/1.0/"] } }
187
-
188
- it "renders a link and displays a term" do
189
- expect(manifest_metadata).to eq [
190
- { "label" => { "en" => ["Rights statement"] },
191
- "value" => { "none" => [
192
- "<a href='http://rightsstatements.org/vocab/InC-OW-EU/1.0/'>In Copyright - EU Orphan Work</a>"
193
- ] } }
194
- ]
195
- end
196
- end
197
-
198
- context "license" do
199
- let(:metadata_fields) { { license: { render_as: :license } } }
200
- let(:attributes) { { "license_tesim" => ["https://creativecommons.org/licenses/by-sa/4.0/"] } }
201
-
202
- it "renders a link and displays a term" do
203
- expect(manifest_metadata).to eq [
204
- { "label" => { "en" => ["License"] },
205
- "value" => { "none" => [
206
- "<a href='https://creativecommons.org/licenses/by-sa/4.0/'>Creative Commons BY-SA Attribution-ShareAlike 4.0 International</a>"
207
- ] } }
208
- ]
209
- end
210
- end
211
- end
212
-
213
- context "when the work is apart of a collection" do
214
- let(:metadata_fields) { { collection: {} } }
215
- let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
216
- let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
217
- let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
218
-
219
- it "renders a link to the collection" do
220
- allow(SolrDocument).to receive(:find)
221
- allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
222
- expect(manifest_metadata).to eq [
223
- { "label" => { "en" => ["Collection"] },
224
- "value" => { "none" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] } }
225
- ]
226
- end
227
- end
228
-
229
- context "when the value has an empty string" do
230
- let(:attributes) { { "title_tesim" => ["This is a title."], "description_tesim" => [""] } }
231
-
232
- it "does not map the field with an empty string" do
233
- expect(manifest_metadata.flat_map(&:values)).not_to include({ "none" => [""] })
234
- expect(manifest_metadata).to eq [
235
- { "label" => { "en" => ["Title"] }, "value" => { "none" => ["This is a title."] } }
236
- ]
237
- end
238
- end
239
-
240
- context "when the value is an empty string" do
241
- let(:attributes) { { "description_tesim" => [""] } }
242
-
243
- it "returns and empty array" do
244
- expect(manifest_metadata).to eq []
245
- end
246
- end
247
- end
248
- end
249
- end
@@ -1,27 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe IiifPrint::SplitPdfs::BaseSplitter do
4
- let(:path) { __FILE__ }
5
- let(:splitter) { described_class.new(path) }
6
- subject { described_class }
7
-
8
- it { is_expected.to respond_to(:call) }
9
-
10
- describe "instance" do
11
- subject { splitter }
12
-
13
- it { is_expected.to respond_to :compression }
14
- it { is_expected.to respond_to :compression? }
15
- it { is_expected.to respond_to :image_extension }
16
- it { is_expected.to respond_to :quality }
17
- end
18
-
19
- describe '#compression' do
20
- it 'can be changed within the instance' do
21
- expect do
22
- splitter.compression = 'squishy'
23
- end.not_to change(splitter.class, :compression)
24
- expect(splitter.compression).to eq('squishy')
25
- end
26
- end
27
- end
@@ -1,80 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'spec_helper'
4
-
5
- RSpec.describe IiifPrint::SplitPdfs::DerivativeRodeoSplitter do
6
- let(:filename) { __FILE__ }
7
- let(:work) { double(MyWork, id: 'id-12345', aark_id: '12345') }
8
- let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
9
- let(:location_stub) { double(DerivativeRodeo::StorageLocations::BaseLocation, exist?: true) }
10
-
11
- before do
12
- allow(DerivativeRodeo::StorageLocations::BaseLocation).to receive(:from_uri).and_return(location_stub)
13
- end
14
-
15
- describe 'class' do
16
- subject { described_class }
17
-
18
- it { is_expected.to respond_to(:call) }
19
- end
20
-
21
- subject(:instance) { described_class.new(filename, file_set: file_set, output_tmp_dir: Dir.tmpdir) }
22
- let(:generator) { double(DerivativeRodeo::Generators::PdfSplitGenerator, generated_files: []) }
23
-
24
- before do
25
- allow(file_set).to receive(:parent).and_return(work)
26
- # TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but
27
- # this part is only one piece of the over all integration.
28
- allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__))
29
- end
30
-
31
- it { is_expected.to respond_to :split_files }
32
-
33
- it 'uses the rodeo to split' do
34
- expect(DerivativeRodeo::Generators::PdfSplitGenerator).to receive(:new).and_return(generator)
35
- described_class.call(filename, file_set: file_set)
36
- end
37
-
38
- describe '#preprocessed_location_template' do
39
- let(:derivative_rodeo_preprocessed_file) { IiifPrint::DerivativeRodeoService.derivative_rodeo_uri(file_set: file_set, filename: filename) }
40
- let(:import_url) { "https://somewhere.com/that/exists.pdf" }
41
- subject { instance.preprocessed_location_template }
42
-
43
- context 'when the s3 file exists in the rodeo' do
44
- it 'is that file' do
45
- is_expected.to eq(derivative_rodeo_preprocessed_file)
46
- end
47
- end
48
-
49
- context 'when the s3 file does not exist in the rodeo and we have the local file' do
50
- it 'is the import_url' do
51
- expect_any_instance_of(DerivativeRodeo::Generators::CopyGenerator).not_to receive(:generated_uris)
52
- file_set.import_url = import_url
53
- expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
54
- expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(true)
55
- expect(subject).to eq(instance.input_uri)
56
- end
57
- end
58
-
59
- context 'when the s3 file does not exist and we do not have the input URI nor the given import url does NOT exist' do
60
- let(:generator) { double(DerivativeRodeo::Generators::CopyGenerator, generated_uris: ["file:///generated/uri"]) }
61
- it 'will invoke the DerivativeRodeo::Generators::CopyGenerator to bring the file locally' do
62
- allow(DerivativeRodeo::Generators::CopyGenerator).to receive(:new).and_return(generator)
63
- file_set.import_url = import_url
64
- expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
65
- expect(instance).to receive(:rodeo_conformant_uri_exists?).with(instance.input_uri).and_return(false)
66
-
67
- expect(subject).to eq(generator.generated_uris.first)
68
- end
69
- end
70
-
71
- context "when the s3 file does not exist and we don't have a remote_url" do
72
- it 'will use the given filename' do
73
- file_set.import_url = nil
74
- expect(instance).to receive(:rodeo_conformant_uri_exists?).with(derivative_rodeo_preprocessed_file).and_return(false)
75
-
76
- expect(subject).to eq(nil)
77
- end
78
- end
79
- end
80
- end
@@ -1,92 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'spec_helper'
4
-
5
- RSpec.describe IiifPrint::SplitPdfs::DestroyPdfChildWorksService do
6
- let(:subject) { described_class.conditionally_destroy_spawned_children_of(file_set: fileset, work: work) }
7
-
8
- let(:work) { WorkWithIiifPrintConfig.new(title: ['required title'], id: '123') }
9
- let(:fileset) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
10
- let(:child_work) { WorkWithIiifPrintConfig.new(title: ["Child of #{work.id} file.pdf page 01"], id: '456', is_child: true) }
11
- let(:pending_rel1) do
12
- IiifPrint::PendingRelationship.new(
13
- parent_id: work.id,
14
- child_title: "Child of #{work.id} file.pdf page 01",
15
- child_order: "Child of #{work.id} file.pdf page 01",
16
- parent_model: WorkWithIiifPrintConfig,
17
- child_model: WorkWithIiifPrintConfig,
18
- file_id: fileset.id
19
- )
20
- end
21
- let(:pending_rel2) do
22
- IiifPrint::PendingRelationship.new(
23
- parent_id: work.id,
24
- child_title: "Child of #{work.id} another.pdf page 01",
25
- child_order: "Child of #{work.id} another.pdf page 01",
26
- parent_model: WorkWithIiifPrintConfig,
27
- child_model: WorkWithIiifPrintConfig,
28
- file_id: 'another'
29
- )
30
- end
31
- # let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
32
- # let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
33
-
34
- before do
35
- allow(fileset).to receive(:parent).and_return(work)
36
- allow(fileset).to receive(:label).and_return('file.pdf')
37
- allow(fileset).to receive(:mime_type).and_return('application/pdf')
38
- end
39
-
40
- describe 'class' do
41
- subject { described_class }
42
-
43
- it { is_expected.to respond_to(:conditionally_destroy_spawned_children_of) }
44
- it { is_expected.not_to respond_to(:destroy_spawned_children) }
45
- end
46
-
47
- describe '#conditionally_destroy_spawned_children_of' do
48
- context 'with child works by fileset id' do
49
- before do
50
- allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([child_work])
51
- end
52
-
53
- it 'destroys the child works' do
54
- expect(child_work).to receive(:destroy)
55
- subject
56
- end
57
- end
58
-
59
- context 'with child works by title' do
60
- before do
61
- allow(WorkWithIiifPrintConfig).to receive(:where).with(split_from_pdf_id: fileset.id).and_return([])
62
- allow(WorkWithIiifPrintConfig).to receive(:where).and_return([child_work])
63
- end
64
-
65
- it 'destroys the child works' do
66
- expect(child_work).to receive(:destroy)
67
- subject
68
- end
69
- end
70
-
71
- context 'when fileset is not a PDF mimetype' do
72
- before do
73
- allow(fileset).to receive(:mime_type).and_return('not_pdf')
74
- end
75
-
76
- it 'returns with no changes' do
77
- expect(IiifPrint::PendingRelationship).not_to receive(:where)
78
- end
79
- end
80
-
81
- context 'when IiifPrint::PendingRelationship records exist' do
82
- before do
83
- pending_rel1.save
84
- pending_rel2.save
85
- end
86
-
87
- it 'deletes only records associated with the specific fileset PDF file' do
88
- expect { subject }.to change(IiifPrint::PendingRelationship, :count).by(-1)
89
- end
90
- end
91
- end
92
- end
@@ -1,22 +0,0 @@
1
- require 'spec_helper'
2
- require 'misc_shared'
3
-
4
- RSpec.describe IiifPrint::SplitPdfs::PagesToJpgsSplitter do
5
- let(:path) { __FILE__ }
6
- let(:splitter) { described_class.new(path) }
7
-
8
- describe '#quality' do
9
- subject { splitter.quality }
10
- it { is_expected.to eq(described_class.quality) }
11
- end
12
-
13
- describe '#quality?' do
14
- subject { splitter.quality? }
15
- it { is_expected.to be_truthy }
16
- end
17
-
18
- describe '#image_extension' do
19
- subject { splitter.image_extension }
20
- it { is_expected.to eq('jpg') }
21
- end
22
- end
@@ -1,18 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe IiifPrint::SplitPdfs::PagesToPngsSplitter do
4
- describe '.compression' do
5
- subject { described_class.compression }
6
- it { is_expected.to be_nil }
7
- end
8
-
9
- describe '.compression?' do
10
- subject { described_class.compression? }
11
- it { is_expected.to be_falsey }
12
- end
13
-
14
- describe '.image_extension' do
15
- subject { described_class.image_extension }
16
- it { is_expected.to eq('png') }
17
- end
18
- end
@@ -1,19 +0,0 @@
1
- require 'spec_helper'
2
- require 'misc_shared'
3
-
4
- RSpec.describe IiifPrint::SplitPdfs::PagesToTiffsSplitter do
5
- describe '.compression' do
6
- subject { described_class.compression }
7
- it { is_expected.to eq(described_class::DEFAULT_COMPRESSION) }
8
- end
9
-
10
- describe '.compression?' do
11
- subject { described_class.compression? }
12
- it { is_expected.to be_truthy }
13
- end
14
-
15
- describe '.image_extension' do
16
- subject { described_class.image_extension }
17
- it { is_expected.to eq('tiff') }
18
- end
19
- end
@@ -1,49 +0,0 @@
1
- require 'json'
2
- require 'spec_helper'
3
-
4
- RSpec.describe IiifPrint::TextExtraction::AltoReader do
5
- let(:fixture_path) do
6
- File.join(
7
- IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
8
- )
9
- end
10
-
11
- let(:minimal_path) { File.join(fixture_path, 'minimal-alto.xml') }
12
- let(:ndnp_alto_path) { File.join(fixture_path, 'ndnp-alto-sample.xml') }
13
- let(:minimal) { File.read(minimal_path) }
14
-
15
- let(:reader_minimal) { described_class.new(minimal) }
16
- let(:reader_minimal_path) { described_class.new(minimal_path) }
17
- let(:reader_ndnp) { described_class.new(ndnp_alto_path) }
18
-
19
- describe "reads alto" do
20
- it "loads ALTO source" do
21
- expect(reader_minimal_path.source).to eq reader_minimal.source
22
- expect(reader_minimal_path.source.size).to eq 1383
23
- expect(reader_ndnp.source.size).to eq 1_050_876
24
- end
25
-
26
- it "loads document stream" do
27
- expect(reader_minimal_path.doc_stream).to be_kind_of Nokogiri::XML::SAX::Document
28
- expect(reader_minimal_path.doc_stream).to respond_to :text
29
- expect(reader_minimal_path.doc_stream).to respond_to :words
30
- end
31
- end
32
-
33
- describe "outputs text derivative formats" do
34
- it "outputs plain text" do
35
- # try simple flat text input
36
- expect(reader_minimal.text).to eq "This is only a test."
37
- expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
38
- # try more complex input
39
- expect(reader_ndnp.text.size).to eq 30_519
40
- end
41
-
42
- it "passes args to WordCoordsBuilder and receives output" do
43
- parsed = JSON.parse(reader_minimal.json)
44
- expect(parsed['coords'].length).to be > 1
45
- parsed = JSON.parse(reader_ndnp.json)
46
- expect(parsed['coords'].size).to eq 2_125
47
- end
48
- end
49
- end
@@ -1,45 +0,0 @@
1
- require 'json'
2
- require 'nokogiri'
3
- require 'spec_helper'
4
-
5
- RSpec.describe IiifPrint::TextExtraction::HOCRReader do
6
- let(:fixture_path) do
7
- File.join(
8
- IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
9
- )
10
- end
11
-
12
- let(:minimal_path) { File.join(fixture_path, 'ocr_mono_text_hocr.html') }
13
- let(:minimal) { File.read(minimal_path) }
14
-
15
- let(:reader_minimal) { described_class.new(minimal) }
16
- let(:reader_minimal_path) { described_class.new(minimal_path) }
17
-
18
- describe "reads hOCR" do
19
- it "loads hOCR either from path or source text" do
20
- expect(reader_minimal_path.source).to eq reader_minimal.source
21
- # size here is in Unicode characters, not bytes:
22
- expect(reader_minimal_path.source.size).to eq 16_590
23
- end
24
-
25
- it "loads document stream" do
26
- expect(reader_minimal_path.doc_stream).to be_kind_of Nokogiri::XML::SAX::Document
27
- expect(reader_minimal_path.doc_stream).to respond_to :text
28
- expect(reader_minimal_path.doc_stream).to respond_to :words
29
- end
30
- end
31
-
32
- describe "outputs text derivative formats" do
33
- it "outputs plain text" do
34
- plain_text = reader_minimal.text
35
- expect(plain_text.slice(0, 40)).to eq "_A FEARFUL ADVENTURE.\n‘The Missouri. Rep"
36
- expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
37
- expect(reader_minimal.text.size).to eq 723
38
- end
39
-
40
- it "passes args to WordCoordsBuilder and receives output" do
41
- parsed = JSON.parse(reader_minimal.json)
42
- expect(parsed['coords'].length).to be > 1
43
- end
44
- end
45
- end