iiif_print 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.env +5 -0
- data/.fcrepo_wrapper +4 -0
- data/.github/release.yml +20 -0
- data/.github/workflows/branches.yml +24 -0
- data/.github/workflows/build-lint-test-action.yaml +33 -0
- data/.github/workflows/release_labels.yml +25 -0
- data/.gitignore +52 -0
- data/.rubocop.yml +177 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +49 -0
- data/CONTRIBUTING.md +181 -0
- data/Dockerfile +15 -0
- data/Gemfile +52 -0
- data/LICENSE +203 -0
- data/README.md +203 -0
- data/Rakefile +38 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
- data/app/assets/config/iiif_print_manifest.js +2 -0
- data/app/assets/images/iiif_print/.keep +0 -0
- data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/iiif_print.js +3 -0
- data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
- data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
- data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
- data/app/helpers/hyrax/iiif_helper.rb +22 -0
- data/app/helpers/iiif_print/application_helper.rb +5 -0
- data/app/helpers/iiif_print_helper.rb +64 -0
- data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
- data/app/mailers/iiif_print/application_mailer.rb +8 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
- data/app/models/concerns/iiif_print/solr/document.rb +47 -0
- data/app/models/iiif_print/application_record.rb +6 -0
- data/app/models/iiif_print/derivative_attachment.rb +8 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
- data/app/models/iiif_print/ingest_file_relation.rb +14 -0
- data/app/models/iiif_print/pending_relationship.rb +7 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
- data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
- data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
- data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/hyrax/base/_representative_media.html.erb +9 -0
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/iiif_print.de.yml +148 -0
- data/config/locales/iiif_print.en.yml +119 -0
- data/config/locales/iiif_print.es.yml +148 -0
- data/config/locales/iiif_print.fr.yml +149 -0
- data/config/locales/iiif_print.it.yml +142 -0
- data/config/locales/iiif_print.pt-BR.yml +148 -0
- data/config/locales/iiif_print.zh.yml +142 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/fits.xml +55 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
- data/docker-compose.yml +129 -0
- data/iiif_print.gemspec +43 -0
- data/lib/generators/iiif_print/assets_generator.rb +29 -0
- data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
- data/lib/generators/iiif_print/install_generator.rb +52 -0
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
- data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
- data/lib/iiif_print/base_derivative_service.rb +113 -0
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
- data/lib/iiif_print/catalog_search_builder.rb +31 -0
- data/lib/iiif_print/configuration.rb +99 -0
- data/lib/iiif_print/data/fileset_helper.rb +25 -0
- data/lib/iiif_print/data/path_helper.rb +40 -0
- data/lib/iiif_print/data/work_derivatives.rb +323 -0
- data/lib/iiif_print/data/work_file.rb +92 -0
- data/lib/iiif_print/data/work_files.rb +199 -0
- data/lib/iiif_print/data.rb +35 -0
- data/lib/iiif_print/engine.rb +77 -0
- data/lib/iiif_print/errors.rb +9 -0
- data/lib/iiif_print/image_tool.rb +119 -0
- data/lib/iiif_print/jobs/application_job.rb +8 -0
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
- data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
- data/lib/iiif_print/jp2_derivative_service.rb +118 -0
- data/lib/iiif_print/jp2_image_metadata.rb +81 -0
- data/lib/iiif_print/lineage_service.rb +41 -0
- data/lib/iiif_print/metadata.rb +125 -0
- data/lib/iiif_print/pdf_derivative_service.rb +42 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
- data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
- data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
- data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
- data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
- data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
- data/lib/iiif_print/text_extraction.rb +11 -0
- data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
- data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
- data/lib/iiif_print/tiff_derivative_service.rb +50 -0
- data/lib/iiif_print/version.rb +3 -0
- data/lib/iiif_print/works_controller_behavior.rb +9 -0
- data/lib/iiif_print.rb +136 -0
- data/lib/tasks/set_child_works.rake +22 -0
- data/spec/.keep.txt +1 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/uploaded_txt_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
- data/spec/helpers/iiif_print_helper_spec.rb +43 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
- data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
- data/spec/iiif_print/configuration_spec.rb +67 -0
- data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
- data/spec/iiif_print/data/work_file_spec.rb +99 -0
- data/spec/iiif_print/data/work_files_spec.rb +237 -0
- data/spec/iiif_print/image_tool_spec.rb +109 -0
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
- data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
- data/spec/iiif_print/lineage_service_spec.rb +13 -0
- data/spec/iiif_print/metadata_spec.rb +115 -0
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
- data/spec/iiif_print_spec.rb +51 -0
- data/spec/misc_shared.rb +111 -0
- data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
- data/spec/spec_helper.rb +181 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/support/iiif_print_models.rb +127 -0
- data/spec/test_app_templates/blacklight.yml +9 -0
- data/spec/test_app_templates/fedora.yml +15 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
- data/spec/test_app_templates/redis.yml +9 -0
- data/spec/test_app_templates/solr/conf/schema.xml +362 -0
- data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
- data/spec/test_app_templates/solr.yml +7 -0
- data/tasks/iiif_print_dev.rake +34 -0
- data/tmp/.keep +0 -0
- metadata +605 -0
@@ -0,0 +1,237 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'misc_shared'
|
3
|
+
|
4
|
+
RSpec.describe IiifPrint::Data::WorkFiles do
|
5
|
+
include_context "shared setup"
|
6
|
+
|
7
|
+
let(:work) { work_with_file }
|
8
|
+
let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
|
9
|
+
let(:tiff_uri) { 'file://' + File.expand_path(tiff_path) }
|
10
|
+
|
11
|
+
describe "adapter composition" do
|
12
|
+
it "adapts work" do
|
13
|
+
adapter = described_class.new(work)
|
14
|
+
expect(adapter.work).to be work
|
15
|
+
end
|
16
|
+
|
17
|
+
it "adapts work with 'of' alt constructor" do
|
18
|
+
adapter = described_class.of(work)
|
19
|
+
expect(adapter.work).to be work
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "path assignment queueing" do
|
24
|
+
it "queues assigned file path" do
|
25
|
+
adapter = described_class.of(work)
|
26
|
+
expect(adapter.assigned).to be_empty
|
27
|
+
# assign a valid source path
|
28
|
+
adapter.assign(tiff_path)
|
29
|
+
expect(adapter.assigned).to include tiff_path
|
30
|
+
end
|
31
|
+
|
32
|
+
it "will fail to assign file in non registered dir" do
|
33
|
+
adapter = described_class.new(work)
|
34
|
+
# need a non-registered file that exists:
|
35
|
+
bad_path = File.expand_path("../../spec_helper.rb", fixture_path)
|
36
|
+
expect { adapter.assign(bad_path) }.to raise_error(SecurityError)
|
37
|
+
end
|
38
|
+
|
39
|
+
it "queues a file:/// URI" do
|
40
|
+
adapter = described_class.of(work)
|
41
|
+
expect(adapter.assigned).to be_empty
|
42
|
+
adapter.assign(tiff_uri)
|
43
|
+
expect(adapter.assigned).to include tiff_uri
|
44
|
+
end
|
45
|
+
|
46
|
+
it "queues a Pathname, normalized to string" do
|
47
|
+
adapter = described_class.of(work)
|
48
|
+
expect(adapter.assigned).to be_empty
|
49
|
+
adapter.assign(Pathname.new(tiff_path))
|
50
|
+
expect(adapter.assigned).to include tiff_path
|
51
|
+
end
|
52
|
+
|
53
|
+
it "unqueues a queued path" do
|
54
|
+
adapter = described_class.of(work)
|
55
|
+
adapter.assign(tiff_path)
|
56
|
+
expect(adapter.assigned).to include tiff_path
|
57
|
+
adapter.unassign(tiff_path)
|
58
|
+
expect(adapter.assigned).to be_empty
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
describe "hash/mapping-like file enumeration" do
|
63
|
+
it "has expected WorkFile in values for work" do
|
64
|
+
adapter = described_class.of(work)
|
65
|
+
values = adapter.values
|
66
|
+
expect(values).to be_an Array
|
67
|
+
expect(values.size).to eq 1
|
68
|
+
expect(values[0]).to be_an IiifPrint::Data::WorkFile
|
69
|
+
expect(values[0].parent).to be adapter
|
70
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
71
|
+
expect(values[0].fileset).to eq first_fileset
|
72
|
+
expect(values[0].unwrapped).to be_a Hydra::PCDM::File
|
73
|
+
end
|
74
|
+
|
75
|
+
it "has expected fileset keys for work" do
|
76
|
+
adapter = described_class.of(work)
|
77
|
+
keys = adapter.keys
|
78
|
+
expect(keys).to be_an Array
|
79
|
+
expect(keys[0]).to be_a String
|
80
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
81
|
+
expect(keys[0]).to eq first_fileset.id
|
82
|
+
end
|
83
|
+
|
84
|
+
it "has expected entries for work" do
|
85
|
+
adapter = described_class.of(work)
|
86
|
+
entries = adapter.entries
|
87
|
+
expect(entries).to be_an Array
|
88
|
+
expect(entries[0]).to be_an Array
|
89
|
+
expect(entries[0].size).to eq 2
|
90
|
+
expect(entries[0][0]).to eq adapter.keys[0]
|
91
|
+
expect(entries[0][1]).to eq adapter.values[0]
|
92
|
+
end
|
93
|
+
|
94
|
+
it "gets work file by fileset id" do
|
95
|
+
adapter = described_class.of(work)
|
96
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
97
|
+
fsid = adapter.keys[0]
|
98
|
+
expect(fsid).to eq first_fileset.id
|
99
|
+
work_file = adapter.get(fsid)
|
100
|
+
expect(work_file.unwrapped).to eq first_fileset.original_file
|
101
|
+
work_file = adapter[fsid]
|
102
|
+
expect(work_file.unwrapped).to eq first_fileset.original_file
|
103
|
+
end
|
104
|
+
|
105
|
+
it "gets work file by work-local filename" do
|
106
|
+
adapter = described_class.of(work)
|
107
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
108
|
+
name = first_fileset.original_file.original_name
|
109
|
+
work_file = adapter.get(name)
|
110
|
+
expect(work_file).to eq adapter.get(first_fileset.id)
|
111
|
+
end
|
112
|
+
|
113
|
+
it "verifies inclusion of fileset id key" do
|
114
|
+
adapter = described_class.of(work)
|
115
|
+
fsid = adapter.keys[0]
|
116
|
+
expect(adapter.include?(fsid)).to be true
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
describe "assignment state" do
|
121
|
+
it "has empty state for work with no files" do
|
122
|
+
bare_work = MyWork.new
|
123
|
+
bare_work.title = ['No files to see here']
|
124
|
+
bare_work.save!
|
125
|
+
adapter = described_class.of(bare_work)
|
126
|
+
expect(adapter.keys.empty?).to be true
|
127
|
+
expect(adapter.state).to eq 'empty'
|
128
|
+
end
|
129
|
+
|
130
|
+
it "has 'dirty' state when files assigned" do
|
131
|
+
adapter = described_class.of(work)
|
132
|
+
expect(adapter.state).to eq 'saved'
|
133
|
+
adapter.assign(tiff_path)
|
134
|
+
# changes to dirty
|
135
|
+
expect(adapter.state).to eq 'dirty'
|
136
|
+
# unassign path again to empty assigned queue:
|
137
|
+
adapter.unassign(tiff_path)
|
138
|
+
# now we are back to 'saved' since no changes are queued:
|
139
|
+
expect(adapter.state).to eq 'saved'
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
describe "commits changes" do
|
144
|
+
# We need to register these jobs to run now, at minimum:
|
145
|
+
do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]
|
146
|
+
# These we skip: [CharacterizeJob, CreateDerivativesJob]
|
147
|
+
# -- skipping these saves 10-15 seconds on attachment example
|
148
|
+
|
149
|
+
permission_methods = [
|
150
|
+
:edit_users,
|
151
|
+
:read_users,
|
152
|
+
:discover_users,
|
153
|
+
:edit_groups,
|
154
|
+
:read_groups,
|
155
|
+
:discover_groups
|
156
|
+
]
|
157
|
+
|
158
|
+
let(:bare_work) do
|
159
|
+
bare_work = MyWork.new
|
160
|
+
bare_work.title = ['No files to see here']
|
161
|
+
bare_work.save!
|
162
|
+
bare_work
|
163
|
+
end
|
164
|
+
|
165
|
+
it "commits unassign (file deletions)" do
|
166
|
+
adapter = described_class.of(work)
|
167
|
+
expect(adapter.keys.size).to eq 1
|
168
|
+
adapter.unassign(adapter.keys[0])
|
169
|
+
adapter.commit!
|
170
|
+
expect(adapter.keys.size).to eq 0
|
171
|
+
expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 0
|
172
|
+
end
|
173
|
+
|
174
|
+
context "when it is a new work" do
|
175
|
+
it "commit for assignment invokes actor stack" do
|
176
|
+
work = MyWork.new(title: ['Just a new work'])
|
177
|
+
adapter = described_class.of(work)
|
178
|
+
adapter.assign(tiff_path)
|
179
|
+
allow(Hyrax::CurationConcern.actor).to receive(:create).and_return(true)
|
180
|
+
expect(Hyrax::CurationConcern.actor).to receive(:create)
|
181
|
+
expect(adapter.commit!).to be true
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
context "when the work already exists" do
|
186
|
+
it "commit for assignment invokes actor stack" do
|
187
|
+
work = bare_work
|
188
|
+
adapter = described_class.of(work)
|
189
|
+
adapter.assign(tiff_path)
|
190
|
+
allow(Hyrax::CurationConcern.actor).to receive(:update).and_return(true)
|
191
|
+
expect(Hyrax::CurationConcern.actor).to receive(:update)
|
192
|
+
expect(adapter.commit!).to be true
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
xit "commits successful file attachment", perform_enqueued: do_now_jobs do
|
197
|
+
work = bare_work
|
198
|
+
adapter = described_class.of(work)
|
199
|
+
adapter.assign(tiff_path)
|
200
|
+
adapter.commit!
|
201
|
+
# registered jobs (do_now_jobs) performed as effect of commit!
|
202
|
+
# are configured to effectively run inline. Reloading work
|
203
|
+
# should refresh the work.members, and by consequence adapter.keys
|
204
|
+
work.reload
|
205
|
+
expect(adapter.keys.size).to eq 1
|
206
|
+
expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 1
|
207
|
+
expect(adapter.names).to include 'ocr_gray.tiff'
|
208
|
+
end
|
209
|
+
|
210
|
+
xit "copies work perimssions to fileset", perform_enqueued: do_now_jobs do
|
211
|
+
adapter = described_class.of(bare_work)
|
212
|
+
adapter.assign(tiff_path)
|
213
|
+
adapter.commit!
|
214
|
+
bare_work.reload
|
215
|
+
fileset = bare_work.members.detect { |m| m.is_a?(FileSet) }
|
216
|
+
permission_methods.each do |m|
|
217
|
+
expect(fileset.send(m)).to match_array bare_work.send(m)
|
218
|
+
end
|
219
|
+
expect(fileset.visibility).to eq bare_work.visibility
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
describe "derivative access" do
|
224
|
+
it "gets derivatives for first fileset" do
|
225
|
+
fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
226
|
+
adapter = described_class.of(work)
|
227
|
+
# adapts same context(s):
|
228
|
+
expect(adapter.derivatives.fileset.id).to eq fileset.id
|
229
|
+
expect(adapter.derivatives.work).to be work
|
230
|
+
expect(adapter.derivatives.class).to eq \
|
231
|
+
IiifPrint::Data::WorkDerivatives
|
232
|
+
# transitive parent/child relationship, can traverse to adapter from
|
233
|
+
# derivatives:
|
234
|
+
expect(adapter.derivatives.parent.parent).to be adapter
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'tmpdir'
|
3
|
+
|
4
|
+
describe IiifPrint::ImageTool do
|
5
|
+
let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
|
6
|
+
|
7
|
+
# Image fixtures to test identification, metadata extraction for:
|
8
|
+
let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
|
9
|
+
let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
|
10
|
+
let(:gray_tiff) { File.join(fixtures, 'ocr_gray.tiff') }
|
11
|
+
let(:mono_tiff) { File.join(fixtures, 'ocr_mono.tiff') }
|
12
|
+
let(:color_tiff) { File.join(fixtures, '4.1.07.tiff') }
|
13
|
+
let(:pdf) { File.join(fixtures, 'minimal-1-page.pdf') }
|
14
|
+
|
15
|
+
describe "Extracts metadata with JP2 backend" do
|
16
|
+
it "constructs with a path" do
|
17
|
+
identify = described_class.new(gray_jp2)
|
18
|
+
expect(identify.path).to eq gray_jp2
|
19
|
+
end
|
20
|
+
|
21
|
+
it "gets metadata for grayscale JP2 image" do
|
22
|
+
result = described_class.new(gray_jp2).metadata
|
23
|
+
expect(result[:color]).to eq 'gray'
|
24
|
+
expect(result[:width]).to eq 418
|
25
|
+
expect(result[:height]).to eq 1046
|
26
|
+
expect(result[:bits_per_component]).to eq 8
|
27
|
+
expect(result[:num_components]).to eq 1
|
28
|
+
end
|
29
|
+
|
30
|
+
it "gets metadata for color JP2 image" do
|
31
|
+
result = described_class.new(color_jp2).metadata
|
32
|
+
expect(result[:color]).to eq 'color'
|
33
|
+
expect(result[:width]).to eq 256
|
34
|
+
expect(result[:height]).to eq 256
|
35
|
+
expect(result[:bits_per_component]).to eq 8
|
36
|
+
# expect 3 components; this would be 4 if the sample image had an alpha channel
|
37
|
+
expect(result[:num_components]).to eq 3
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "Extracts metadata for non-JP2 images with imagemagick" do
|
42
|
+
it "gets metadata for gray TIFF image" do
|
43
|
+
result = described_class.new(gray_tiff).metadata
|
44
|
+
expect(result[:color]).to eq 'gray'
|
45
|
+
expect(result[:width]).to eq 418
|
46
|
+
expect(result[:height]).to eq 1046
|
47
|
+
expect(result[:bits_per_component]).to eq 8
|
48
|
+
expect(result[:num_components]).to eq 1
|
49
|
+
end
|
50
|
+
|
51
|
+
it "gets metadata for monochrome TIFF image" do
|
52
|
+
result = described_class.new(mono_tiff).metadata
|
53
|
+
expect(result[:color]).to eq 'monochrome'
|
54
|
+
expect(result[:width]).to eq 1261
|
55
|
+
expect(result[:height]).to eq 1744
|
56
|
+
expect(result[:bits_per_component]).to eq 1
|
57
|
+
expect(result[:num_components]).to eq 1
|
58
|
+
end
|
59
|
+
|
60
|
+
it "gets metadata for color TIFF image" do
|
61
|
+
result = described_class.new(color_tiff).metadata
|
62
|
+
expect(result[:color]).to eq 'color'
|
63
|
+
expect(result[:width]).to eq 256
|
64
|
+
expect(result[:height]).to eq 256
|
65
|
+
expect(result[:bits_per_component]).to eq 8
|
66
|
+
# expect 3 components; this would be 4 if the sample image had an alpha channel
|
67
|
+
expect(result[:num_components]).to eq 3
|
68
|
+
end
|
69
|
+
|
70
|
+
it "detects mime type of pdf" do
|
71
|
+
result = described_class.new(pdf).metadata
|
72
|
+
expect(result[:content_type]).to eq 'application/pdf'
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
describe "converts images" do
|
77
|
+
it "makes a monochrome TIFF from JP2" do
|
78
|
+
tool = described_class.new(gray_jp2)
|
79
|
+
dest = File.join(Dir.mktmpdir, 'mono.tif')
|
80
|
+
tool.convert(dest, true)
|
81
|
+
expect(File.exist?(dest)).to be true
|
82
|
+
expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
|
83
|
+
end
|
84
|
+
|
85
|
+
it "makes a gray TIFF from JP2" do
|
86
|
+
tool = described_class.new(gray_jp2)
|
87
|
+
dest = File.join(Dir.mktmpdir, 'gray.tif')
|
88
|
+
tool.convert(dest, false)
|
89
|
+
expect(File.exist?(dest)).to be true
|
90
|
+
expect(described_class.new(dest).metadata[:color]).to eq 'gray'
|
91
|
+
end
|
92
|
+
|
93
|
+
it "makes a monochrome TIFF from grayscale TIFF" do
|
94
|
+
tool = described_class.new(gray_tiff)
|
95
|
+
dest = File.join(Dir.mktmpdir, 'mono.tif')
|
96
|
+
tool.convert(dest, true)
|
97
|
+
expect(File.exist?(dest)).to be true
|
98
|
+
expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
|
99
|
+
end
|
100
|
+
|
101
|
+
# Not yet supported to use this tool to make JP2, for now the only
|
102
|
+
# component in IiifPrint doing that is
|
103
|
+
# IiifPrint::JP2DerivativeService
|
104
|
+
it "raises error on JP2 destination" do
|
105
|
+
expect { described_class.new(gray_tiff).convert('out.jp2') }.to \
|
106
|
+
raise_error(RuntimeError)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'misc_shared'
|
3
|
+
|
4
|
+
RSpec.describe IiifPrint::Jobs::ChildWorksFromPdfJob do
|
5
|
+
# TODO: add specs
|
6
|
+
let(:work) { WorkWithIiifPrintConfig.new(title: ['required title']) }
|
7
|
+
let(:my_user) { build(:user) }
|
8
|
+
let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
|
9
|
+
let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
|
10
|
+
let(:pdf_paths) do
|
11
|
+
uploads = Hyrax::UploadedFile.find(uploaded_file_ids)
|
12
|
+
upload_paths = uploads.map { |upload| upload.file.file.file }
|
13
|
+
upload_paths.select { |path| path.end_with?('.pdf', '.PDF') }
|
14
|
+
end
|
15
|
+
let(:admin_set_id) { "admin_set/default" }
|
16
|
+
let(:prior_pdfs) { 0 }
|
17
|
+
|
18
|
+
# Invokes the job with the work, the uploaded PDF paths, the acting user, the
# admin set id and the prior PDF count. Uses the `pdf_paths` and `my_user`
# helpers declared in this spec (the previous `paths`/`user` names were undefined).
let(:subject) { described_class.perform(work, pdf_paths, my_user, admin_set_id, prior_pdfs) }
|
19
|
+
|
20
|
+
describe '#perform' do
|
21
|
+
xit 'calls pdf splitter service with path' do
|
22
|
+
end
|
23
|
+
|
24
|
+
xit 'submits one BatchCreateJob per PDF' do
|
25
|
+
end
|
26
|
+
|
27
|
+
xit 'submits IiifPrint::Jobs::CreateRelationshipsJob' do
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'misc_shared'
|
3
|
+
|
4
|
+
RSpec.describe IiifPrint::Jobs::CreateRelationshipsJob do
|
5
|
+
# TODO: add specs
|
6
|
+
let(:parent) { WorkWithIiifPrintConfig.new(title: ['required title']) }
|
7
|
+
let(:my_user) { build(:user) }
|
8
|
+
let(:parent_model) { WorkWithIiifPrintConfig }
|
9
|
+
let(:child_model) { WorkWithIiifPrintConfig }
|
10
|
+
|
11
|
+
let(:subject) { described_class.perform(user: my_user, parent_id: parent.id, parent_model: parent_model, child_model: child_model) }
|
12
|
+
|
13
|
+
describe '#perform' do
|
14
|
+
xit 'loads all child work ids into ordered_members' do
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe IiifPrint::JP2ImageMetadata do
|
4
|
+
let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
|
5
|
+
|
6
|
+
let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
|
7
|
+
|
8
|
+
let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
|
9
|
+
|
10
|
+
describe "Extracts technical metadata from a JP2 file" do
|
11
|
+
it "constructs with a path" do
|
12
|
+
meta = described_class.new(gray_jp2)
|
13
|
+
expect(meta.path).to eq gray_jp2
|
14
|
+
end
|
15
|
+
|
16
|
+
it "gets metadata for grayscale image" do
|
17
|
+
meta = described_class.new(gray_jp2)
|
18
|
+
result = meta.technical_metadata
|
19
|
+
expect(result[:color]).to eq 'gray'
|
20
|
+
expect(result[:width]).to eq 418
|
21
|
+
expect(result[:height]).to eq 1046
|
22
|
+
expect(result[:bits_per_component]).to eq 8
|
23
|
+
expect(result[:num_components]).to eq 1
|
24
|
+
end
|
25
|
+
|
26
|
+
it "gets metadata for color image" do
|
27
|
+
meta = described_class.new(color_jp2)
|
28
|
+
result = meta.technical_metadata
|
29
|
+
expect(result[:color]).to eq 'color'
|
30
|
+
expect(result[:width]).to eq 256
|
31
|
+
expect(result[:height]).to eq 256
|
32
|
+
expect(result[:bits_per_component]).to eq 8
|
33
|
+
# expect 3 components; this would be 4 if the sample image had an alpha channel
|
34
|
+
expect(result[:num_components]).to eq 3
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe IiifPrint::Metadata do
|
4
|
+
let(:base_url) { "https://my.dev.test" }
|
5
|
+
let(:solr_document) { SolrDocument.new(attributes) }
|
6
|
+
let(:fields) do
|
7
|
+
metadata_fields.map do |field|
|
8
|
+
SampleField.new(
|
9
|
+
name: field.first,
|
10
|
+
label: Hyrax::Renderers::AttributeRenderer.new(field, nil).label,
|
11
|
+
options: field.last
|
12
|
+
)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
let(:metadata_fields) do
|
16
|
+
{
|
17
|
+
title: {},
|
18
|
+
description: {},
|
19
|
+
date_modified: {}
|
20
|
+
}
|
21
|
+
end
|
22
|
+
|
23
|
+
# Minimal stand-in for a metadata field object: constructed with keyword
# arguments and exposing #name, #label and #options readers.
SampleField = Struct.new(
  :name,
  :label,
  :options,
  keyword_init: true
)
|
24
|
+
|
25
|
+
describe ".build_metadata_for" do
|
26
|
+
subject(:manifest_metadata) do
|
27
|
+
described_class.build_metadata_for(
|
28
|
+
work: solr_document,
|
29
|
+
version: version,
|
30
|
+
fields: fields,
|
31
|
+
current_ability: double(Ability),
|
32
|
+
base_url: base_url
|
33
|
+
)
|
34
|
+
end
|
35
|
+
|
36
|
+
context "for version 2 of the IIIF spec" do
|
37
|
+
let(:version) { 2 }
|
38
|
+
|
39
|
+
context "with a field that has some plain text" do
|
40
|
+
let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
|
41
|
+
|
42
|
+
it "maps the metadata accordingly" do
|
43
|
+
expect(manifest_metadata).to eq [
|
44
|
+
{ "label" => "Title", "value" => ["My Awesome Title"] }
|
45
|
+
]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
context "with a field that contains a url string" do
|
50
|
+
let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
|
51
|
+
|
52
|
+
it "creates a link for the url string" do
|
53
|
+
expect(manifest_metadata).to eq [
|
54
|
+
{ "label" => "Description",
|
55
|
+
"value" =>
|
56
|
+
[
|
57
|
+
"A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"
|
58
|
+
] }
|
59
|
+
]
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
context "with a date" do
|
64
|
+
let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
|
65
|
+
|
66
|
+
it "displays it just the date" do
|
67
|
+
expect(manifest_metadata).to eq [{ "label" => "Date modified", "value" => ["2011-11-11"] }]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
context "with a faceted option" do
|
72
|
+
let(:metadata_fields) { { creator: { render_as: :faceted } } }
|
73
|
+
let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
|
74
|
+
|
75
|
+
it "adds a link to the faceted search" do
|
76
|
+
expect(manifest_metadata). to eq [
|
77
|
+
{ "label" => "Creator",
|
78
|
+
"value" =>
|
79
|
+
["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
|
80
|
+
]
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
context "when the work is apart of a collection" do
|
85
|
+
let(:metadata_fields) { { collection: {} } }
|
86
|
+
let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
|
87
|
+
let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
|
88
|
+
let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
|
89
|
+
|
90
|
+
it "renders a link to the collection" do
|
91
|
+
allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
|
92
|
+
expect(manifest_metadata).to eq [
|
93
|
+
{ "label" => "Collection",
|
94
|
+
"value" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] }
|
95
|
+
]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
context "for version 3 of the IIIF spec", skip: "version 3 metadata not implemented yet" do
|
101
|
+
let(:version) { 3 }
|
102
|
+
|
103
|
+
it "maps the metadata accordingly" do
|
104
|
+
# NOTE: this assumes the I18n.locale is set as :en
|
105
|
+
expect(manifest_metadata).to eq [
|
106
|
+
{ "label" => { "en" => ["Title"] }, "value" => { "none" => ["My Awesome Title"] } },
|
107
|
+
{ "label" => { "en" => ["Description"] },
|
108
|
+
"value" => { "none" => ["This is and awesome description"] } },
|
109
|
+
{ "label" => { "en" => ["Date modified"] }, "value" => { "none" => ["2011-11-11"] } },
|
110
|
+
{ "label" => { "en" => ["Creator"] }, "value" => { "none" => ["McAuthor, Arthur"] } }
|
111
|
+
]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Spec for IiifPrint::TextExtraction::AltoReader, run against two ALTO XML
# fixtures: a minimal document and a larger NDNP sample. Readers are built
# both from raw XML text and from a file path.
RSpec.describe IiifPrint::TextExtraction::AltoReader do
  let(:fixture_path) { File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files') }

  let(:minimal_path) { File.join(fixture_path, 'minimal-alto.xml') }
  let(:minimal) { File.read(minimal_path) }
  let(:ndnp_alto_path) { File.join(fixture_path, 'ndnp-alto-sample.xml') }

  # Reader given the XML text itself.
  let(:reader_from_xml) { described_class.new(minimal) }
  # Readers given a path to the XML file.
  let(:reader_from_path) { described_class.new(minimal_path) }
  let(:ndnp_reader) { described_class.new(ndnp_alto_path) }

  describe "reads alto" do
    it "loads ALTO source" do
      # Path-based and text-based construction must expose the same source.
      expect(reader_from_path.source).to eq(reader_from_xml.source)
      expect(reader_from_path.source.size).to eq(1383)
      expect(ndnp_reader.source.size).to eq(1_050_876)
    end

    it "loads document stream" do
      expect(reader_from_path.doc_stream).to be_a_kind_of(Nokogiri::XML::SAX::Document)
      expect(reader_from_path.doc_stream).to respond_to(:text)
      expect(reader_from_path.doc_stream).to respond_to(:words)
    end
  end

  describe "outputs text derivative formats" do
    it "outputs plain text" do
      # try simple flat text input
      expect(reader_from_xml.text).to eq("This is only a test.")
      expect(reader_from_xml.text).to eq(reader_from_xml.doc_stream.text)
      # try more complex input
      expect(ndnp_reader.text.size).to eq(30_519)
    end

    it "passes args to WordCoordsBuilder and receives output" do
      minimal_json = JSON.parse(reader_from_xml.json)
      expect(minimal_json['coords'].length).to be > 1

      ndnp_json = JSON.parse(ndnp_reader.json)
      expect(ndnp_json['coords'].size).to eq(2_125)
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Spec for IiifPrint::TextExtraction::HOCRReader, run against a single hOCR
# HTML fixture. Readers are built both from the raw markup and from the
# fixture's file path.
RSpec.describe IiifPrint::TextExtraction::HOCRReader do
  let(:fixture_path) { File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files') }

  let(:minimal_path) { File.join(fixture_path, 'ocr_mono_text_hocr.html') }
  let(:minimal) { File.read(minimal_path) }

  # Reader given the hOCR text itself.
  let(:reader_from_markup) { described_class.new(minimal) }
  # Reader given a path to the hOCR file.
  let(:reader_from_path) { described_class.new(minimal_path) }

  describe "reads hOCR" do
    it "loads hOCR either from path or source text" do
      expect(reader_from_path.source).to eq(reader_from_markup.source)
      # size here is in Unicode characters, not bytes:
      expect(reader_from_path.source.size).to eq(16_590)
    end

    it "loads document stream" do
      expect(reader_from_path.doc_stream).to be_a_kind_of(Nokogiri::XML::SAX::Document)
      expect(reader_from_path.doc_stream).to respond_to(:text)
      expect(reader_from_path.doc_stream).to respond_to(:words)
    end
  end

  describe "outputs text derivative formats" do
    it "outputs plain text" do
      extracted = reader_from_markup.text
      # Check the opening of the extracted text, then the whole against the stream.
      expect(extracted[0, 40]).to eq("_A FEARFUL ADVENTURE.\n‘The Missouri. ")
      expect(reader_from_markup.text).to eq(reader_from_markup.doc_stream.text)
      expect(reader_from_markup.text.size).to eq(831)
    end

    it "passes args to WordCoordsBuilder and receives output" do
      word_coords = JSON.parse(reader_from_markup.json)
      expect(word_coords['coords'].length).to be > 1
    end
  end
end
|