iiif_print 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.env +5 -0
- data/.fcrepo_wrapper +4 -0
- data/.github/release.yml +20 -0
- data/.github/workflows/branches.yml +24 -0
- data/.github/workflows/build-lint-test-action.yaml +33 -0
- data/.github/workflows/release_labels.yml +25 -0
- data/.gitignore +52 -0
- data/.rubocop.yml +177 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +49 -0
- data/CONTRIBUTING.md +181 -0
- data/Dockerfile +15 -0
- data/Gemfile +52 -0
- data/LICENSE +203 -0
- data/README.md +203 -0
- data/Rakefile +38 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
- data/app/assets/config/iiif_print_manifest.js +2 -0
- data/app/assets/images/iiif_print/.keep +0 -0
- data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/iiif_print.js +3 -0
- data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
- data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
- data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
- data/app/helpers/hyrax/iiif_helper.rb +22 -0
- data/app/helpers/iiif_print/application_helper.rb +5 -0
- data/app/helpers/iiif_print_helper.rb +64 -0
- data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
- data/app/mailers/iiif_print/application_mailer.rb +8 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
- data/app/models/concerns/iiif_print/solr/document.rb +47 -0
- data/app/models/iiif_print/application_record.rb +6 -0
- data/app/models/iiif_print/derivative_attachment.rb +8 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
- data/app/models/iiif_print/ingest_file_relation.rb +14 -0
- data/app/models/iiif_print/pending_relationship.rb +7 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
- data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
- data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
- data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/hyrax/base/_representative_media.html.erb +9 -0
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/iiif_print.de.yml +148 -0
- data/config/locales/iiif_print.en.yml +119 -0
- data/config/locales/iiif_print.es.yml +148 -0
- data/config/locales/iiif_print.fr.yml +149 -0
- data/config/locales/iiif_print.it.yml +142 -0
- data/config/locales/iiif_print.pt-BR.yml +148 -0
- data/config/locales/iiif_print.zh.yml +142 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/fits.xml +55 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
- data/docker-compose.yml +129 -0
- data/iiif_print.gemspec +43 -0
- data/lib/generators/iiif_print/assets_generator.rb +29 -0
- data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
- data/lib/generators/iiif_print/install_generator.rb +52 -0
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
- data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
- data/lib/iiif_print/base_derivative_service.rb +113 -0
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
- data/lib/iiif_print/catalog_search_builder.rb +31 -0
- data/lib/iiif_print/configuration.rb +99 -0
- data/lib/iiif_print/data/fileset_helper.rb +25 -0
- data/lib/iiif_print/data/path_helper.rb +40 -0
- data/lib/iiif_print/data/work_derivatives.rb +323 -0
- data/lib/iiif_print/data/work_file.rb +92 -0
- data/lib/iiif_print/data/work_files.rb +199 -0
- data/lib/iiif_print/data.rb +35 -0
- data/lib/iiif_print/engine.rb +77 -0
- data/lib/iiif_print/errors.rb +9 -0
- data/lib/iiif_print/image_tool.rb +119 -0
- data/lib/iiif_print/jobs/application_job.rb +8 -0
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
- data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
- data/lib/iiif_print/jp2_derivative_service.rb +118 -0
- data/lib/iiif_print/jp2_image_metadata.rb +81 -0
- data/lib/iiif_print/lineage_service.rb +41 -0
- data/lib/iiif_print/metadata.rb +125 -0
- data/lib/iiif_print/pdf_derivative_service.rb +42 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
- data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
- data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
- data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
- data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
- data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
- data/lib/iiif_print/text_extraction.rb +11 -0
- data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
- data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
- data/lib/iiif_print/tiff_derivative_service.rb +50 -0
- data/lib/iiif_print/version.rb +3 -0
- data/lib/iiif_print/works_controller_behavior.rb +9 -0
- data/lib/iiif_print.rb +136 -0
- data/lib/tasks/set_child_works.rake +22 -0
- data/spec/.keep.txt +1 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/uploaded_txt_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
- data/spec/helpers/iiif_print_helper_spec.rb +43 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
- data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
- data/spec/iiif_print/configuration_spec.rb +67 -0
- data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
- data/spec/iiif_print/data/work_file_spec.rb +99 -0
- data/spec/iiif_print/data/work_files_spec.rb +237 -0
- data/spec/iiif_print/image_tool_spec.rb +109 -0
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
- data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
- data/spec/iiif_print/lineage_service_spec.rb +13 -0
- data/spec/iiif_print/metadata_spec.rb +115 -0
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
- data/spec/iiif_print_spec.rb +51 -0
- data/spec/misc_shared.rb +111 -0
- data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
- data/spec/spec_helper.rb +181 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/support/iiif_print_models.rb +127 -0
- data/spec/test_app_templates/blacklight.yml +9 -0
- data/spec/test_app_templates/fedora.yml +15 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
- data/spec/test_app_templates/redis.yml +9 -0
- data/spec/test_app_templates/solr/conf/schema.xml +362 -0
- data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
- data/spec/test_app_templates/solr.yml +7 -0
- data/tasks/iiif_print_dev.rake +34 -0
- data/tmp/.keep +0 -0
- metadata +605 -0
@@ -0,0 +1,178 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
RSpec.describe IiifPrint::PluggableDerivativeService do
|
5
|
+
let(:persisted_file_set) do
|
6
|
+
fs = FileSet.new
|
7
|
+
work.title = ['This is a page!']
|
8
|
+
work.members.push(fs)
|
9
|
+
fs.instance_variable_set(:@mime_type, 'image/tiff')
|
10
|
+
fs.save!(validate: false)
|
11
|
+
work.save!(validate: false)
|
12
|
+
fs
|
13
|
+
end
|
14
|
+
|
15
|
+
let(:fixture_path) do
|
16
|
+
File.join(
|
17
|
+
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
|
18
|
+
)
|
19
|
+
end
|
20
|
+
|
21
|
+
describe "service registration" do
|
22
|
+
# integration test with Hyrax: verify the service is registered
|
23
|
+
|
24
|
+
it "is registered with Hyrax" do
|
25
|
+
expect(Hyrax::DerivativeService.services).to include described_class
|
26
|
+
end
|
27
|
+
|
28
|
+
it "is the first valid service found" do
|
29
|
+
file_set = double(FileSet,
|
30
|
+
class: FileSet,
|
31
|
+
mime_type: 'application/pdf',
|
32
|
+
parent: MyIiifConfiguredWorkWithAllDerivativeServices.new)
|
33
|
+
found = Hyrax::DerivativeService.for(file_set)
|
34
|
+
expect(found).to be_a described_class
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
context "when the FileSet's parent is not IiifPrint configured" do
|
39
|
+
before do
|
40
|
+
allow(persisted_file_set).to receive(:in_works).and_return([work])
|
41
|
+
end
|
42
|
+
|
43
|
+
let(:work) { MyWork.new }
|
44
|
+
|
45
|
+
describe "#plugins" do
|
46
|
+
it "uses the default derivatives service" do
|
47
|
+
file_set = double(FileSet,
|
48
|
+
class: FileSet,
|
49
|
+
mime_type: 'application/pdf',
|
50
|
+
parent: MyWork.new)
|
51
|
+
service = described_class.new(file_set)
|
52
|
+
expect(service.plugins).to eq [Hyrax::FileSetDerivativesService]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
context "when the FileSet's parent is IiifPrint configured" do
|
58
|
+
describe "calls the configured derivative plugins" do
|
59
|
+
before do
|
60
|
+
allow(persisted_file_set).to receive(:in_works).and_return([work])
|
61
|
+
allow_any_instance_of(Hyrax::FileSetDerivativesService).to receive(:send)
|
62
|
+
end
|
63
|
+
|
64
|
+
let(:work) { MyIiifConfiguredWork.new }
|
65
|
+
let(:plugin) { FakeDerivativeService.new }
|
66
|
+
|
67
|
+
it "calls each plugin on create" do
|
68
|
+
service = described_class.new(persisted_file_set, plugins: [plugin])
|
69
|
+
expect do
|
70
|
+
service.create_derivatives('not_a_real_filename')
|
71
|
+
end.to change(plugin, :create_called).by(1)
|
72
|
+
end
|
73
|
+
|
74
|
+
def touch_fake_derivative_file(file_set, ext)
|
75
|
+
path = Hyrax::DerivativePath.derivative_path_for_reference(file_set, ext)
|
76
|
+
FileUtils.mkdir_p(File.dirname(path)) # ensure the parent directory exists before touching the file
|
77
|
+
FileUtils.touch(path)
|
78
|
+
end
|
79
|
+
|
80
|
+
it "does not re-create existing derivative" do
|
81
|
+
service = described_class.new(persisted_file_set, plugins: [plugin])
|
82
|
+
expect(persisted_file_set.id).not_to be_nil
|
83
|
+
expect do
|
84
|
+
touch_fake_derivative_file(persisted_file_set, plugin.target_extension)
|
85
|
+
service.create_derivatives('/nonsense/source/path/ignored ')
|
86
|
+
end.not_to change(plugin, :create_called)
|
87
|
+
end
|
88
|
+
|
89
|
+
it "calls each plugin on cleanup" do
|
90
|
+
service = described_class.new(persisted_file_set, plugins: [plugin])
|
91
|
+
expect { service.cleanup_derivatives }.to change(plugin, :cleanup_called).by(1)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
context "integration tests for plugins" do
|
96
|
+
before do
|
97
|
+
allow(persisted_file_set).to receive(:in_works).and_return([work])
|
98
|
+
end
|
99
|
+
|
100
|
+
let(:work) { MyIiifConfiguredWorkWithAllDerivativeServices.new }
|
101
|
+
|
102
|
+
describe "calls all derivative plugins" do
|
103
|
+
def source_image(name)
|
104
|
+
File.join(fixture_path, name)
|
105
|
+
end
|
106
|
+
|
107
|
+
def derivatives_for(file_set)
|
108
|
+
Hyrax::DerivativePath.derivatives_for_reference(file_set)
|
109
|
+
end
|
110
|
+
|
111
|
+
def expected_plugins
|
112
|
+
[
|
113
|
+
Hyrax::FileSetDerivativesService,
|
114
|
+
IiifPrint::JP2DerivativeService,
|
115
|
+
IiifPrint::PDFDerivativeService,
|
116
|
+
IiifPrint::TextExtractionDerivativeService,
|
117
|
+
IiifPrint::TIFFDerivativeService
|
118
|
+
]
|
119
|
+
end
|
120
|
+
|
121
|
+
# The expected set of Plugins that will run for file set
|
122
|
+
it "has expected valid plugins configured" do
|
123
|
+
plugins = described_class.new(persisted_file_set).plugins
|
124
|
+
fs = persisted_file_set
|
125
|
+
services = plugins.map { |plugin| plugin.new(fs) }.select(&:valid?)
|
126
|
+
expect(services.length).to eq 5
|
127
|
+
used_plugins = services.map(&:class)
|
128
|
+
expected_plugins.each do |plugin|
|
129
|
+
expect(used_plugins).to include plugin
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
it "creates expected derivatives from TIFF source" do
|
134
|
+
svc = described_class.new(persisted_file_set)
|
135
|
+
svc.create_derivatives(source_image('4.1.07.tiff'))
|
136
|
+
made = derivatives_for(persisted_file_set)
|
137
|
+
made.each { |path| expect(File.exist?(path)).to be true } # expect() needs a matcher to assert anything
|
138
|
+
extensions = made.map { |path| path.split('.')[-1] }
|
139
|
+
expect(extensions).to include 'pdf'
|
140
|
+
expect(extensions).to include 'jp2'
|
141
|
+
expect(extensions).not_to include 'tiff'
|
142
|
+
# Thumbnail, created by Hyrax:
|
143
|
+
expect(extensions).to include 'jpeg'
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "ingest integration" do
|
148
|
+
def log_attachment(file_set)
|
149
|
+
# create a log entry for the fileset given destination name 'jp2'
|
150
|
+
IiifPrint::DerivativeAttachment.create(
|
151
|
+
fileset_id: file_set.id,
|
152
|
+
path: '/some/arbitrary/path/to.jp2',
|
153
|
+
destination_name: 'jp2'
|
154
|
+
)
|
155
|
+
end
|
156
|
+
|
157
|
+
def jp2_plugin?(plugins)
|
158
|
+
r = plugins.select { |p| p.is_a? IiifPrint::JP2DerivativeService }
|
159
|
+
!r.empty?
|
160
|
+
end
|
161
|
+
|
162
|
+
it "will not attempt creating over pre-made derivative" do
|
163
|
+
service = described_class.new(persisted_file_set)
|
164
|
+
# this should be respected, evaluate by obtaining filtered
|
165
|
+
# services list, which must omit JP2DerivativeService
|
166
|
+
plugins = service.services(:create_derivatives)
|
167
|
+
# initially has jp2 plugin
|
168
|
+
expect(jp2_plugin?(plugins)).to be true
|
169
|
+
# blacklist jp2 by effect of log entry of pre-made attachment
|
170
|
+
log_attachment(service.file_set)
|
171
|
+
# omits, after logging intent of previous attachment:
|
172
|
+
plugins = service.services(:create_derivatives)
|
173
|
+
expect(jp2_plugin?(plugins)).to be false
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'misc_shared'
|
4
|
+
|
5
|
+
RSpec.describe IiifPrint::TextExtractionDerivativeService do
|
6
|
+
include_context "shared setup"
|
7
|
+
|
8
|
+
let(:valid_file_set) do
|
9
|
+
file_set = FileSet.new
|
10
|
+
file_set.save!(validate: false)
|
11
|
+
file_set
|
12
|
+
end
|
13
|
+
|
14
|
+
let(:work) do
|
15
|
+
work = NewspaperPage.create(title: ["Hello"])
|
16
|
+
work.members << valid_file_set
|
17
|
+
work.tap(&:save!) # yield the saved work itself, not the boolean returned by save!
|
18
|
+
end
|
19
|
+
|
20
|
+
let(:minimal_alto) do
|
21
|
+
File.join(fixture_path, 'minimal-alto.xml')
|
22
|
+
end
|
23
|
+
|
24
|
+
let(:altoxsd) do
|
25
|
+
xsdpath = File.join(fixture_path, 'alto-2-0.xsd')
|
26
|
+
Nokogiri::XML::Schema(File.read(xsdpath))
|
27
|
+
end
|
28
|
+
|
29
|
+
describe "Creates ALTO derivative" do
|
30
|
+
def source_image(name)
|
31
|
+
File.join(fixture_path, name)
|
32
|
+
end
|
33
|
+
|
34
|
+
def expected_path(file_set, ext)
|
35
|
+
Hyrax::DerivativePath.derivative_path_for_reference(file_set, ext)
|
36
|
+
end
|
37
|
+
|
38
|
+
def validate_alto(filename)
|
39
|
+
altoxsd.validate(filename)
|
40
|
+
end
|
41
|
+
|
42
|
+
def derivative_exists(ext)
|
43
|
+
path = expected_path(valid_file_set, ext)
|
44
|
+
expect(File.exist?(path)).to be true
|
45
|
+
expect(File.size(path)).to be > 0
|
46
|
+
end
|
47
|
+
|
48
|
+
xit "creates, stores valid ALTO and plain-text derivatives" do
|
49
|
+
# these are in same test to avoid duplicate OCR operation
|
50
|
+
service = described_class.new(valid_file_set)
|
51
|
+
service.create_derivatives(source_image('ocr_mono.tiff'))
|
52
|
+
# ALTO derivative file exists at expected path and validates:
|
53
|
+
expect(altoxsd.validate(expected_path(valid_file_set, 'xml'))).to be_empty # validate returns an array of errors
|
54
|
+
# Plain text exists as non-empty file:
|
55
|
+
derivative_exists('txt')
|
56
|
+
derivative_exists('json')
|
57
|
+
json_path = expected_path(valid_file_set, 'json')
|
58
|
+
loaded_result = JSON.parse(File.read(json_path))
|
59
|
+
expect(loaded_result['coords'].length).to be > 1
|
60
|
+
end
|
61
|
+
|
62
|
+
xit "usually uses OCR, when no existing text" do
|
63
|
+
service = described_class.new(valid_file_set)
|
64
|
+
# here, service will delegate create_derivatives to OCR impl method:
|
65
|
+
expect(service).to receive(:create_derivatives_from_ocr)
|
66
|
+
service.create_derivatives(source_image('ocr_mono.tiff'))
|
67
|
+
end
|
68
|
+
|
69
|
+
xit "defers to existing ALTO sources, when present" do
|
70
|
+
# Attach some ALTO to a work
|
71
|
+
derivatives = IiifPrint::Data::WorkDerivatives.of(
|
72
|
+
work,
|
73
|
+
valid_file_set
|
74
|
+
)
|
75
|
+
derivatives.attach(minimal_alto, 'xml')
|
76
|
+
# In this case, service will not call the OCR implementation method:
|
77
|
+
service = described_class.new(valid_file_set)
|
78
|
+
expect(service).not_to receive(:create_derivatives_from_ocr)
|
79
|
+
service.create_derivatives(source_image('ocr_mono.tiff'))
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'misc_shared'
|
4
|
+
|
5
|
+
RSpec.describe IiifPrint::TextFormatsFromALTOService do
|
6
|
+
include_context "shared setup"
|
7
|
+
|
8
|
+
let(:valid_file_set) do
|
9
|
+
file_set = FileSet.new
|
10
|
+
file_set.save!(validate: false)
|
11
|
+
file_set
|
12
|
+
end
|
13
|
+
|
14
|
+
let(:work) do
|
15
|
+
work = NewspaperPage.create(title: ["Hello"])
|
16
|
+
work.members << valid_file_set
|
17
|
+
work.save!
|
18
|
+
work
|
19
|
+
end
|
20
|
+
|
21
|
+
let(:minimal_alto) do
|
22
|
+
File.join(fixture_path, 'minimal-alto.xml')
|
23
|
+
end
|
24
|
+
|
25
|
+
def log_incoming_attachment(fsid)
|
26
|
+
IiifPrint::DerivativeAttachment.create!(
|
27
|
+
fileset_id: fsid,
|
28
|
+
path: minimal_alto,
|
29
|
+
destination_name: 'xml'
|
30
|
+
)
|
31
|
+
end
|
32
|
+
|
33
|
+
def derivatives_of(work, fileset)
|
34
|
+
IiifPrint::Data::WorkDerivatives.of(work, fileset)
|
35
|
+
end
|
36
|
+
|
37
|
+
describe "Saves other formats from ALTO" do
|
38
|
+
xit "saves JSON, text from existing ALTO derivative" do
|
39
|
+
derivatives = derivatives_of(work, valid_file_set)
|
40
|
+
expect(derivatives.keys.size).to eq 0
|
41
|
+
derivatives.attach(minimal_alto, 'xml')
|
42
|
+
expect(derivatives.keys.size).to eq 1
|
43
|
+
service = described_class.new(valid_file_set)
|
44
|
+
service.create_derivatives('/some/random/primary/path/does_not/matter')
|
45
|
+
derivatives.load_paths
|
46
|
+
expect(derivatives.keys.size).to eq 3
|
47
|
+
expect(derivatives.keys).to include 'json', 'txt'
|
48
|
+
end
|
49
|
+
|
50
|
+
xit "saves JSON, text from incoming ALTO derivative" do
|
51
|
+
derivatives = derivatives_of(work, valid_file_set)
|
52
|
+
expect(derivatives.keys.size).to eq 0
|
53
|
+
log_incoming_attachment(valid_file_set.id)
|
54
|
+
service = described_class.new(valid_file_set)
|
55
|
+
service.create_derivatives('/some/random/primary/path/does_not/matter')
|
56
|
+
# reload keys to check derivatives:
|
57
|
+
derivatives.load_paths
|
58
|
+
expect(derivatives.keys).to include 'json', 'txt'
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
describe "scaling matters" do
|
63
|
+
# we need an ingested, characterized file:
|
64
|
+
do_now_jobs = [
|
65
|
+
IngestLocalFileJob,
|
66
|
+
IngestJob,
|
67
|
+
InheritPermissionsJob,
|
68
|
+
CharacterizeJob
|
69
|
+
]
|
70
|
+
# we omit CreateDerivativesJob from above, as obviously duplicative and
|
71
|
+
# therefore potential cause of problems here.
|
72
|
+
|
73
|
+
# remove any previous test run (development) artifacts in file
|
74
|
+
# attachment logging tables
|
75
|
+
before(:all) do
|
76
|
+
IiifPrint::DerivativeAttachment.all.delete_all
|
77
|
+
IiifPrint::IngestFileRelation.all.delete_all
|
78
|
+
end
|
79
|
+
|
80
|
+
let(:work) do
|
81
|
+
work = NewspaperPage.create(title: ["Hello"])
|
82
|
+
work
|
83
|
+
end
|
84
|
+
|
85
|
+
let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
|
86
|
+
let(:ocr_alto_path) do
|
87
|
+
File.join(fixture_path, 'ocr_alto_scaled_4pts_per_px.xml')
|
88
|
+
end
|
89
|
+
|
90
|
+
def attach_primary_file(work)
|
91
|
+
IiifPrint::Data::WorkFiles.assign!(to: work, path: tiff_path)
|
92
|
+
work.reload
|
93
|
+
pcdm_file = IiifPrint::Data::WorkFiles.of(work).values[0].unwrapped
|
94
|
+
expect(pcdm_file).not_to be_nil
|
95
|
+
# we have image dimensions (px) to work with:
|
96
|
+
expect(pcdm_file.width[0].to_i).to be > 0 # nil.to_i is 0, so this fails when width is missing
|
97
|
+
expect(pcdm_file.height[0].to_i).to be > 0 # nil.to_i is 0, so this fails when height is missing
|
98
|
+
end
|
99
|
+
|
100
|
+
def derivatives_of(work)
|
101
|
+
IiifPrint::Data::WorkFiles.of(work).derivatives
|
102
|
+
end
|
103
|
+
|
104
|
+
def attach_alto(work)
|
105
|
+
derivatives = derivatives_of(work)
|
106
|
+
derivatives.attach(ocr_alto_path, 'xml')
|
107
|
+
# has a path to now-stored derivative:
|
108
|
+
expect(derivatives.path('xml')).not_to be_nil
|
109
|
+
end
|
110
|
+
|
111
|
+
xit "scales ALTO points to original image", perform_enqueued: do_now_jobs do
|
112
|
+
attach_primary_file(work)
|
113
|
+
attach_alto(work)
|
114
|
+
work.reload
|
115
|
+
file_set = work.ordered_members.to_a.find { |m| m.is_a? FileSet }
|
116
|
+
service = described_class.new(file_set)
|
117
|
+
service.create_derivatives('/a/path/here/needed/but/will/not/matter')
|
118
|
+
coords = JSON.parse(derivatives_of(work).data('json'))
|
119
|
+
word = coords['coords'].select { |k, _v| k == 'Bethesda' }
|
120
|
+
# test against known scaled coordinate of OCR data:
|
121
|
+
# This roughly matches unscaled ALTO data for token 'Bethesda'
|
122
|
+
# in spec/fixtures/files/ocr_alto.xml, with the disclaimer that
|
123
|
+
# round-trip rounding error of 1px is noted for VPOS.
|
124
|
+
expect(word['Bethesda']).to eq [[16, 665, 78, 16]]
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
RSpec.describe IiifPrint::TIFFDerivativeService do
|
3
|
+
let(:valid_file_set) do
|
4
|
+
file_set = FileSet.new
|
5
|
+
file_set.save!(validate: false)
|
6
|
+
file_set
|
7
|
+
end
|
8
|
+
|
9
|
+
let(:fixture_path) do
|
10
|
+
File.join(
|
11
|
+
IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
|
12
|
+
)
|
13
|
+
end
|
14
|
+
|
15
|
+
describe "Creates TIFF derivatives" do
|
16
|
+
def source_image(name)
|
17
|
+
File.join(fixture_path, name)
|
18
|
+
end
|
19
|
+
|
20
|
+
def expected_path(file_set)
|
21
|
+
Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'tiff')
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_res(path)
|
25
|
+
tool = IiifPrint::ImageTool.new(path)
|
26
|
+
"#{tool.metadata[:width]}x#{tool.metadata[:height]}"
|
27
|
+
end
|
28
|
+
|
29
|
+
def check_dpi_match(orig, dest)
|
30
|
+
# check ppi, but skip pdf to avoid ghostscript warnings to stderr
|
31
|
+
expect(get_res(orig)).to eq get_res(dest) unless orig.end_with?('pdf')
|
32
|
+
end
|
33
|
+
|
34
|
+
def makes_tiff(filename)
|
35
|
+
path = source_image(filename)
|
36
|
+
expected = expected_path(valid_file_set)
|
37
|
+
expect(File.exist?(expected)).to be false
|
38
|
+
svc = described_class.new(valid_file_set)
|
39
|
+
svc.create_derivatives(path)
|
40
|
+
expect(File.exist?(expected)).to be true
|
41
|
+
mime = IiifPrint::ImageTool.new(expected).metadata[:content_type]
|
42
|
+
expect(mime).to eq 'image/tiff'
|
43
|
+
check_dpi_match(path, expected)
|
44
|
+
svc.cleanup_derivatives
|
45
|
+
end
|
46
|
+
|
47
|
+
# for cases where primary file is TIFF already
|
48
|
+
def avoids_duplicative_creation(filename)
|
49
|
+
expected = expected_path(valid_file_set)
|
50
|
+
expect(File.exist?(expected)).to be false
|
51
|
+
svc = described_class.new(valid_file_set)
|
52
|
+
svc.create_derivatives(source_image(filename))
|
53
|
+
expect(File.exist?(expected)).not_to be true
|
54
|
+
end
|
55
|
+
|
56
|
+
it "Does not make TIFF derivatives when primary is TIFF" do
|
57
|
+
avoids_duplicative_creation('ocr_mono.tiff')
|
58
|
+
avoids_duplicative_creation('ocr_gray.tiff')
|
59
|
+
end
|
60
|
+
|
61
|
+
it "creates TIFF from PDF source, robust to multi-page" do
|
62
|
+
makes_tiff('sample-color-newsletter.pdf')
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# testing environment:
|
4
|
+
ENV['RAILS_ENV'] ||= 'test'
|
5
|
+
|
6
|
+
require 'coveralls'
|
7
|
+
Coveralls.wear!
|
8
|
+
|
9
|
+
require 'shoulda/matchers'
|
10
|
+
# Hook shoulda-matchers into RSpec as the test framework.
Shoulda::Matchers.configure do |shoulda|
  shoulda.integrate { |with| with.test_framework :rspec }
end
|
15
|
+
|
16
|
+
# engine_cart:
|
17
|
+
require 'bundler/setup'
|
18
|
+
require 'engine_cart'
|
19
|
+
EngineCart.load_application!
|
20
|
+
|
21
|
+
require 'rspec/rails'
|
22
|
+
require 'support/iiif_print_models'
|
23
|
+
require 'support/controller_level_helpers'
|
24
|
+
require 'rspec/active_model/mocks'
|
25
|
+
|
26
|
+
ActiveJob::Base.queue_adapter = :test
|
27
|
+
|
28
|
+
RSpec.configure do |config|
|
29
|
+
# enable FactoryBot:
|
30
|
+
require 'factory_bot'
|
31
|
+
config.include FactoryBot::Syntax::Methods
|
32
|
+
# auto-detect and load all factories in spec/factories:
|
33
|
+
FactoryBot.find_definitions
|
34
|
+
|
35
|
+
config.infer_spec_type_from_file_location!
|
36
|
+
|
37
|
+
# Transactional
|
38
|
+
config.use_transactional_fixtures = false
|
39
|
+
config.include Devise::Test::ControllerHelpers, type: :controller
|
40
|
+
|
41
|
+
# ensure Hyrax has active sipity workflow for default admin set:
|
42
|
+
config.before(:suite) do
  require 'active_fedora/cleaner'
  require 'database_cleaner'

  # Hyrax defaults to a database-backed minter, which is the preferred
  # pathway because minting state is tracked in the database. The test suite
  # neither needs nor wants the minter migrations, so the file-based minter
  # is used instead.
  file_minter = ::Noid::Rails::Minter::File
  ::Noid::Rails.config.minter_class = file_minter
  Hyrax.config.noid_minter_class = file_minter

  # Start the suite from a clean repository and database.
  ActiveFedora::Cleaner.clean!
  DatabaseCleaner.clean_with(:truncation)

  begin
    # TODO: switch the calls below to the appropriate services rather than
    #   the deprecated methods currently being used.

    # Make sure the permission template exists in the RDBMS.
    default_source_id = 'admin_set/default'
    unless Hyrax::PermissionTemplate.find_by(source_id: default_source_id)
      Hyrax::PermissionTemplate.create!(source_id: default_source_id)
    end

    # Workflows presume the permission template already exists.
    Hyrax::Workflow::WorkflowImporter.load_workflows

    # The default admin set needs to exist in Fedora, with its relation to
    # the PermissionTemplate object.
    admin_set =
      begin
        AdminSet.find(AdminSet.find_or_create_default_admin_set_id).tap(&:save!)
      rescue ActiveRecord::RecordNotUnique
        AdminSet.find(AdminSet::DEFAULT_ID)
      end

    # Activate the workflow named 'default' for that admin set's template.
    template = admin_set.permission_template
    default_workflow = template.available_workflows.where(name: 'default').first
    Sipity::Workflow.activate!(
      permission_template: template,
      workflow_id: default_workflow.id
    )
  rescue Faraday::ConnectionFailed
    STDERR.puts "Attempting to run test suite without Fedora and/or Solr..."
  end
end
|
85
|
+
|
86
|
+
# :perform_enqueued config setting below copied from Hyrax spec_helper.rb
|
87
|
+
config.before(:example, :perform_enqueued) do |example|
  # Use the example's :perform_enqueued metadata as the adapter's job filter;
  # `try(:to_a)` yields nil when the value does not respond to `to_a`.
  job_filter = example.metadata[:perform_enqueued].try(:to_a)

  ActiveJob::Base.queue_adapter.tap do |adapter|
    adapter.filter = job_filter
    adapter.perform_enqueued_jobs = true
    adapter.perform_enqueued_at_jobs = true
  end
end
|
92
|
+
config.after(:example, :perform_enqueued) do
  # Undo what the matching before-hook set up: clear the filter and the
  # recorded jobs, and stop performing enqueued jobs.
  adapter = ActiveJob::Base.queue_adapter
  adapter.filter = nil
  adapter.enqueued_jobs = []
  adapter.performed_jobs = []
  adapter.perform_enqueued_jobs = false
  adapter.perform_enqueued_at_jobs = false
end
|
99
|
+
config.after(:suite) do # or :each or :all
  # Remove everything under tmp/derivatives in the Rails root.
  leftovers = Rails.root.join('tmp', 'derivatives', '*')
  FileUtils.rm_rf(Dir[leftovers])
end
|
102
|
+
|
103
|
+
# rspec-expectations config goes here. You can use an alternate
|
104
|
+
# assertion/expectation library such as wrong or the stdlib/minitest
|
105
|
+
# assertions if you prefer.
|
106
|
+
config.expect_with :rspec do |expect_config|
  # Make the `description` and `failure_message` of custom matchers include
  # the text of helper methods defined with `chain`. For example
  #     be_bigger_than(2).and_smaller_than(4).description
  # then reads "be bigger than 2 and smaller than 4" instead of just
  # "be bigger than 2". This becomes the default in RSpec 4.
  expect_config.include_chain_clauses_in_custom_matcher_descriptions = true
end
|
116
|
+
|
117
|
+
# rspec-mocks config goes here. You can use an alternate test double
|
118
|
+
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
119
|
+
config.mock_with :rspec do |mock_config|
  # Refuse to mock or stub a method that does not exist on the real object.
  # Generally recommended; becomes the default in RSpec 4.
  mock_config.verify_partial_doubles = true
end
|
125
|
+
|
126
|
+
# This option will default to `:apply_to_host_groups` in RSpec 4 (and will
|
127
|
+
# have no way to turn it off -- the option exists only for backwards
|
128
|
+
# compatibility in RSpec 3). It causes shared context metadata to be
|
129
|
+
# inherited by the metadata hash of host groups and examples, rather than
|
130
|
+
# triggering implicit auto-inclusion in groups with matching metadata.
|
131
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
132
|
+
|
133
|
+
# The settings below are suggested to provide a good initial experience
|
134
|
+
# with RSpec, but feel free to customize to your heart's content.
|
135
|
+
|
136
|
+
# This allows you to limit a spec run to individual examples or groups
|
137
|
+
# you care about by tagging them with `:focus` metadata. When nothing
|
138
|
+
# is tagged with `:focus`, all examples get run. RSpec also provides
|
139
|
+
# aliases for `it`, `describe`, and `context` that include `:focus`
|
140
|
+
# metadata: `fit`, `fdescribe` and `fcontext`, respectively.
|
141
|
+
# config.filter_run_when_matching :focus
|
142
|
+
|
143
|
+
# Allows RSpec to persist some state between runs in order to support
|
144
|
+
# the `--only-failures` and `--next-failure` CLI options. We recommend
|
145
|
+
# you configure your source control system to ignore this file.
|
146
|
+
# config.example_status_persistence_file_path = "spec/examples.txt"
|
147
|
+
|
148
|
+
# Limits the available syntax to the non-monkey patched syntax that is
|
149
|
+
# recommended. For more details, see:
|
150
|
+
# - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
|
151
|
+
# - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
|
152
|
+
# - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
|
153
|
+
# config.disable_monkey_patching!
|
154
|
+
|
155
|
+
# Many RSpec users commonly either run the entire suite or an individual
|
156
|
+
# file, and it's useful to allow more verbose output when running an
|
157
|
+
# individual spec file.
|
158
|
+
# if config.files_to_run.one?
|
159
|
+
# Use the documentation formatter for detailed output,
|
160
|
+
# unless a formatter has already been configured
|
161
|
+
# (e.g. via a command-line flag).
|
162
|
+
# config.default_formatter = "doc"
|
163
|
+
# end
|
164
|
+
|
165
|
+
# Print the 10 slowest examples and example groups at the
|
166
|
+
# end of the spec run, to help surface which specs are running
|
167
|
+
# particularly slow.
|
168
|
+
config.profile_examples = 10
|
169
|
+
|
170
|
+
# Run specs in random order to surface order dependencies. If you find an
|
171
|
+
# order dependency and want to debug it, you can fix the order by providing
|
172
|
+
# the seed, which is printed after each run.
|
173
|
+
# --seed 1234
|
174
|
+
config.order = :random
|
175
|
+
|
176
|
+
# Seed global randomization in this process using the `--seed` CLI option.
|
177
|
+
# Setting this allows you to use `--seed` to deterministically reproduce
|
178
|
+
# test failures related to randomization by passing the same `--seed` value
|
179
|
+
# as the one that triggered the failure.
|
180
|
+
Kernel.srand config.seed
|
181
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# copied from Hyrax
|
3
|
+
|
4
|
+
module ControllerLevelHelpers
  # Stand-ins for methods a controller normally provides, for use in view
  # tests.
  module ControllerViewHelpers
    # Lets a spec override the configuration, e.g.
    #   view.blacklight_config = Blacklight::Configuration.new
    attr_writer :blacklight_config

    # Memoized search state built from the CatalogController's search state
    # class with +params+, +blacklight_config+ and +controller+.
    def search_state
      return @search_state if @search_state

      state_class = CatalogController.search_state_class
      @search_state = state_class.new(params, blacklight_config, controller)
    end

    # The configuration set through the writer, falling back to (and
    # memoizing) CatalogController.blacklight_config.
    def blacklight_config
      @blacklight_config || (@blacklight_config = CatalogController.blacklight_config)
    end

    # Memoized Blacklight::Configuration::Context wrapping +controller+.
    def blacklight_configuration_context
      return @blacklight_configuration_context if @blacklight_configuration_context

      @blacklight_configuration_context = Blacklight::Configuration::Context.new(controller)
    end
  end

  # Extends +helper+ with ControllerViewHelpers.
  #
  # @param helper [Object] the object to extend (e.g. a view)
  # @return [Object] +helper+, as returned by Object#extend
  def initialize_controller_helpers(helper)
    helper.extend(ControllerViewHelpers)
  end
end
|