iiif_print 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.env +5 -0
- data/.fcrepo_wrapper +4 -0
- data/.github/release.yml +20 -0
- data/.github/workflows/branches.yml +24 -0
- data/.github/workflows/build-lint-test-action.yaml +33 -0
- data/.github/workflows/release_labels.yml +25 -0
- data/.gitignore +52 -0
- data/.rubocop.yml +177 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +49 -0
- data/CONTRIBUTING.md +181 -0
- data/Dockerfile +15 -0
- data/Gemfile +52 -0
- data/LICENSE +203 -0
- data/README.md +203 -0
- data/Rakefile +38 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
- data/app/assets/config/iiif_print_manifest.js +2 -0
- data/app/assets/images/iiif_print/.keep +0 -0
- data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/iiif_print.js +3 -0
- data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
- data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
- data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
- data/app/helpers/hyrax/iiif_helper.rb +22 -0
- data/app/helpers/iiif_print/application_helper.rb +5 -0
- data/app/helpers/iiif_print_helper.rb +64 -0
- data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
- data/app/mailers/iiif_print/application_mailer.rb +8 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
- data/app/models/concerns/iiif_print/solr/document.rb +47 -0
- data/app/models/iiif_print/application_record.rb +6 -0
- data/app/models/iiif_print/derivative_attachment.rb +8 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
- data/app/models/iiif_print/ingest_file_relation.rb +14 -0
- data/app/models/iiif_print/pending_relationship.rb +7 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
- data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
- data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
- data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/hyrax/base/_representative_media.html.erb +9 -0
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/iiif_print.de.yml +148 -0
- data/config/locales/iiif_print.en.yml +119 -0
- data/config/locales/iiif_print.es.yml +148 -0
- data/config/locales/iiif_print.fr.yml +149 -0
- data/config/locales/iiif_print.it.yml +142 -0
- data/config/locales/iiif_print.pt-BR.yml +148 -0
- data/config/locales/iiif_print.zh.yml +142 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/fits.xml +55 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
- data/docker-compose.yml +129 -0
- data/iiif_print.gemspec +43 -0
- data/lib/generators/iiif_print/assets_generator.rb +29 -0
- data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
- data/lib/generators/iiif_print/install_generator.rb +52 -0
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
- data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
- data/lib/iiif_print/base_derivative_service.rb +113 -0
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
- data/lib/iiif_print/catalog_search_builder.rb +31 -0
- data/lib/iiif_print/configuration.rb +99 -0
- data/lib/iiif_print/data/fileset_helper.rb +25 -0
- data/lib/iiif_print/data/path_helper.rb +40 -0
- data/lib/iiif_print/data/work_derivatives.rb +323 -0
- data/lib/iiif_print/data/work_file.rb +92 -0
- data/lib/iiif_print/data/work_files.rb +199 -0
- data/lib/iiif_print/data.rb +35 -0
- data/lib/iiif_print/engine.rb +77 -0
- data/lib/iiif_print/errors.rb +9 -0
- data/lib/iiif_print/image_tool.rb +119 -0
- data/lib/iiif_print/jobs/application_job.rb +8 -0
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
- data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
- data/lib/iiif_print/jp2_derivative_service.rb +118 -0
- data/lib/iiif_print/jp2_image_metadata.rb +81 -0
- data/lib/iiif_print/lineage_service.rb +41 -0
- data/lib/iiif_print/metadata.rb +125 -0
- data/lib/iiif_print/pdf_derivative_service.rb +42 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
- data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
- data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
- data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
- data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
- data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
- data/lib/iiif_print/text_extraction.rb +11 -0
- data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
- data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
- data/lib/iiif_print/tiff_derivative_service.rb +50 -0
- data/lib/iiif_print/version.rb +3 -0
- data/lib/iiif_print/works_controller_behavior.rb +9 -0
- data/lib/iiif_print.rb +136 -0
- data/lib/tasks/set_child_works.rake +22 -0
- data/spec/.keep.txt +1 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/uploaded_txt_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
- data/spec/helpers/iiif_print_helper_spec.rb +43 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
- data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
- data/spec/iiif_print/configuration_spec.rb +67 -0
- data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
- data/spec/iiif_print/data/work_file_spec.rb +99 -0
- data/spec/iiif_print/data/work_files_spec.rb +237 -0
- data/spec/iiif_print/image_tool_spec.rb +109 -0
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
- data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
- data/spec/iiif_print/lineage_service_spec.rb +13 -0
- data/spec/iiif_print/metadata_spec.rb +115 -0
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
- data/spec/iiif_print_spec.rb +51 -0
- data/spec/misc_shared.rb +111 -0
- data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
- data/spec/spec_helper.rb +181 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/support/iiif_print_models.rb +127 -0
- data/spec/test_app_templates/blacklight.yml +9 -0
- data/spec/test_app_templates/fedora.yml +15 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
- data/spec/test_app_templates/redis.yml +9 -0
- data/spec/test_app_templates/solr/conf/schema.xml +362 -0
- data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
- data/spec/test_app_templates/solr.yml +7 -0
- data/tasks/iiif_print_dev.rake +34 -0
- data/tmp/.keep +0 -0
- metadata +605 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'misc_shared'
|
|
3
|
+
|
|
4
|
+
RSpec.describe IiifPrint::Data::WorkFiles do
|
|
5
|
+
include_context "shared setup"
|
|
6
|
+
|
|
7
|
+
let(:work) { work_with_file }
|
|
8
|
+
let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
|
|
9
|
+
# file:// URI built from the absolute path of the TIFF fixture
let(:tiff_uri) { "file://#{File.expand_path(tiff_path)}" }
|
|
10
|
+
|
|
11
|
+
describe "adapter composition" do
|
|
12
|
+
it "adapts work" do
|
|
13
|
+
adapter = described_class.new(work)
|
|
14
|
+
expect(adapter.work).to be work
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it "adapts work with 'of' alt constructor" do
|
|
18
|
+
adapter = described_class.of(work)
|
|
19
|
+
expect(adapter.work).to be work
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
describe "path assignment queueing" do
|
|
24
|
+
it "queues assigned file path" do
|
|
25
|
+
adapter = described_class.of(work)
|
|
26
|
+
expect(adapter.assigned).to be_empty
|
|
27
|
+
# assign a valid source path
|
|
28
|
+
adapter.assign(tiff_path)
|
|
29
|
+
expect(adapter.assigned).to include tiff_path
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it "will fail to assign file in non registered dir" do
|
|
33
|
+
adapter = described_class.new(work)
|
|
34
|
+
# need a non-registered file that exists:
|
|
35
|
+
bad_path = File.expand_path("../../spec_helper.rb", fixture_path)
|
|
36
|
+
expect { adapter.assign(bad_path) }.to raise_error(SecurityError)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
it "queues a file:/// URI" do
|
|
40
|
+
adapter = described_class.of(work)
|
|
41
|
+
expect(adapter.assigned).to be_empty
|
|
42
|
+
adapter.assign(tiff_uri)
|
|
43
|
+
expect(adapter.assigned).to include tiff_uri
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "queues a Pathname, normalized to string" do
|
|
47
|
+
adapter = described_class.of(work)
|
|
48
|
+
expect(adapter.assigned).to be_empty
|
|
49
|
+
adapter.assign(Pathname.new(tiff_path))
|
|
50
|
+
expect(adapter.assigned).to include tiff_path
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it "unqueues a queued path" do
|
|
54
|
+
adapter = described_class.of(work)
|
|
55
|
+
adapter.assign(tiff_path)
|
|
56
|
+
expect(adapter.assigned).to include tiff_path
|
|
57
|
+
adapter.unassign(tiff_path)
|
|
58
|
+
expect(adapter.assigned).to be_empty
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
describe "hash/mapping-like file enumeration" do
|
|
63
|
+
it "has expected WorkFile in values for work" do
|
|
64
|
+
adapter = described_class.of(work)
|
|
65
|
+
values = adapter.values
|
|
66
|
+
expect(values).to be_an Array
|
|
67
|
+
expect(values.size).to eq 1
|
|
68
|
+
expect(values[0]).to be_an IiifPrint::Data::WorkFile
|
|
69
|
+
expect(values[0].parent).to be adapter
|
|
70
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
|
71
|
+
expect(values[0].fileset).to eq first_fileset
|
|
72
|
+
expect(values[0].unwrapped).to be_a Hydra::PCDM::File
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it "has expected fileset keys for work" do
|
|
76
|
+
adapter = described_class.of(work)
|
|
77
|
+
keys = adapter.keys
|
|
78
|
+
expect(keys).to be_an Array
|
|
79
|
+
expect(keys[0]).to be_a String
|
|
80
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
|
81
|
+
expect(keys[0]).to eq first_fileset.id
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
it "has expected entries for work" do
|
|
85
|
+
adapter = described_class.of(work)
|
|
86
|
+
entries = adapter.entries
|
|
87
|
+
expect(entries).to be_an Array
|
|
88
|
+
expect(entries[0]).to be_an Array
|
|
89
|
+
expect(entries[0].size).to eq 2
|
|
90
|
+
expect(entries[0][0]).to eq adapter.keys[0]
|
|
91
|
+
expect(entries[0][1]).to eq adapter.values[0]
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it "gets work file by fileset id" do
|
|
95
|
+
adapter = described_class.of(work)
|
|
96
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
|
97
|
+
fsid = adapter.keys[0]
|
|
98
|
+
expect(fsid).to eq first_fileset.id
|
|
99
|
+
work_file = adapter.get(fsid)
|
|
100
|
+
expect(work_file.unwrapped).to eq first_fileset.original_file
|
|
101
|
+
work_file = adapter[fsid]
|
|
102
|
+
expect(work_file.unwrapped).to eq first_fileset.original_file
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "gets work file by work-local filename" do
|
|
106
|
+
adapter = described_class.of(work)
|
|
107
|
+
first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
|
108
|
+
name = first_fileset.original_file.original_name
|
|
109
|
+
work_file = adapter.get(name)
|
|
110
|
+
expect(work_file).to eq adapter.get(first_fileset.id)
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it "verifies inclusion of fileset id key" do
|
|
114
|
+
adapter = described_class.of(work)
|
|
115
|
+
fsid = adapter.keys[0]
|
|
116
|
+
expect(adapter.include?(fsid)).to be true
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
describe "assignment state" do
|
|
121
|
+
it "has empty state for work with no files" do
|
|
122
|
+
bare_work = MyWork.new
|
|
123
|
+
bare_work.title = ['No files to see here']
|
|
124
|
+
bare_work.save!
|
|
125
|
+
adapter = described_class.of(bare_work)
|
|
126
|
+
expect(adapter.keys.empty?).to be true
|
|
127
|
+
expect(adapter.state).to eq 'empty'
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "has 'dirty' state when files assigned" do
|
|
131
|
+
adapter = described_class.of(work)
|
|
132
|
+
expect(adapter.state).to eq 'saved'
|
|
133
|
+
adapter.assign(tiff_path)
|
|
134
|
+
# changes to dirty
|
|
135
|
+
expect(adapter.state).to eq 'dirty'
|
|
136
|
+
# unassign path again to empty assigned queue:
|
|
137
|
+
adapter.unassign(tiff_path)
|
|
138
|
+
# now we are back to 'saved' since no changes are queued:
|
|
139
|
+
expect(adapter.state).to eq 'saved'
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
describe "commits changes" do
|
|
144
|
+
# We need to register these jobs to run now, at minimum:
|
|
145
|
+
do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]
|
|
146
|
+
# These we skip: [CharacterizeJob, CreateDerivativesJob]
|
|
147
|
+
# -- skipping these saves 10-15 seconds on attachment example
|
|
148
|
+
|
|
149
|
+
permission_methods = [
|
|
150
|
+
:edit_users,
|
|
151
|
+
:read_users,
|
|
152
|
+
:discover_users,
|
|
153
|
+
:edit_groups,
|
|
154
|
+
:read_groups,
|
|
155
|
+
:discover_groups
|
|
156
|
+
]
|
|
157
|
+
|
|
158
|
+
let(:bare_work) do
|
|
159
|
+
bare_work = MyWork.new
|
|
160
|
+
bare_work.title = ['No files to see here']
|
|
161
|
+
bare_work.save!
|
|
162
|
+
bare_work
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
it "commits unassign (file deletions)" do
|
|
166
|
+
adapter = described_class.of(work)
|
|
167
|
+
expect(adapter.keys.size).to eq 1
|
|
168
|
+
adapter.unassign(adapter.keys[0])
|
|
169
|
+
adapter.commit!
|
|
170
|
+
expect(adapter.keys.size).to eq 0
|
|
171
|
+
expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 0
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
context "when it is a new work" do
|
|
175
|
+
it "commit for assignment invokes actor stack" do
|
|
176
|
+
work = MyWork.new(title: ['Just a new work'])
|
|
177
|
+
adapter = described_class.of(work)
|
|
178
|
+
adapter.assign(tiff_path)
|
|
179
|
+
allow(Hyrax::CurationConcern.actor).to receive(:create).and_return(true)
|
|
180
|
+
expect(Hyrax::CurationConcern.actor).to receive(:create)
|
|
181
|
+
expect(adapter.commit!).to be true
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
context "when the work already exists" do
|
|
186
|
+
it "commit for assignment invokes actor stack" do
|
|
187
|
+
work = bare_work
|
|
188
|
+
adapter = described_class.of(work)
|
|
189
|
+
adapter.assign(tiff_path)
|
|
190
|
+
allow(Hyrax::CurationConcern.actor).to receive(:update).and_return(true)
|
|
191
|
+
expect(Hyrax::CurationConcern.actor).to receive(:update)
|
|
192
|
+
expect(adapter.commit!).to be true
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
xit "commits successful file attachment", perform_enqueued: do_now_jobs do
|
|
197
|
+
work = bare_work
|
|
198
|
+
adapter = described_class.of(work)
|
|
199
|
+
adapter.assign(tiff_path)
|
|
200
|
+
adapter.commit!
|
|
201
|
+
# registered jobs (do_now_jobs) performed as effect of commit!
|
|
202
|
+
# are configured to effectively run inline. Reloading work
|
|
203
|
+
# should refresh the work.members, and by consequence adapter.keys
|
|
204
|
+
work.reload
|
|
205
|
+
expect(adapter.keys.size).to eq 1
|
|
206
|
+
expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 1
|
|
207
|
+
expect(adapter.names).to include 'ocr_gray.tiff'
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
xit "copies work perimssions to fileset", perform_enqueued: do_now_jobs do
|
|
211
|
+
adapter = described_class.of(bare_work)
|
|
212
|
+
adapter.assign(tiff_path)
|
|
213
|
+
adapter.commit!
|
|
214
|
+
bare_work.reload
|
|
215
|
+
fileset = bare_work.members.detect { |m| m.is_a?(FileSet) }
|
|
216
|
+
permission_methods.each do |m|
|
|
217
|
+
expect(fileset.send(m)).to match_array bare_work.send(m)
|
|
218
|
+
end
|
|
219
|
+
expect(fileset.visibility).to eq bare_work.visibility
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
describe "derivative access" do
|
|
224
|
+
it "gets derivatives for first fileset" do
|
|
225
|
+
fileset = work.members.detect { |m| m.is_a?(FileSet) }
|
|
226
|
+
adapter = described_class.of(work)
|
|
227
|
+
# adapts same context(s):
|
|
228
|
+
expect(adapter.derivatives.fileset.id).to eq fileset.id
|
|
229
|
+
expect(adapter.derivatives.work).to be work
|
|
230
|
+
expect(adapter.derivatives.class).to eq \
|
|
231
|
+
IiifPrint::Data::WorkDerivatives
|
|
232
|
+
# transitive parent/child relationship, can traverse to adapter from
|
|
233
|
+
# derivatives:
|
|
234
|
+
expect(adapter.derivatives.parent.parent).to be adapter
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'tmpdir'
|
|
3
|
+
|
|
4
|
+
describe IiifPrint::ImageTool do
|
|
5
|
+
let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
|
|
6
|
+
|
|
7
|
+
# Image fixtures to test identification, metadata extraction for:
|
|
8
|
+
let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
|
|
9
|
+
let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
|
|
10
|
+
let(:gray_tiff) { File.join(fixtures, 'ocr_gray.tiff') }
|
|
11
|
+
let(:mono_tiff) { File.join(fixtures, 'ocr_mono.tiff') }
|
|
12
|
+
let(:color_tiff) { File.join(fixtures, '4.1.07.tiff') }
|
|
13
|
+
let(:pdf) { File.join(fixtures, 'minimal-1-page.pdf') }
|
|
14
|
+
|
|
15
|
+
describe "Extracts metadata with JP2 backend" do
|
|
16
|
+
it "constructs with a path" do
|
|
17
|
+
identify = described_class.new(gray_jp2)
|
|
18
|
+
expect(identify.path).to eq gray_jp2
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it "gets metadata for grayscale JP2 image" do
|
|
22
|
+
result = described_class.new(gray_jp2).metadata
|
|
23
|
+
expect(result[:color]).to eq 'gray'
|
|
24
|
+
expect(result[:width]).to eq 418
|
|
25
|
+
expect(result[:height]).to eq 1046
|
|
26
|
+
expect(result[:bits_per_component]).to eq 8
|
|
27
|
+
expect(result[:num_components]).to eq 1
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it "gets metadata for color JP2 image" do
|
|
31
|
+
result = described_class.new(color_jp2).metadata
|
|
32
|
+
expect(result[:color]).to eq 'color'
|
|
33
|
+
expect(result[:width]).to eq 256
|
|
34
|
+
expect(result[:height]).to eq 256
|
|
35
|
+
expect(result[:bits_per_component]).to eq 8
|
|
36
|
+
# i.e. 3 components; would be 4 if the sample image had an alpha channel
|
|
37
|
+
expect(result[:num_components]).to eq 3
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
describe "Extracts metadata for non-JP2 images with imagemagick" do
|
|
42
|
+
it "gets metadata for gray TIFF image" do
|
|
43
|
+
result = described_class.new(gray_tiff).metadata
|
|
44
|
+
expect(result[:color]).to eq 'gray'
|
|
45
|
+
expect(result[:width]).to eq 418
|
|
46
|
+
expect(result[:height]).to eq 1046
|
|
47
|
+
expect(result[:bits_per_component]).to eq 8
|
|
48
|
+
expect(result[:num_components]).to eq 1
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
it "gets metadata for monochrome TIFF image" do
|
|
52
|
+
result = described_class.new(mono_tiff).metadata
|
|
53
|
+
expect(result[:color]).to eq 'monochrome'
|
|
54
|
+
expect(result[:width]).to eq 1261
|
|
55
|
+
expect(result[:height]).to eq 1744
|
|
56
|
+
expect(result[:bits_per_component]).to eq 1
|
|
57
|
+
expect(result[:num_components]).to eq 1
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it "gets metadata for color TIFF image" do
|
|
61
|
+
result = described_class.new(color_tiff).metadata
|
|
62
|
+
expect(result[:color]).to eq 'color'
|
|
63
|
+
expect(result[:width]).to eq 256
|
|
64
|
+
expect(result[:height]).to eq 256
|
|
65
|
+
expect(result[:bits_per_component]).to eq 8
|
|
66
|
+
# i.e. 3 components; would be 4 if the sample image had an alpha channel
|
|
67
|
+
expect(result[:num_components]).to eq 3
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
it "detects mime type of pdf" do
|
|
71
|
+
result = described_class.new(pdf).metadata
|
|
72
|
+
expect(result[:content_type]).to eq 'application/pdf'
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
describe "converts images" do
|
|
77
|
+
it "makes a monochrome TIFF from JP2" do
|
|
78
|
+
tool = described_class.new(gray_jp2)
|
|
79
|
+
dest = File.join(Dir.mktmpdir, 'mono.tif')
|
|
80
|
+
tool.convert(dest, true)
|
|
81
|
+
expect(File.exist?(dest)).to be true
|
|
82
|
+
expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
it "makes a gray TIFF from JP2" do
|
|
86
|
+
tool = described_class.new(gray_jp2)
|
|
87
|
+
dest = File.join(Dir.mktmpdir, 'gray.tif')
|
|
88
|
+
tool.convert(dest, false)
|
|
89
|
+
expect(File.exist?(dest)).to be true
|
|
90
|
+
expect(described_class.new(dest).metadata[:color]).to eq 'gray'
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it "makes a monochrome TIFF from grayscale TIFF" do
|
|
94
|
+
tool = described_class.new(gray_tiff)
|
|
95
|
+
dest = File.join(Dir.mktmpdir, 'mono.tif')
|
|
96
|
+
tool.convert(dest, true)
|
|
97
|
+
expect(File.exist?(dest)).to be true
|
|
98
|
+
expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Not yet supported to use this tool to make JP2, for now the only
|
|
102
|
+
# component in IiifPrint doing that is
|
|
103
|
+
# IiifPrint::JP2DerivativeService
|
|
104
|
+
it "raises error on JP2 destination" do
|
|
105
|
+
expect { described_class.new(gray_tiff).convert('out.jp2') }.to \
|
|
106
|
+
raise_error(RuntimeError)
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'misc_shared'
|
|
3
|
+
|
|
4
|
+
RSpec.describe IiifPrint::Jobs::ChildWorksFromPdfJob do
|
|
5
|
+
# TODO: add specs
|
|
6
|
+
let(:work) { WorkWithIiifPrintConfig.new(title: ['required title']) }
|
|
7
|
+
let(:my_user) { build(:user) }
|
|
8
|
+
let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
|
|
9
|
+
let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
|
|
10
|
+
let(:pdf_paths) do
|
|
11
|
+
uploads = Hyrax::UploadedFile.find(uploaded_file_ids)
|
|
12
|
+
upload_paths = uploads.map { |upload| upload.file.file.file }
|
|
13
|
+
upload_paths.select { |path| path.end_with?('.pdf', '.PDF') }
|
|
14
|
+
end
|
|
15
|
+
let(:admin_set_id) { "admin_set/default" }
|
|
16
|
+
let(:prior_pdfs) { 0 }
|
|
17
|
+
|
|
18
|
+
# Runs the job under test with the fixtures declared above. The PDF paths and
# user arguments use this spec's own let names (pdf_paths, my_user).
# NOTE(review): confirm the job class actually exposes a class-level `perform`.
let(:subject) { described_class.perform(work, pdf_paths, my_user, admin_set_id, prior_pdfs) }
|
|
19
|
+
|
|
20
|
+
describe '#perform' do
|
|
21
|
+
xit 'calls pdf splitter service with path' do
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
xit 'submits one BatchCreateJob per PDF' do
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
xit 'submits IiifPrint::Jobs::CreateRelationshipsJob' do
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'misc_shared'
|
|
3
|
+
|
|
4
|
+
RSpec.describe IiifPrint::Jobs::CreateRelationshipsJob do
|
|
5
|
+
# TODO: add specs
|
|
6
|
+
let(:parent) { WorkWithIiifPrintConfig.new(title: ['required title']) }
|
|
7
|
+
let(:my_user) { build(:user) }
|
|
8
|
+
let(:parent_model) { WorkWithIiifPrintConfig }
|
|
9
|
+
let(:child_model) { WorkWithIiifPrintConfig }
|
|
10
|
+
|
|
11
|
+
# Runs the job under test for the parent work, using the models declared above.
let(:subject) do
  described_class.perform(
    user: my_user,
    parent_id: parent.id,
    parent_model: parent_model,
    child_model: child_model
  )
end
|
|
12
|
+
|
|
13
|
+
describe '#perform' do
|
|
14
|
+
xit 'loads all child work ids into ordered_members' do
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe IiifPrint::JP2ImageMetadata do
|
|
4
|
+
let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
|
|
5
|
+
|
|
6
|
+
let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
|
|
7
|
+
|
|
8
|
+
let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
|
|
9
|
+
|
|
10
|
+
describe "Extracts technical metadata from a JP2 file" do
|
|
11
|
+
it "constructs with a path" do
|
|
12
|
+
meta = described_class.new(gray_jp2)
|
|
13
|
+
expect(meta.path).to eq gray_jp2
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it "gets metadata for grayscale image" do
|
|
17
|
+
meta = described_class.new(gray_jp2)
|
|
18
|
+
result = meta.technical_metadata
|
|
19
|
+
expect(result[:color]).to eq 'gray'
|
|
20
|
+
expect(result[:width]).to eq 418
|
|
21
|
+
expect(result[:height]).to eq 1046
|
|
22
|
+
expect(result[:bits_per_component]).to eq 8
|
|
23
|
+
expect(result[:num_components]).to eq 1
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it "gets metadata for color image" do
|
|
27
|
+
meta = described_class.new(color_jp2)
|
|
28
|
+
result = meta.technical_metadata
|
|
29
|
+
expect(result[:color]).to eq 'color'
|
|
30
|
+
expect(result[:width]).to eq 256
|
|
31
|
+
expect(result[:height]).to eq 256
|
|
32
|
+
expect(result[:bits_per_component]).to eq 8
|
|
33
|
+
# i.e. 3 components; would be 4 if the sample image had an alpha channel
|
|
34
|
+
expect(result[:num_components]).to eq 3
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe IiifPrint::Metadata do
|
|
4
|
+
let(:base_url) { "https://my.dev.test" }
|
|
5
|
+
let(:solr_document) { SolrDocument.new(attributes) }
|
|
6
|
+
let(:fields) do
|
|
7
|
+
metadata_fields.map do |field|
|
|
8
|
+
SampleField.new(
|
|
9
|
+
name: field.first,
|
|
10
|
+
label: Hyrax::Renderers::AttributeRenderer.new(field, nil).label,
|
|
11
|
+
options: field.last
|
|
12
|
+
)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
let(:metadata_fields) do
|
|
16
|
+
{
|
|
17
|
+
title: {},
|
|
18
|
+
description: {},
|
|
19
|
+
date_modified: {}
|
|
20
|
+
}
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
SampleField = Struct.new(:name, :label, :options, keyword_init: true)
|
|
24
|
+
|
|
25
|
+
describe ".build_metadata_for" do
|
|
26
|
+
subject(:manifest_metadata) do
|
|
27
|
+
described_class.build_metadata_for(
|
|
28
|
+
work: solr_document,
|
|
29
|
+
version: version,
|
|
30
|
+
fields: fields,
|
|
31
|
+
current_ability: double(Ability),
|
|
32
|
+
base_url: base_url
|
|
33
|
+
)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
context "for version 2 of the IIIF spec" do
|
|
37
|
+
let(:version) { 2 }
|
|
38
|
+
|
|
39
|
+
context "with a field that has some plain text" do
|
|
40
|
+
let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
|
|
41
|
+
|
|
42
|
+
it "maps the metadata accordingly" do
|
|
43
|
+
expect(manifest_metadata).to eq [
|
|
44
|
+
{ "label" => "Title", "value" => ["My Awesome Title"] }
|
|
45
|
+
]
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
context "with a field that contains a url string" do
|
|
50
|
+
let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
|
|
51
|
+
|
|
52
|
+
it "creates a link for the url string" do
|
|
53
|
+
expect(manifest_metadata).to eq [
|
|
54
|
+
{ "label" => "Description",
|
|
55
|
+
"value" =>
|
|
56
|
+
[
|
|
57
|
+
"A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"
|
|
58
|
+
] }
|
|
59
|
+
]
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
context "with a date" do
|
|
64
|
+
let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
|
|
65
|
+
|
|
66
|
+
it "displays it just the date" do
|
|
67
|
+
expect(manifest_metadata).to eq [{ "label" => "Date modified", "value" => ["2011-11-11"] }]
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
context "with a faceted option" do
|
|
72
|
+
let(:metadata_fields) { { creator: { render_as: :faceted } } }
|
|
73
|
+
let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
|
|
74
|
+
|
|
75
|
+
it "adds a link to the faceted search" do
|
|
76
|
+
expect(manifest_metadata). to eq [
|
|
77
|
+
{ "label" => "Creator",
|
|
78
|
+
"value" =>
|
|
79
|
+
["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
|
|
80
|
+
]
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
context "when the work is apart of a collection" do
|
|
85
|
+
let(:metadata_fields) { { collection: {} } }
|
|
86
|
+
let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
|
|
87
|
+
let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
|
|
88
|
+
let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
|
|
89
|
+
|
|
90
|
+
it "renders a link to the collection" do
|
|
91
|
+
allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
|
|
92
|
+
expect(manifest_metadata).to eq [
|
|
93
|
+
{ "label" => "Collection",
|
|
94
|
+
"value" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] }
|
|
95
|
+
]
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
context "for version 3 of the IIIF spec", skip: "version 3 metadata not implemented yet" do
|
|
101
|
+
let(:version) { 3 }
|
|
102
|
+
|
|
103
|
+
it "maps the metadata accordingly" do
|
|
104
|
+
# NOTE: this assumes the I18n.locale is set as :en
|
|
105
|
+
expect(manifest_metadata).to eq [
|
|
106
|
+
{ "label" => { "en" => ["Title"] }, "value" => { "none" => ["My Awesome Title"] } },
|
|
107
|
+
{ "label" => { "en" => ["Description"] },
|
|
108
|
+
"value" => { "none" => ["This is and awesome description"] } },
|
|
109
|
+
{ "label" => { "en" => ["Date modified"] }, "value" => { "none" => ["2011-11-11"] } },
|
|
110
|
+
{ "label" => { "en" => ["Creator"] }, "value" => { "none" => ["McAuthor, Arthur"] } }
|
|
111
|
+
]
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'spec_helper'
|
|
3
|
+
|
|
4
|
+
# Exercises AltoReader against two ALTO fixtures: a minimal document and a
# larger NDNP sample. Readers are built both from raw XML text and from a path.
RSpec.describe IiifPrint::TextExtraction::AltoReader do
  let(:fixture_path) { File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files') }

  let(:minimal_path) { File.join(fixture_path, 'minimal-alto.xml') }
  let(:ndnp_alto_path) { File.join(fixture_path, 'ndnp-alto-sample.xml') }
  let(:minimal) { File.read(minimal_path) }

  # Same minimal fixture, supplied once as XML text and once as a file path.
  let(:reader_minimal) { described_class.new(minimal) }
  let(:reader_minimal_path) { described_class.new(minimal_path) }
  let(:reader_ndnp) { described_class.new(ndnp_alto_path) }

  describe "reads alto" do
    it "loads ALTO source" do
      source_from_path = reader_minimal_path.source

      # Path-based and text-based construction must yield the same source.
      expect(source_from_path).to eq(reader_minimal.source)
      expect(source_from_path.size).to eq(1383)
      expect(reader_ndnp.source.size).to eq(1_050_876)
    end

    it "loads document stream" do
      stream = reader_minimal_path.doc_stream

      expect(stream).to be_a(Nokogiri::XML::SAX::Document)
      expect(stream).to respond_to(:text, :words)
    end
  end

  describe "outputs text derivative formats" do
    it "outputs plain text" do
      # try simple flat text input
      minimal_text = reader_minimal.text
      expect(minimal_text).to eq("This is only a test.")
      expect(minimal_text).to eq(reader_minimal.doc_stream.text)

      # try more complex input
      expect(reader_ndnp.text.size).to eq(30_519)
    end

    it "passes args to WordCoordsBuilder and receives output" do
      minimal_coords = JSON.parse(reader_minimal.json)['coords']
      expect(minimal_coords.length).to be > 1

      ndnp_coords = JSON.parse(reader_ndnp.json)['coords']
      expect(ndnp_coords.size).to eq(2_125)
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# Exercises HOCRReader against a single hOCR fixture, constructing the reader
# both from the file's text and from its path.
RSpec.describe IiifPrint::TextExtraction::HOCRReader do
  let(:fixture_path) { File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files') }

  let(:minimal_path) { File.join(fixture_path, 'ocr_mono_text_hocr.html') }
  let(:minimal) { File.read(minimal_path) }

  # Same fixture, supplied once as source text and once as a file path.
  let(:reader_minimal) { described_class.new(minimal) }
  let(:reader_minimal_path) { described_class.new(minimal_path) }

  describe "reads hOCR" do
    it "loads hOCR either from path or source text" do
      source_from_path = reader_minimal_path.source

      expect(source_from_path).to eq(reader_minimal.source)
      # size here is in Unicode characters, not bytes:
      expect(source_from_path.size).to eq(16_590)
    end

    it "loads document stream" do
      stream = reader_minimal_path.doc_stream

      expect(stream).to be_a(Nokogiri::XML::SAX::Document)
      expect(stream).to respond_to(:text, :words)
    end
  end

  describe "outputs text derivative formats" do
    it "outputs plain text" do
      plain_text = reader_minimal.text

      # Check the opening of the extracted text, then its agreement with the
      # underlying document stream, then its overall length.
      expect(plain_text[0, 40]).to eq("_A FEARFUL ADVENTURE.\n‘The Missouri. ")
      expect(reader_minimal.text).to eq(reader_minimal.doc_stream.text)
      expect(reader_minimal.text.size).to eq(831)
    end

    it "passes args to WordCoordsBuilder and receives output" do
      coords = JSON.parse(reader_minimal.json)['coords']
      expect(coords.length).to be > 1
    end
  end
end
|