iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,237 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::Data::WorkFiles do
5
+ include_context "shared setup"
6
+
7
+ let(:work) { work_with_file }
8
+ let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
9
+ let(:tiff_uri) { 'file://' + File.expand_path(tiff_path) }
10
+
11
+ describe "adapter composition" do
12
+ it "adapts work" do
13
+ adapter = described_class.new(work)
14
+ expect(adapter.work).to be work
15
+ end
16
+
17
+ it "adapts work with 'of' alt constructor" do
18
+ adapter = described_class.of(work)
19
+ expect(adapter.work).to be work
20
+ end
21
+ end
22
+
23
+ describe "path assignment queueing" do
24
+ it "queues assigned file path" do
25
+ adapter = described_class.of(work)
26
+ expect(adapter.assigned).to be_empty
27
+ # assign a valid source path
28
+ adapter.assign(tiff_path)
29
+ expect(adapter.assigned).to include tiff_path
30
+ end
31
+
32
+ it "will fail to assign file in non registered dir" do
33
+ adapter = described_class.new(work)
34
+ # need a non-registered file that exists:
35
+ bad_path = File.expand_path("../../spec_helper.rb", fixture_path)
36
+ expect { adapter.assign(bad_path) }.to raise_error(SecurityError)
37
+ end
38
+
39
+ it "queues a file:/// URI" do
40
+ adapter = described_class.of(work)
41
+ expect(adapter.assigned).to be_empty
42
+ adapter.assign(tiff_uri)
43
+ expect(adapter.assigned).to include tiff_uri
44
+ end
45
+
46
+ it "queues a Pathname, normalized to string" do
47
+ adapter = described_class.of(work)
48
+ expect(adapter.assigned).to be_empty
49
+ adapter.assign(Pathname.new(tiff_path))
50
+ expect(adapter.assigned).to include tiff_path
51
+ end
52
+
53
+ it "unqueues a queued path" do
54
+ adapter = described_class.of(work)
55
+ adapter.assign(tiff_path)
56
+ expect(adapter.assigned).to include tiff_path
57
+ adapter.unassign(tiff_path)
58
+ expect(adapter.assigned).to be_empty
59
+ end
60
+ end
61
+
62
+ describe "hash/mapping-like file enumeration" do
63
+ it "has expected WorkFile in values for work" do
64
+ adapter = described_class.of(work)
65
+ values = adapter.values
66
+ expect(values).to be_an Array
67
+ expect(values.size).to eq 1
68
+ expect(values[0]).to be_an IiifPrint::Data::WorkFile
69
+ expect(values[0].parent).to be adapter
70
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
71
+ expect(values[0].fileset).to eq first_fileset
72
+ expect(values[0].unwrapped).to be_a Hydra::PCDM::File
73
+ end
74
+
75
+ it "has expected fileset keys for work" do
76
+ adapter = described_class.of(work)
77
+ keys = adapter.keys
78
+ expect(keys).to be_an Array
79
+ expect(keys[0]).to be_a String
80
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
81
+ expect(keys[0]).to eq first_fileset.id
82
+ end
83
+
84
+ it "has expected entries for work" do
85
+ adapter = described_class.of(work)
86
+ entries = adapter.entries
87
+ expect(entries).to be_an Array
88
+ expect(entries[0]).to be_an Array
89
+ expect(entries[0].size).to eq 2
90
+ expect(entries[0][0]).to eq adapter.keys[0]
91
+ expect(entries[0][1]).to eq adapter.values[0]
92
+ end
93
+
94
+ it "gets work file by fileset id" do
95
+ adapter = described_class.of(work)
96
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
97
+ fsid = adapter.keys[0]
98
+ expect(fsid).to eq first_fileset.id
99
+ work_file = adapter.get(fsid)
100
+ expect(work_file.unwrapped).to eq first_fileset.original_file
101
+ work_file = adapter[fsid]
102
+ expect(work_file.unwrapped).to eq first_fileset.original_file
103
+ end
104
+
105
+ it "gets work file by work-local filename" do
106
+ adapter = described_class.of(work)
107
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
108
+ name = first_fileset.original_file.original_name
109
+ work_file = adapter.get(name)
110
+ expect(work_file).to eq adapter.get(first_fileset.id)
111
+ end
112
+
113
+ it "verifies inclusion of fileset id key" do
114
+ adapter = described_class.of(work)
115
+ fsid = adapter.keys[0]
116
+ expect(adapter.include?(fsid)).to be true
117
+ end
118
+ end
119
+
120
+ describe "assignment state" do
121
+ it "has empty state for work with no files" do
122
+ bare_work = MyWork.new
123
+ bare_work.title = ['No files to see here']
124
+ bare_work.save!
125
+ adapter = described_class.of(bare_work)
126
+ expect(adapter.keys.empty?).to be true
127
+ expect(adapter.state).to eq 'empty'
128
+ end
129
+
130
+ it "has 'dirty' state when files assigned" do
131
+ adapter = described_class.of(work)
132
+ expect(adapter.state).to eq 'saved'
133
+ adapter.assign(tiff_path)
134
+ # changes to dirty
135
+ expect(adapter.state).to eq 'dirty'
136
+ # unassign path again to empty assigned queue:
137
+ adapter.unassign(tiff_path)
138
+ # now we are back to 'saved' since no changes are queued:
139
+ expect(adapter.state).to eq 'saved'
140
+ end
141
+ end
142
+
143
+ describe "commits changes" do
144
+ # We need to register these jobs to run now, at minimum:
145
+ do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]
146
+ # These we skip: [CharacterizeJob, CreateDerivativesJob]
147
+ # -- skipping these saves 10-15 seconds on attachment example
148
+
149
+ permission_methods = [
150
+ :edit_users,
151
+ :read_users,
152
+ :discover_users,
153
+ :edit_groups,
154
+ :read_groups,
155
+ :discover_groups
156
+ ]
157
+
158
+ let(:bare_work) do
159
+ bare_work = MyWork.new
160
+ bare_work.title = ['No files to see here']
161
+ bare_work.save!
162
+ bare_work
163
+ end
164
+
165
+ it "commits unassign (file deletions)" do
166
+ adapter = described_class.of(work)
167
+ expect(adapter.keys.size).to eq 1
168
+ adapter.unassign(adapter.keys[0])
169
+ adapter.commit!
170
+ expect(adapter.keys.size).to eq 0
171
+ expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 0
172
+ end
173
+
174
+ context "when it is a new work" do
175
+ it "commit for assignment invokes actor stack" do
176
+ work = MyWork.new(title: ['Just a new work'])
177
+ adapter = described_class.of(work)
178
+ adapter.assign(tiff_path)
179
+ allow(Hyrax::CurationConcern.actor).to receive(:create).and_return(true)
180
+ expect(Hyrax::CurationConcern.actor).to receive(:create)
181
+ expect(adapter.commit!).to be true
182
+ end
183
+ end
184
+
185
+ context "when the work already exists" do
186
+ it "commit for assignment invokes actor stack" do
187
+ work = bare_work
188
+ adapter = described_class.of(work)
189
+ adapter.assign(tiff_path)
190
+ allow(Hyrax::CurationConcern.actor).to receive(:update).and_return(true)
191
+ expect(Hyrax::CurationConcern.actor).to receive(:update)
192
+ expect(adapter.commit!).to be true
193
+ end
194
+ end
195
+
196
+ xit "commits successful file attachment", perform_enqueued: do_now_jobs do
197
+ work = bare_work
198
+ adapter = described_class.of(work)
199
+ adapter.assign(tiff_path)
200
+ adapter.commit!
201
+ # registered jobs (do_now_jobs) performed as effect of commit!
202
+ # are configured to effectively run inline. Reloading work
203
+ # should refresh the work.members, and by consequence adapter.keys
204
+ work.reload
205
+ expect(adapter.keys.size).to eq 1
206
+ expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 1
207
+ expect(adapter.names).to include 'ocr_gray.tiff'
208
+ end
209
+
210
+ xit "copies work perimssions to fileset", perform_enqueued: do_now_jobs do
211
+ adapter = described_class.of(bare_work)
212
+ adapter.assign(tiff_path)
213
+ adapter.commit!
214
+ bare_work.reload
215
+ fileset = bare_work.members.detect { |m| m.is_a?(FileSet) }
216
+ permission_methods.each do |m|
217
+ expect(fileset.send(m)).to match_array bare_work.send(m)
218
+ end
219
+ expect(fileset.visibility).to eq bare_work.visibility
220
+ end
221
+ end
222
+
223
+ describe "derivative access" do
224
+ it "gets derivatives for first fileset" do
225
+ fileset = work.members.detect { |m| m.is_a?(FileSet) }
226
+ adapter = described_class.of(work)
227
+ # adapts same context(s):
228
+ expect(adapter.derivatives.fileset.id).to eq fileset.id
229
+ expect(adapter.derivatives.work).to be work
230
+ expect(adapter.derivatives.class).to eq \
231
+ IiifPrint::Data::WorkDerivatives
232
+ # transitive parent/child relationship, can traverse to adapter from
233
+ # derivatives:
234
+ expect(adapter.derivatives.parent.parent).to be adapter
235
+ end
236
+ end
237
+ end
@@ -0,0 +1,109 @@
1
+ require 'spec_helper'
2
+ require 'tmpdir'
3
+
4
+ describe IiifPrint::ImageTool do
5
+ let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
6
+
7
+ # Image fixtures to test identification, metadata extraction for:
8
+ let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
9
+ let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
10
+ let(:gray_tiff) { File.join(fixtures, 'ocr_gray.tiff') }
11
+ let(:mono_tiff) { File.join(fixtures, 'ocr_mono.tiff') }
12
+ let(:color_tiff) { File.join(fixtures, '4.1.07.tiff') }
13
+ let(:pdf) { File.join(fixtures, 'minimal-1-page.pdf') }
14
+
15
+ describe "Extracts metadata with JP2 backend" do
16
+ it "constructs with a path" do
17
+ identify = described_class.new(gray_jp2)
18
+ expect(identify.path).to eq gray_jp2
19
+ end
20
+
21
+ it "gets metadata for grayscale JP2 image" do
22
+ result = described_class.new(gray_jp2).metadata
23
+ expect(result[:color]).to eq 'gray'
24
+ expect(result[:width]).to eq 418
25
+ expect(result[:height]).to eq 1046
26
+ expect(result[:bits_per_component]).to eq 8
27
+ expect(result[:num_components]).to eq 1
28
+ end
29
+
30
+ it "gets metadata for color JP2 image" do
31
+ result = described_class.new(color_jp2).metadata
32
+ expect(result[:color]).to eq 'color'
33
+ expect(result[:width]).to eq 256
34
+ expect(result[:height]).to eq 256
35
+ expect(result[:bits_per_component]).to eq 8
36
+ # i.e. 3 here, but it would be 4 if the sample image had an alpha channel
37
+ expect(result[:num_components]).to eq 3
38
+ end
39
+ end
40
+
41
+ describe "Extracts metadata for non-JP2 images with imagemagick" do
42
+ it "gets metadata for gray TIFF image" do
43
+ result = described_class.new(gray_tiff).metadata
44
+ expect(result[:color]).to eq 'gray'
45
+ expect(result[:width]).to eq 418
46
+ expect(result[:height]).to eq 1046
47
+ expect(result[:bits_per_component]).to eq 8
48
+ expect(result[:num_components]).to eq 1
49
+ end
50
+
51
+ it "gets metadata for monochrome TIFF image" do
52
+ result = described_class.new(mono_tiff).metadata
53
+ expect(result[:color]).to eq 'monochrome'
54
+ expect(result[:width]).to eq 1261
55
+ expect(result[:height]).to eq 1744
56
+ expect(result[:bits_per_component]).to eq 1
57
+ expect(result[:num_components]).to eq 1
58
+ end
59
+
60
+ it "gets metadata for color TIFF image" do
61
+ result = described_class.new(color_tiff).metadata
62
+ expect(result[:color]).to eq 'color'
63
+ expect(result[:width]).to eq 256
64
+ expect(result[:height]).to eq 256
65
+ expect(result[:bits_per_component]).to eq 8
66
+ # i.e. 3 here, but it would be 4 if the sample image had an alpha channel
67
+ expect(result[:num_components]).to eq 3
68
+ end
69
+
70
+ it "detects mime type of pdf" do
71
+ result = described_class.new(pdf).metadata
72
+ expect(result[:content_type]).to eq 'application/pdf'
73
+ end
74
+ end
75
+
76
+ describe "converts images" do
77
+ it "makes a monochrome TIFF from JP2" do
78
+ tool = described_class.new(gray_jp2)
79
+ dest = File.join(Dir.mktmpdir, 'mono.tif')
80
+ tool.convert(dest, true)
81
+ expect(File.exist?(dest)).to be true
82
+ expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
83
+ end
84
+
85
+ it "makes a gray TIFF from JP2" do
86
+ tool = described_class.new(gray_jp2)
87
+ dest = File.join(Dir.mktmpdir, 'gray.tif')
88
+ tool.convert(dest, false)
89
+ expect(File.exist?(dest)).to be true
90
+ expect(described_class.new(dest).metadata[:color]).to eq 'gray'
91
+ end
92
+
93
+ it "makes a monochrome TIFF from grayscale TIFF" do
94
+ tool = described_class.new(gray_tiff)
95
+ dest = File.join(Dir.mktmpdir, 'mono.tif')
96
+ tool.convert(dest, true)
97
+ expect(File.exist?(dest)).to be true
98
+ expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
99
+ end
100
+
101
+ # Making a JP2 with this tool is not yet supported; for now the only
102
+ # component in IiifPrint that does so is
103
+ # IiifPrint::JP2DerivativeService
104
+ it "raises error on JP2 destination" do
105
+ expect { described_class.new(gray_tiff).convert('out.jp2') }.to \
106
+ raise_error(RuntimeError)
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::Jobs::ChildWorksFromPdfJob do
5
+ # TODO: add specs
6
+ let(:work) { WorkWithIiifPrintConfig.new(title: ['required title']) }
7
+ let(:my_user) { build(:user) }
8
+ let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
9
+ let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
10
+ let(:pdf_paths) do
11
+ uploads = Hyrax::UploadedFile.find(uploaded_file_ids)
12
+ upload_paths = uploads.map { |upload| upload.file.file.file }
13
+ upload_paths.select { |path| path.end_with?('.pdf', '.PDF') }
14
+ end
15
+ let(:admin_set_id) { "admin_set/default" }
16
+ let(:prior_pdfs) { 0 }
17
+
18
+ let(:subject) { described_class.perform(work, pdf_paths, my_user, admin_set_id, prior_pdfs) }
19
+
20
+ describe '#perform' do
21
+ xit 'calls pdf splitter service with path' do
22
+ end
23
+
24
+ xit 'submits one BatchCreateJob per PDF' do
25
+ end
26
+
27
+ xit 'submits IiifPrint::Jobs::CreateRelationshipsJob' do
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::Jobs::CreateRelationshipsJob do
5
+ # TODO: add specs
6
+ let(:parent) { WorkWithIiifPrintConfig.new(title: ['required title']) }
7
+ let(:my_user) { build(:user) }
8
+ let(:parent_model) { WorkWithIiifPrintConfig }
9
+ let(:child_model) { WorkWithIiifPrintConfig }
10
+
11
+ let(:subject) { described_class.perform(user: my_user, parent_id: parent.id, parent_model: parent_model, child_model: child_model) }
12
+
13
+ describe '#perform' do
14
+ xit 'loads all child work ids into ordered_members' do
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ describe IiifPrint::JP2ImageMetadata do
4
+ let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
5
+
6
+ let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
7
+
8
+ let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
9
+
10
+ describe "Extracts technical metadata from a JP2 file" do
11
+ it "constructs with a path" do
12
+ meta = described_class.new(gray_jp2)
13
+ expect(meta.path).to eq gray_jp2
14
+ end
15
+
16
+ it "gets metadata for grayscale image" do
17
+ meta = described_class.new(gray_jp2)
18
+ result = meta.technical_metadata
19
+ expect(result[:color]).to eq 'gray'
20
+ expect(result[:width]).to eq 418
21
+ expect(result[:height]).to eq 1046
22
+ expect(result[:bits_per_component]).to eq 8
23
+ expect(result[:num_components]).to eq 1
24
+ end
25
+
26
+ it "gets metadata for color image" do
27
+ meta = described_class.new(color_jp2)
28
+ result = meta.technical_metadata
29
+ expect(result[:color]).to eq 'color'
30
+ expect(result[:width]).to eq 256
31
+ expect(result[:height]).to eq 256
32
+ expect(result[:bits_per_component]).to eq 8
33
+ # i.e. 3 here, but it would be 4 if the sample image had an alpha channel
34
+ expect(result[:num_components]).to eq 3
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe IiifPrint::LineageService do
6
+ describe '.ancestor_ids_for' do
7
+ xit 'works'
8
+ end
9
+
10
+ describe '.descendent_file_set_ids_for' do
11
+ xit 'works'
12
+ end
13
+ end
@@ -0,0 +1,115 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::Metadata do
4
+ let(:base_url) { "https://my.dev.test" }
5
+ let(:solr_document) { SolrDocument.new(attributes) }
6
+ let(:fields) do
7
+ metadata_fields.map do |field|
8
+ SampleField.new(
9
+ name: field.first,
10
+ label: Hyrax::Renderers::AttributeRenderer.new(field, nil).label,
11
+ options: field.last
12
+ )
13
+ end
14
+ end
15
+ let(:metadata_fields) do
16
+ {
17
+ title: {},
18
+ description: {},
19
+ date_modified: {}
20
+ }
21
+ end
22
+
23
+ SampleField = Struct.new(:name, :label, :options, keyword_init: true)
24
+
25
+ describe ".build_metadata_for" do
26
+ subject(:manifest_metadata) do
27
+ described_class.build_metadata_for(
28
+ work: solr_document,
29
+ version: version,
30
+ fields: fields,
31
+ current_ability: double(Ability),
32
+ base_url: base_url
33
+ )
34
+ end
35
+
36
+ context "for version 2 of the IIIF spec" do
37
+ let(:version) { 2 }
38
+
39
+ context "with a field that has some plain text" do
40
+ let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
41
+
42
+ it "maps the metadata accordingly" do
43
+ expect(manifest_metadata).to eq [
44
+ { "label" => "Title", "value" => ["My Awesome Title"] }
45
+ ]
46
+ end
47
+ end
48
+
49
+ context "with a field that contains a url string" do
50
+ let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
51
+
52
+ it "creates a link for the url string" do
53
+ expect(manifest_metadata).to eq [
54
+ { "label" => "Description",
55
+ "value" =>
56
+ [
57
+ "A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"
58
+ ] }
59
+ ]
60
+ end
61
+ end
62
+
63
+ context "with a date" do
64
+ let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
65
+
66
+ it "displays it just the date" do
67
+ expect(manifest_metadata).to eq [{ "label" => "Date modified", "value" => ["2011-11-11"] }]
68
+ end
69
+ end
70
+
71
+ context "with a faceted option" do
72
+ let(:metadata_fields) { { creator: { render_as: :faceted } } }
73
+ let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
74
+
75
+ it "adds a link to the faceted search" do
76
+ expect(manifest_metadata). to eq [
77
+ { "label" => "Creator",
78
+ "value" =>
79
+ ["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
80
+ ]
81
+ end
82
+ end
83
+
84
+ context "when the work is apart of a collection" do
85
+ let(:metadata_fields) { { collection: {} } }
86
+ let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
87
+ let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
88
+ let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
89
+
90
+ it "renders a link to the collection" do
91
+ allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
92
+ expect(manifest_metadata).to eq [
93
+ { "label" => "Collection",
94
+ "value" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] }
95
+ ]
96
+ end
97
+ end
98
+ end
99
+
100
+ context "for version 3 of the IIIF spec", skip: "version 3 metadata not implemented yet" do
101
+ let(:version) { 3 }
102
+
103
+ it "maps the metadata accordingly" do
104
+ # NOTE: this assumes the I18n.locale is set as :en
105
+ expect(manifest_metadata).to eq [
106
+ { "label" => { "en" => ["Title"] }, "value" => { "none" => ["My Awesome Title"] } },
107
+ { "label" => { "en" => ["Description"] },
108
+ "value" => { "none" => ["This is and awesome description"] } },
109
+ { "label" => { "en" => ["Date modified"] }, "value" => { "none" => ["2011-11-11"] } },
110
+ { "label" => { "en" => ["Creator"] }, "value" => { "none" => ["McAuthor, Arthur"] } }
111
+ ]
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,6 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::SplitPdfs::PagesIntoImagesService do
5
+ # TODO: add specs
6
+ end
@@ -0,0 +1,49 @@
1
+ require 'json'
2
+ require 'spec_helper'
3
+
4
+ RSpec.describe IiifPrint::TextExtraction::AltoReader do
5
+ let(:fixture_path) do
6
+ File.join(
7
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
8
+ )
9
+ end
10
+
11
+ let(:minimal_path) { File.join(fixture_path, 'minimal-alto.xml') }
12
+ let(:ndnp_alto_path) { File.join(fixture_path, 'ndnp-alto-sample.xml') }
13
+ let(:minimal) { File.read(minimal_path) }
14
+
15
+ let(:reader_minimal) { described_class.new(minimal) }
16
+ let(:reader_minimal_path) { described_class.new(minimal_path) }
17
+ let(:reader_ndnp) { described_class.new(ndnp_alto_path) }
18
+
19
+ describe "reads alto" do
20
+ it "loads ALTO source" do
21
+ expect(reader_minimal_path.source).to eq reader_minimal.source
22
+ expect(reader_minimal_path.source.size).to eq 1383
23
+ expect(reader_ndnp.source.size).to eq 1_050_876
24
+ end
25
+
26
+ it "loads document stream" do
27
+ expect(reader_minimal_path.doc_stream).to be_kind_of Nokogiri::XML::SAX::Document
28
+ expect(reader_minimal_path.doc_stream).to respond_to :text
29
+ expect(reader_minimal_path.doc_stream).to respond_to :words
30
+ end
31
+ end
32
+
33
+ describe "outputs text derivative formats" do
34
+ it "outputs plain text" do
35
+ # try simple flat text input
36
+ expect(reader_minimal.text).to eq "This is only a test."
37
+ expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
38
+ # try more complex input
39
+ expect(reader_ndnp.text.size).to eq 30_519
40
+ end
41
+
42
+ it "passes args to WordCoordsBuilder and receives output" do
43
+ parsed = JSON.parse(reader_minimal.json)
44
+ expect(parsed['coords'].length).to be > 1
45
+ parsed = JSON.parse(reader_ndnp.json)
46
+ expect(parsed['coords'].size).to eq 2_125
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,45 @@
1
+ require 'json'
2
+ require 'nokogiri'
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe IiifPrint::TextExtraction::HOCRReader do
6
+ let(:fixture_path) do
7
+ File.join(
8
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
9
+ )
10
+ end
11
+
12
+ let(:minimal_path) { File.join(fixture_path, 'ocr_mono_text_hocr.html') }
13
+ let(:minimal) { File.read(minimal_path) }
14
+
15
+ let(:reader_minimal) { described_class.new(minimal) }
16
+ let(:reader_minimal_path) { described_class.new(minimal_path) }
17
+
18
+ describe "reads hOCR" do
19
+ it "loads hOCR either from path or source text" do
20
+ expect(reader_minimal_path.source).to eq reader_minimal.source
21
+ # size here is in Unicode characters, not bytes:
22
+ expect(reader_minimal_path.source.size).to eq 16_590
23
+ end
24
+
25
+ it "loads document stream" do
26
+ expect(reader_minimal_path.doc_stream).to be_kind_of Nokogiri::XML::SAX::Document
27
+ expect(reader_minimal_path.doc_stream).to respond_to :text
28
+ expect(reader_minimal_path.doc_stream).to respond_to :words
29
+ end
30
+ end
31
+
32
+ describe "outputs text derivative formats" do
33
+ it "outputs plain text" do
34
+ plain_text = reader_minimal.text
35
+ expect(plain_text.slice(0, 40)).to eq "_A FEARFUL ADVENTURE.\n‘The Missouri. "
36
+ expect(reader_minimal.text).to eq reader_minimal.doc_stream.text
37
+ expect(reader_minimal.text.size).to eq 831
38
+ end
39
+
40
+ it "passes args to WordCoordsBuilder and receives output" do
41
+ parsed = JSON.parse(reader_minimal.json)
42
+ expect(parsed['coords'].length).to be > 1
43
+ end
44
+ end
45
+ end