iiif_print 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,237 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::Data::WorkFiles do
5
+ include_context "shared setup"
6
+
7
+ let(:work) { work_with_file }
8
+ let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
9
+ let(:tiff_uri) { 'file://' + File.expand_path(tiff_path) }
10
+
11
+ describe "adapter composition" do
12
+ it "adapts work" do
13
+ adapter = described_class.new(work)
14
+ expect(adapter.work).to be work
15
+ end
16
+
17
+ it "adapts work with 'of' alt constructor" do
18
+ adapter = described_class.of(work)
19
+ expect(adapter.work).to be work
20
+ end
21
+ end
22
+
23
+ describe "path assignment queueing" do
24
+ it "queues assigned file path" do
25
+ adapter = described_class.of(work)
26
+ expect(adapter.assigned).to be_empty
27
+ # assign a valid source path
28
+ adapter.assign(tiff_path)
29
+ expect(adapter.assigned).to include tiff_path
30
+ end
31
+
32
+ it "will fail to assign file in non registered dir" do
33
+ adapter = described_class.new(work)
34
+ # need a non-registered file that exists:
35
+ bad_path = File.expand_path("../../spec_helper.rb", fixture_path)
36
+ expect { adapter.assign(bad_path) }.to raise_error(SecurityError)
37
+ end
38
+
39
+ it "queues a file:/// URI" do
40
+ adapter = described_class.of(work)
41
+ expect(adapter.assigned).to be_empty
42
+ adapter.assign(tiff_uri)
43
+ expect(adapter.assigned).to include tiff_uri
44
+ end
45
+
46
+ it "queues a Pathname, normalized to string" do
47
+ adapter = described_class.of(work)
48
+ expect(adapter.assigned).to be_empty
49
+ adapter.assign(Pathname.new(tiff_path))
50
+ expect(adapter.assigned).to include tiff_path
51
+ end
52
+
53
+ it "unqueues a queued path" do
54
+ adapter = described_class.of(work)
55
+ adapter.assign(tiff_path)
56
+ expect(adapter.assigned).to include tiff_path
57
+ adapter.unassign(tiff_path)
58
+ expect(adapter.assigned).to be_empty
59
+ end
60
+ end
61
+
62
+ describe "hash/mapping-like file enumeration" do
63
+ it "has expected WorkFile in values for work" do
64
+ adapter = described_class.of(work)
65
+ values = adapter.values
66
+ expect(values).to be_an Array
67
+ expect(values.size).to eq 1
68
+ expect(values[0]).to be_an IiifPrint::Data::WorkFile
69
+ expect(values[0].parent).to be adapter
70
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
71
+ expect(values[0].fileset).to eq first_fileset
72
+ expect(values[0].unwrapped).to be_a Hydra::PCDM::File
73
+ end
74
+
75
+ it "has expected fileset keys for work" do
76
+ adapter = described_class.of(work)
77
+ keys = adapter.keys
78
+ expect(keys).to be_an Array
79
+ expect(keys[0]).to be_a String
80
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
81
+ expect(keys[0]).to eq first_fileset.id
82
+ end
83
+
84
+ it "has expected entries for work" do
85
+ adapter = described_class.of(work)
86
+ entries = adapter.entries
87
+ expect(entries).to be_an Array
88
+ expect(entries[0]).to be_an Array
89
+ expect(entries[0].size).to eq 2
90
+ expect(entries[0][0]).to eq adapter.keys[0]
91
+ expect(entries[0][1]).to eq adapter.values[0]
92
+ end
93
+
94
+ it "gets work file by fileset id" do
95
+ adapter = described_class.of(work)
96
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
97
+ fsid = adapter.keys[0]
98
+ expect(fsid).to eq first_fileset.id
99
+ work_file = adapter.get(fsid)
100
+ expect(work_file.unwrapped).to eq first_fileset.original_file
101
+ work_file = adapter[fsid]
102
+ expect(work_file.unwrapped).to eq first_fileset.original_file
103
+ end
104
+
105
+ it "gets work file by work-local filename" do
106
+ adapter = described_class.of(work)
107
+ first_fileset = work.members.detect { |m| m.is_a?(FileSet) }
108
+ name = first_fileset.original_file.original_name
109
+ work_file = adapter.get(name)
110
+ expect(work_file).to eq adapter.get(first_fileset.id)
111
+ end
112
+
113
+ it "verifies inclusion of fileset id key" do
114
+ adapter = described_class.of(work)
115
+ fsid = adapter.keys[0]
116
+ expect(adapter.include?(fsid)).to be true
117
+ end
118
+ end
119
+
120
+ describe "assignment state" do
121
+ it "has empty state for work with no files" do
122
+ bare_work = MyWork.new
123
+ bare_work.title = ['No files to see here']
124
+ bare_work.save!
125
+ adapter = described_class.of(bare_work)
126
+ expect(adapter.keys.empty?).to be true
127
+ expect(adapter.state).to eq 'empty'
128
+ end
129
+
130
+ it "has 'dirty' state when files assigned" do
131
+ adapter = described_class.of(work)
132
+ expect(adapter.state).to eq 'saved'
133
+ adapter.assign(tiff_path)
134
+ # changes to dirty
135
+ expect(adapter.state).to eq 'dirty'
136
+ # unassign path again to empty assigned queue:
137
+ adapter.unassign(tiff_path)
138
+ # now we are back to 'saved' since no changes are queued:
139
+ expect(adapter.state).to eq 'saved'
140
+ end
141
+ end
142
+
143
+ describe "commits changes" do
144
+ # We need to register these jobs to run now, at minimum:
145
+ do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]
146
+ # These we skip: [CharacterizeJob, CreateDerivativesJob]
147
+ # -- skipping these saves 10-15 seconds on attachment example
148
+
149
+ permission_methods = [
150
+ :edit_users,
151
+ :read_users,
152
+ :discover_users,
153
+ :edit_groups,
154
+ :read_groups,
155
+ :discover_groups
156
+ ]
157
+
158
+ let(:bare_work) do
159
+ bare_work = MyWork.new
160
+ bare_work.title = ['No files to see here']
161
+ bare_work.save!
162
+ bare_work
163
+ end
164
+
165
+ it "commits unassign (file deletions)" do
166
+ adapter = described_class.of(work)
167
+ expect(adapter.keys.size).to eq 1
168
+ adapter.unassign(adapter.keys[0])
169
+ adapter.commit!
170
+ expect(adapter.keys.size).to eq 0
171
+ expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 0
172
+ end
173
+
174
+ context "when it is a new work" do
175
+ it "commit for assignment invokes actor stack" do
176
+ work = MyWork.new(title: ['Just a new work'])
177
+ adapter = described_class.of(work)
178
+ adapter.assign(tiff_path)
179
+ allow(Hyrax::CurationConcern.actor).to receive(:create).and_return(true)
180
+ expect(Hyrax::CurationConcern.actor).to receive(:create)
181
+ expect(adapter.commit!).to be true
182
+ end
183
+ end
184
+
185
+ context "when the work already exists" do
186
+ it "commit for assignment invokes actor stack" do
187
+ work = bare_work
188
+ adapter = described_class.of(work)
189
+ adapter.assign(tiff_path)
190
+ allow(Hyrax::CurationConcern.actor).to receive(:update).and_return(true)
191
+ expect(Hyrax::CurationConcern.actor).to receive(:update)
192
+ expect(adapter.commit!).to be true
193
+ end
194
+ end
195
+
196
+ xit "commits successful file attachment", perform_enqueued: do_now_jobs do
197
+ work = bare_work
198
+ adapter = described_class.of(work)
199
+ adapter.assign(tiff_path)
200
+ adapter.commit!
201
+ # registered jobs (do_now_jobs) performed as effect of commit!
202
+ # are configured to effectively run inline. Reloading work
203
+ # should refresh the work.members, and by consequence adapter.keys
204
+ work.reload
205
+ expect(adapter.keys.size).to eq 1
206
+ expect(work.members.to_a.count { |m| m.is_a? FileSet }).to eq 1
207
+ expect(adapter.names).to include 'ocr_gray.tiff'
208
+ end
209
+
210
+ xit "copies work perimssions to fileset", perform_enqueued: do_now_jobs do
211
+ adapter = described_class.of(bare_work)
212
+ adapter.assign(tiff_path)
213
+ adapter.commit!
214
+ bare_work.reload
215
+ fileset = bare_work.members.detect { |m| m.is_a?(FileSet) }
216
+ permission_methods.each do |m|
217
+ expect(fileset.send(m)).to match_array bare_work.send(m)
218
+ end
219
+ expect(fileset.visibility).to eq bare_work.visibility
220
+ end
221
+ end
222
+
223
+ describe "derivative access" do
224
+ it "gets derivatives for first fileset" do
225
+ fileset = work.members.detect { |m| m.is_a?(FileSet) }
226
+ adapter = described_class.of(work)
227
+ # adapts same context(s):
228
+ expect(adapter.derivatives.fileset.id).to eq fileset.id
229
+ expect(adapter.derivatives.work).to be work
230
+ expect(adapter.derivatives.class).to eq \
231
+ IiifPrint::Data::WorkDerivatives
232
+ # transitive parent/child relationship, can traverse to adapter from
233
+ # derivatives:
234
+ expect(adapter.derivatives.parent.parent).to be adapter
235
+ end
236
+ end
237
+ end
@@ -0,0 +1,109 @@
1
+ require 'spec_helper'
2
+ require 'tmpdir'
3
+
4
+ describe IiifPrint::ImageTool do
5
+ let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
6
+
7
+ # Image fixtures to test identification, metadata extraction for:
8
+ let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
9
+ let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
10
+ let(:gray_tiff) { File.join(fixtures, 'ocr_gray.tiff') }
11
+ let(:mono_tiff) { File.join(fixtures, 'ocr_mono.tiff') }
12
+ let(:color_tiff) { File.join(fixtures, '4.1.07.tiff') }
13
+ let(:pdf) { File.join(fixtures, 'minimal-1-page.pdf') }
14
+
15
+ describe "Extracts metadata with JP2 backend" do
16
+ it "constructs with a path" do
17
+ identify = described_class.new(gray_jp2)
18
+ expect(identify.path).to eq gray_jp2
19
+ end
20
+
21
+ it "gets metadata for grayscale JP2 image" do
22
+ result = described_class.new(gray_jp2).metadata
23
+ expect(result[:color]).to eq 'gray'
24
+ expect(result[:width]).to eq 418
25
+ expect(result[:height]).to eq 1046
26
+ expect(result[:bits_per_component]).to eq 8
27
+ expect(result[:num_components]).to eq 1
28
+ end
29
+
30
+ it "gets metadata for color JP2 image" do
31
+ result = described_class.new(color_jp2).metadata
32
+ expect(result[:color]).to eq 'color'
33
+ expect(result[:width]).to eq 256
34
+ expect(result[:height]).to eq 256
35
+ expect(result[:bits_per_component]).to eq 8
36
+ # e.g. is 3, but would be four if sample image had an alpha channel
37
+ expect(result[:num_components]).to eq 3
38
+ end
39
+ end
40
+
41
+ describe "Extracts metadata for non-JP2 images with imagemagick" do
42
+ it "gets metadata for gray TIFF image" do
43
+ result = described_class.new(gray_tiff).metadata
44
+ expect(result[:color]).to eq 'gray'
45
+ expect(result[:width]).to eq 418
46
+ expect(result[:height]).to eq 1046
47
+ expect(result[:bits_per_component]).to eq 8
48
+ expect(result[:num_components]).to eq 1
49
+ end
50
+
51
+ it "gets metadata for monochrome TIFF image" do
52
+ result = described_class.new(mono_tiff).metadata
53
+ expect(result[:color]).to eq 'monochrome'
54
+ expect(result[:width]).to eq 1261
55
+ expect(result[:height]).to eq 1744
56
+ expect(result[:bits_per_component]).to eq 1
57
+ expect(result[:num_components]).to eq 1
58
+ end
59
+
60
+ it "gets metadata for color TIFF image" do
61
+ result = described_class.new(color_tiff).metadata
62
+ expect(result[:color]).to eq 'color'
63
+ expect(result[:width]).to eq 256
64
+ expect(result[:height]).to eq 256
65
+ expect(result[:bits_per_component]).to eq 8
66
+ # e.g. is 3, but would be four if sample image had an alpha channel
67
+ expect(result[:num_components]).to eq 3
68
+ end
69
+
70
+ it "detects mime type of pdf" do
71
+ result = described_class.new(pdf).metadata
72
+ expect(result[:content_type]).to eq 'application/pdf'
73
+ end
74
+ end
75
+
76
+ describe "converts images" do
77
+ it "makes a monochrome TIFF from JP2" do
78
+ tool = described_class.new(gray_jp2)
79
+ dest = File.join(Dir.mktmpdir, 'mono.tif')
80
+ tool.convert(dest, true)
81
+ expect(File.exist?(dest)).to be true
82
+ expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
83
+ end
84
+
85
+ it "makes a gray TIFF from JP2" do
86
+ tool = described_class.new(gray_jp2)
87
+ dest = File.join(Dir.mktmpdir, 'gray.tif')
88
+ tool.convert(dest, false)
89
+ expect(File.exist?(dest)).to be true
90
+ expect(described_class.new(dest).metadata[:color]).to eq 'gray'
91
+ end
92
+
93
+ it "makes a monochrome TIFF from grayscale TIFF" do
94
+ tool = described_class.new(gray_tiff)
95
+ dest = File.join(Dir.mktmpdir, 'mono.tif')
96
+ tool.convert(dest, true)
97
+ expect(File.exist?(dest)).to be true
98
+ expect(described_class.new(dest).metadata[:color]).to eq 'monochrome'
99
+ end
100
+
101
+ # Not yet supported to use this tool to make JP2, for now the only
102
+ # component in IiifPrint doing that is
103
+ # IiifPrint::JP2DerivativeService
104
+ it "raises error on JP2 destination" do
105
+ expect { described_class.new(gray_tiff).convert('out.jp2') }.to \
106
+ raise_error(RuntimeError)
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::Jobs::ChildWorksFromPdfJob do
5
+ # TODO: add specs
6
+ let(:work) { WorkWithIiifPrintConfig.new(title: ['required title']) }
7
+ let(:my_user) { build(:user) }
8
+ let(:uploaded_pdf_file) { create(:uploaded_pdf_file) }
9
+ let(:uploaded_file_ids) { [uploaded_pdf_file.id] }
10
+ let(:pdf_paths) do
11
+ uploads = Hyrax::UploadedFile.find(uploaded_file_ids)
12
+ upload_paths = uploads.map { |upload| upload.file.file.file }
13
+ upload_paths.select { |path| path.end_with?('.pdf', '.PDF') }
14
+ end
15
+ let(:admin_set_id) { "admin_set/default" }
16
+ let(:prior_pdfs) { 0 }
17
+
18
+ let(:subject) { described_class.perform(work, pdf_paths, my_user, admin_set_id, prior_pdfs) }
19
+
20
+ describe '#perform' do
21
+ xit 'calls pdf splitter service with path' do
22
+ end
23
+
24
+ xit 'submits one BatchCreateJob per PDF' do
25
+ end
26
+
27
+ xit 'submits IiifPrint::Jobs::CreateRelationshipsJob' do
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe IiifPrint::Jobs::CreateRelationshipsJob do
5
+ # TODO: add specs
6
+ let(:parent) { WorkWithIiifPrintConfig.new(title: ['required title']) }
7
+ let(:my_user) { build(:user) }
8
+ let(:parent_model) { WorkWithIiifPrintConfig }
9
+ let(:child_model) { WorkWithIiifPrintConfig }
10
+
11
+ let(:subject) { described_class.perform(user: my_user, parent_id: parent.id, parent_model: parent_model, child_model: child_model) }
12
+
13
+ describe '#perform' do
14
+ xit 'loads all child work ids into ordered_members' do
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ describe IiifPrint::JP2ImageMetadata do
4
+ let(:fixtures) { File.join(IiifPrint::GEM_PATH, 'spec/fixtures/files') }
5
+
6
+ let(:gray_jp2) { File.join(fixtures, 'ocr_gray.jp2') }
7
+
8
+ let(:color_jp2) { File.join(fixtures, '4.1.07.jp2') }
9
+
10
+ describe "Extracts technical metadata from a JP2 file" do
11
+ it "constructs with a path" do
12
+ meta = described_class.new(gray_jp2)
13
+ expect(meta.path).to eq gray_jp2
14
+ end
15
+
16
+ it "gets metadata for grayscale image" do
17
+ meta = described_class.new(gray_jp2)
18
+ result = meta.technical_metadata
19
+ expect(result[:color]).to eq 'gray'
20
+ expect(result[:width]).to eq 418
21
+ expect(result[:height]).to eq 1046
22
+ expect(result[:bits_per_component]).to eq 8
23
+ expect(result[:num_components]).to eq 1
24
+ end
25
+
26
+ it "gets metadata for color image" do
27
+ meta = described_class.new(color_jp2)
28
+ result = meta.technical_metadata
29
+ expect(result[:color]).to eq 'color'
30
+ expect(result[:width]).to eq 256
31
+ expect(result[:height]).to eq 256
32
+ expect(result[:bits_per_component]).to eq 8
33
+ # e.g. is 3, but would be four if sample image had an alpha channel
34
+ expect(result[:num_components]).to eq 3
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe IiifPrint::LineageService do
6
+ describe '.ancestor_ids_for' do
7
+ xit 'works'
8
+ end
9
+
10
+ describe '.descendent_file_set_ids_for' do
11
+ xit 'works'
12
+ end
13
+ end
@@ -0,0 +1,115 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::Metadata do
4
+ let(:base_url) { "https://my.dev.test" }
5
+ let(:solr_document) { SolrDocument.new(attributes) }
6
+ let(:fields) do
7
+ metadata_fields.map do |field|
8
+ SampleField.new(
9
+ name: field.first,
10
+ label: Hyrax::Renderers::AttributeRenderer.new(field, nil).label,
11
+ options: field.last
12
+ )
13
+ end
14
+ end
15
+ let(:metadata_fields) do
16
+ {
17
+ title: {},
18
+ description: {},
19
+ date_modified: {}
20
+ }
21
+ end
22
+
23
+ SampleField = Struct.new(:name, :label, :options, keyword_init: true)
24
+
25
+ describe ".build_metadata_for" do
26
+ subject(:manifest_metadata) do
27
+ described_class.build_metadata_for(
28
+ work: solr_document,
29
+ version: version,
30
+ fields: fields,
31
+ current_ability: double(Ability),
32
+ base_url: base_url
33
+ )
34
+ end
35
+
36
+ context "for version 2 of the IIIF spec" do
37
+ let(:version) { 2 }
38
+
39
+ context "with a field that has some plain text" do
40
+ let(:attributes) { { "title_tesim" => ["My Awesome Title"] } }
41
+
42
+ it "maps the metadata accordingly" do
43
+ expect(manifest_metadata).to eq [
44
+ { "label" => "Title", "value" => ["My Awesome Title"] }
45
+ ]
46
+ end
47
+ end
48
+
49
+ context "with a field that contains a url string" do
50
+ let(:attributes) { { "description_tesim" => ["A url like https://www.example.com/, cool!"] } }
51
+
52
+ it "creates a link for the url string" do
53
+ expect(manifest_metadata).to eq [
54
+ { "label" => "Description",
55
+ "value" =>
56
+ [
57
+ "A url like <a href='https://www.example.com/' target='_blank'>https://www.example.com/</a>, cool!"
58
+ ] }
59
+ ]
60
+ end
61
+ end
62
+
63
+ context "with a date" do
64
+ let(:attributes) { { "date_modified_dtsi" => "2011-11-11T11:11:11Z" } }
65
+
66
+ it "displays it just the date" do
67
+ expect(manifest_metadata).to eq [{ "label" => "Date modified", "value" => ["2011-11-11"] }]
68
+ end
69
+ end
70
+
71
+ context "with a faceted option" do
72
+ let(:metadata_fields) { { creator: { render_as: :faceted } } }
73
+ let(:attributes) { { "creator_tesim" => ["McAuthor, Arthur"] } }
74
+
75
+ it "adds a link to the faceted search" do
76
+ expect(manifest_metadata). to eq [
77
+ { "label" => "Creator",
78
+ "value" =>
79
+ ["<a href='#{base_url}/catalog?f%5Bcreator_sim%5D%5B%5D=McAuthor%2C+Arthur&locale=en'>McAuthor, Arthur</a>"] }
80
+ ]
81
+ end
82
+ end
83
+
84
+ context "when the work is apart of a collection" do
85
+ let(:metadata_fields) { { collection: {} } }
86
+ let(:collection_attributes) { { "id" => "321cba", "title_tesim" => ["My Cool Collection"] } }
87
+ let(:collection_solr_doc) { SolrDocument.new(collection_attributes) }
88
+ let(:attributes) { { "member_of_collection_ids_ssim" => "321cba" } }
89
+
90
+ it "renders a link to the collection" do
91
+ allow(Hyrax::CollectionMemberService).to receive(:run).and_return([collection_solr_doc])
92
+ expect(manifest_metadata).to eq [
93
+ { "label" => "Collection",
94
+ "value" => ["<a href='#{base_url}/collections/321cba'>My Cool Collection</a>"] }
95
+ ]
96
+ end
97
+ end
98
+ end
99
+
100
+ context "for version 3 of the IIIF spec", skip: "version 3 metadata not implemented yet" do
101
+ let(:version) { 3 }
102
+
103
+ it "maps the metadata accordingly" do
104
+ # NOTE: this assumes the I18n.locale is set as :en
105
+ expect(manifest_metadata).to eq [
106
+ { "label" => { "en" => ["Title"] }, "value" => { "none" => ["My Awesome Title"] } },
107
+ { "label" => { "en" => ["Description"] },
108
+ "value" => { "none" => ["This is and awesome description"] } },
109
+ { "label" => { "en" => ["Date modified"] }, "value" => { "none" => ["2011-11-11"] } },
110
+ { "label" => { "en" => ["Creator"] }, "value" => { "none" => ["McAuthor, Arthur"] } }
111
+ ]
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,6 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
# Placeholder spec for IiifPrint::SplitPdfs::PagesIntoImagesService; it does
# not define any examples yet.
RSpec.describe IiifPrint::SplitPdfs::PagesIntoImagesService do
  # TODO: add specs
end
@@ -0,0 +1,49 @@
1
+ require 'json'
2
+ require 'spec_helper'
3
+
4
# Exercises IiifPrint::TextExtraction::AltoReader against two ALTO XML
# fixtures: a minimal document and a larger NDNP sample.
RSpec.describe IiifPrint::TextExtraction::AltoReader do
  let(:fixture_path) { File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files') }

  # Fixture locations, plus the raw XML text of the minimal fixture.
  let(:minimal_path) { File.join(fixture_path, 'minimal-alto.xml') }
  let(:ndnp_alto_path) { File.join(fixture_path, 'ndnp-alto-sample.xml') }
  let(:minimal) { File.read(minimal_path) }

  # Readers are constructed both from XML text and from a file path.
  let(:reader_minimal) { described_class.new(minimal) }
  let(:reader_minimal_path) { described_class.new(minimal_path) }
  let(:reader_ndnp) { described_class.new(ndnp_alto_path) }

  describe "reads alto" do
    it "loads ALTO source" do
      # A reader given a path must expose the same source as one given the text.
      expect(reader_minimal_path.source).to eq(reader_minimal.source)
      expect(reader_minimal_path.source.size).to eq(1383)
      expect(reader_ndnp.source.size).to eq(1_050_876)
    end

    it "loads document stream" do
      expect(reader_minimal_path.doc_stream).to be_kind_of(Nokogiri::XML::SAX::Document)
      %i[text words].each do |stream_method|
        expect(reader_minimal_path.doc_stream).to respond_to(stream_method)
      end
    end
  end

  describe "outputs text derivative formats" do
    it "outputs plain text" do
      # try simple flat text input
      expect(reader_minimal.text).to eq("This is only a test.")
      expect(reader_minimal.text).to eq(reader_minimal.doc_stream.text)
      # try more complex input
      expect(reader_ndnp.text.size).to eq(30_519)
    end

    it "passes args to WordCoordsBuilder and receives output" do
      minimal_coords = JSON.parse(reader_minimal.json)['coords']
      expect(minimal_coords.length).to be > 1

      ndnp_coords = JSON.parse(reader_ndnp.json)['coords']
      expect(ndnp_coords.size).to eq(2_125)
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'json'
2
+ require 'nokogiri'
3
+ require 'spec_helper'
4
+
5
# Exercises IiifPrint::TextExtraction::HOCRReader against a single hOCR
# fixture, loaded both from its path and from its text content.
RSpec.describe IiifPrint::TextExtraction::HOCRReader do
  let(:fixture_path) { File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files') }

  let(:minimal_path) { File.join(fixture_path, 'ocr_mono_text_hocr.html') }
  let(:minimal) { File.read(minimal_path) }

  # One reader is given the hOCR text, the other the path to the file.
  let(:reader_minimal) { described_class.new(minimal) }
  let(:reader_minimal_path) { described_class.new(minimal_path) }

  describe "reads hOCR" do
    it "loads hOCR either from path or source text" do
      expect(reader_minimal_path.source).to eq(reader_minimal.source)
      # size here is in Unicode characters, not bytes:
      expect(reader_minimal_path.source.size).to eq(16_590)
    end

    it "loads document stream" do
      expect(reader_minimal_path.doc_stream).to be_kind_of(Nokogiri::XML::SAX::Document)
      %i[text words].each do |stream_method|
        expect(reader_minimal_path.doc_stream).to respond_to(stream_method)
      end
    end
  end

  describe "outputs text derivative formats" do
    it "outputs plain text" do
      opening = reader_minimal.text.slice(0, 40)
      # NOTE(review): the literal below is shorter than the 40 characters
      # requested from slice; runs of whitespace may have been collapsed when
      # this text was rendered -- confirm against the fixture.
      expect(opening).to eq("_A FEARFUL ADVENTURE.\n‘The Missouri. ")
      expect(reader_minimal.text).to eq(reader_minimal.doc_stream.text)
      expect(reader_minimal.text.size).to eq(831)
    end

    it "passes args to WordCoordsBuilder and receives output" do
      coords = JSON.parse(reader_minimal.json)['coords']
      expect(coords.length).to be > 1
    end
  end
end