iiif_print 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,178 @@
1
+ require 'fileutils'
2
+ require 'spec_helper'
3
+
4
+ RSpec.describe IiifPrint::PluggableDerivativeService do
5
+ let(:persisted_file_set) do
6
+ fs = FileSet.new
7
+ work.title = ['This is a page!']
8
+ work.members.push(fs)
9
+ fs.instance_variable_set(:@mime_type, 'image/tiff')
10
+ fs.save!(validate: false)
11
+ work.save!(validate: false)
12
+ fs
13
+ end
14
+
15
+ let(:fixture_path) do
16
+ File.join(
17
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
18
+ )
19
+ end
20
+
21
+ describe "service registration" do
22
+ # integration test with Hyrax: verify the service is registered
23
+
24
+ it "is registered with Hyrax" do
25
+ expect(Hyrax::DerivativeService.services).to include described_class
26
+ end
27
+
28
+ it "is the first valid service found" do
29
+ file_set = double(FileSet,
30
+ class: FileSet,
31
+ mime_type: 'application/pdf',
32
+ parent: MyIiifConfiguredWorkWithAllDerivativeServices.new)
33
+ found = Hyrax::DerivativeService.for(file_set)
34
+ expect(found).to be_a described_class
35
+ end
36
+ end
37
+
38
+ context "when the FileSet's parent is not IiifPrint configured" do
39
+ before do
40
+ allow(persisted_file_set).to receive(:in_works).and_return([work])
41
+ end
42
+
43
+ let(:work) { MyWork.new }
44
+
45
+ describe "#plugins" do
46
+ it "uses the default derivatives service" do
47
+ file_set = double(FileSet,
48
+ class: FileSet,
49
+ mime_type: 'application/pdf',
50
+ parent: MyWork.new)
51
+ service = described_class.new(file_set)
52
+ expect(service.plugins).to eq [Hyrax::FileSetDerivativesService]
53
+ end
54
+ end
55
+ end
56
+
57
+ context "when the FileSet's parent is IiifPrint configured" do
58
+ describe "calls the configured derivative plugins" do
59
+ before do
60
+ allow(persisted_file_set).to receive(:in_works).and_return([work])
61
+ allow_any_instance_of(Hyrax::FileSetDerivativesService).to receive(:send)
62
+ end
63
+
64
+ let(:work) { MyIiifConfiguredWork.new }
65
+ let(:plugin) { FakeDerivativeService.new }
66
+
67
+ it "calls each plugin on create" do
68
+ service = described_class.new(persisted_file_set, plugins: [plugin])
69
+ expect do
70
+ service.create_derivatives('not_a_real_filename')
71
+ end.to change(plugin, :create_called).by(1)
72
+ end
73
+
74
+ def touch_fake_derivative_file(file_set, ext)
75
+ path = Hyrax::DerivativePath.derivative_path_for_reference(file_set, ext)
76
+ FileUtils.mkdir_p(File.join(path.split('/')[0..-2]))
77
+ FileUtils.touch(path)
78
+ end
79
+
80
+ it "does not re-create existing derivative" do
81
+ service = described_class.new(persisted_file_set, plugins: [plugin])
82
+ expect(persisted_file_set.id).not_to be_nil
83
+ expect do
84
+ touch_fake_derivative_file(persisted_file_set, plugin.target_extension)
85
+ service.create_derivatives('/nonsense/source/path/ignored ')
86
+ end.not_to change(plugin, :create_called)
87
+ end
88
+
89
+ it "calls each plugin on cleanup" do
90
+ service = described_class.new(persisted_file_set, plugins: [plugin])
91
+ expect { service.cleanup_derivatives }.to change(plugin, :cleanup_called).by(1)
92
+ end
93
+ end
94
+
95
+ context "integration tests for plugins" do
96
+ before do
97
+ allow(persisted_file_set).to receive(:in_works).and_return([work])
98
+ end
99
+
100
+ let(:work) { MyIiifConfiguredWorkWithAllDerivativeServices.new }
101
+
102
+ describe "calls all derivative plugins" do
103
+ def source_image(name)
104
+ File.join(fixture_path, name)
105
+ end
106
+
107
+ def derivatives_for(file_set)
108
+ Hyrax::DerivativePath.derivatives_for_reference(file_set)
109
+ end
110
+
111
+ def expected_plugins
112
+ [
113
+ Hyrax::FileSetDerivativesService,
114
+ IiifPrint::JP2DerivativeService,
115
+ IiifPrint::PDFDerivativeService,
116
+ IiifPrint::TextExtractionDerivativeService,
117
+ IiifPrint::TIFFDerivativeService
118
+ ]
119
+ end
120
+
121
+ # The expected set of Plugins that will run for file set
122
+ it "has expected valid plugins configured" do
123
+ plugins = described_class.new(persisted_file_set).plugins
124
+ fs = persisted_file_set
125
+ services = plugins.map { |plugin| plugin.new(fs) }.select(&:valid?)
126
+ expect(services.length).to eq 5
127
+ used_plugins = services.map(&:class)
128
+ expected_plugins.each do |plugin|
129
+ expect(used_plugins).to include plugin
130
+ end
131
+ end
132
+
133
+ it "creates expected derivatives from TIFF source" do
134
+ svc = described_class.new(persisted_file_set)
135
+ svc.create_derivatives(source_image('4.1.07.tiff'))
136
+ made = derivatives_for(persisted_file_set)
137
+ made.each { |path| expect(File.exist?(path)) }
138
+ extensions = made.map { |path| path.split('.')[-1] }
139
+ expect(extensions).to include 'pdf'
140
+ expect(extensions).to include 'jp2'
141
+ expect(extensions).not_to include 'tiff'
142
+ # Thumbnail, created by Hyrax:
143
+ expect(extensions).to include 'jpeg'
144
+ end
145
+ end
146
+
147
+ describe "ingest integration" do
148
+ def log_attachment(file_set)
149
+ # create a log entry for the fileset given destination name 'jp2'
150
+ IiifPrint::DerivativeAttachment.create(
151
+ fileset_id: file_set.id,
152
+ path: '/some/arbitrary/path/to.jp2',
153
+ destination_name: 'jp2'
154
+ )
155
+ end
156
+
157
+ def jp2_plugin?(plugins)
158
+ r = plugins.select { |p| p.is_a? IiifPrint::JP2DerivativeService }
159
+ !r.empty?
160
+ end
161
+
162
+ it "will not attempt creating over pre-made derivative" do
163
+ service = described_class.new(persisted_file_set)
164
+ # this should be respected, evaluate by obtaining filtered
165
+ # services list, which must omit JP2DerivativeService
166
+ plugins = service.services(:create_derivatives)
167
+ # initially has jp2 plugin
168
+ expect(jp2_plugin?(plugins)).to be true
169
+ # blacklist jp2 by effect of log entry of pre-made attachment
170
+ log_attachment(service.file_set)
171
+ # omits, after logging intent of previous attachment:
172
+ plugins = service.services(:create_derivatives)
173
+ expect(jp2_plugin?(plugins)).to be false
174
+ end
175
+ end
176
+ end
177
+ end
178
+ end
@@ -0,0 +1,82 @@
1
+ require 'nokogiri'
2
+ require 'spec_helper'
3
+ require 'misc_shared'
4
+
5
+ RSpec.describe IiifPrint::TextExtractionDerivativeService do
6
+ include_context "shared setup"
7
+
8
+ let(:valid_file_set) do
9
+ file_set = FileSet.new
10
+ file_set.save!(validate: false)
11
+ file_set
12
+ end
13
+
14
+ let(:work) do
15
+ work = NewspaperPage.create(title: ["Hello"])
16
+ work.members << valid_file_set
17
+ work.save!
18
+ end
19
+
20
+ let(:minimal_alto) do
21
+ File.join(fixture_path, 'minimal-alto.xml')
22
+ end
23
+
24
+ let(:altoxsd) do
25
+ xsdpath = File.join(fixture_path, 'alto-2-0.xsd')
26
+ Nokogiri::XML::Schema(File.read(xsdpath))
27
+ end
28
+
29
+ describe "Creates ALTO derivative" do
30
+ def source_image(name)
31
+ File.join(fixture_path, name)
32
+ end
33
+
34
+ def expected_path(file_set, ext)
35
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, ext)
36
+ end
37
+
38
+ def validate_alto(filename)
39
+ altoxsd.validate(filename)
40
+ end
41
+
42
+ def derivative_exists(ext)
43
+ path = expected_path(valid_file_set, ext)
44
+ expect(File.exist?(path)).to be true
45
+ expect(File.size(path)).to be > 0
46
+ end
47
+
48
+ xit "creates, stores valid ALTO and plain-text derivatives" do
49
+ # these are in same test to avoid duplicate OCR operation
50
+ service = described_class.new(valid_file_set)
51
+ service.create_derivatives(source_image('ocr_mono.tiff'))
52
+ # ALTO derivative file exists at expected path and validates:
53
+ altoxsd.validate(expected_path(valid_file_set, 'xml'))
54
+ # Plain text exists as non-empty file:
55
+ derivative_exists('txt')
56
+ derivative_exists('json')
57
+ json_path = expected_path(valid_file_set, 'json')
58
+ loaded_result = JSON.parse(File.read(json_path))
59
+ expect(loaded_result['coords'].length).to be > 1
60
+ end
61
+
62
+ xit "usually uses OCR, when no existing text" do
63
+ service = described_class.new(valid_file_set)
64
+ # here, service will delegate create_derivatives to OCR impl method:
65
+ expect(service).to receive(:create_derivatives_from_ocr)
66
+ service.create_derivatives(source_image('ocr_mono.tiff'))
67
+ end
68
+
69
+ xit "defers to existing ALTO sources, when present" do
70
+ # Attach some ALTO to a work
71
+ derivatives = IiifPrint::Data::WorkDerivatives.of(
72
+ work,
73
+ valid_file_set
74
+ )
75
+ derivatives.attach(minimal_alto, 'xml')
76
+ # In this case, service will not call the OCR implementation method:
77
+ service = described_class.new(valid_file_set)
78
+ expect(service).not_to receive(:create_derivatives_from_ocr)
79
+ service.create_derivatives(source_image('ocr_mono.tiff'))
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,127 @@
1
+ require 'nokogiri'
2
+ require 'spec_helper'
3
+ require 'misc_shared'
4
+
5
+ RSpec.describe IiifPrint::TextFormatsFromALTOService do
6
+ include_context "shared setup"
7
+
8
+ let(:valid_file_set) do
9
+ file_set = FileSet.new
10
+ file_set.save!(validate: false)
11
+ file_set
12
+ end
13
+
14
+ let(:work) do
15
+ work = NewspaperPage.create(title: ["Hello"])
16
+ work.members << valid_file_set
17
+ work.save!
18
+ work
19
+ end
20
+
21
+ let(:minimal_alto) do
22
+ File.join(fixture_path, 'minimal-alto.xml')
23
+ end
24
+
25
+ def log_incoming_attachment(fsid)
26
+ IiifPrint::DerivativeAttachment.create!(
27
+ fileset_id: fsid,
28
+ path: minimal_alto,
29
+ destination_name: 'xml'
30
+ )
31
+ end
32
+
33
+ def derivatives_of(work, fileset)
34
+ IiifPrint::Data::WorkDerivatives.of(work, fileset)
35
+ end
36
+
37
+ describe "Saves other formats from ALTO" do
38
+ xit "saves JSON, text from existing ALTO derivative" do
39
+ derivatives = derivatives_of(work, valid_file_set)
40
+ expect(derivatives.keys.size).to eq 0
41
+ derivatives.attach(minimal_alto, 'xml')
42
+ expect(derivatives.keys.size).to eq 1
43
+ service = described_class.new(valid_file_set)
44
+ service.create_derivatives('/some/random/primary/path/does_not/matter')
45
+ derivatives.load_paths
46
+ expect(derivatives.keys.size).to eq 3
47
+ expect(derivatives.keys).to include 'json', 'txt'
48
+ end
49
+
50
+ xit "saves JSON, text from incoming ALTO derivative" do
51
+ derivatives = derivatives_of(work, valid_file_set)
52
+ expect(derivatives.keys.size).to eq 0
53
+ log_incoming_attachment(valid_file_set.id)
54
+ service = described_class.new(valid_file_set)
55
+ service.create_derivatives('/some/random/primary/path/does_not/matter')
56
+ # reload keys to check derivatives:
57
+ derivatives.load_paths
58
+ expect(derivatives.keys).to include 'json', 'txt'
59
+ end
60
+ end
61
+
62
+ describe "scaling matters" do
63
+ # we need an ingested, characterized file:
64
+ do_now_jobs = [
65
+ IngestLocalFileJob,
66
+ IngestJob,
67
+ InheritPermissionsJob,
68
+ CharacterizeJob
69
+ ]
70
+ # we omit CreateDerivativesJob from above, as obviously duplicative and
71
+ # therefore potential cause of problems here.
72
+
73
+ # remove any previous test run (development) artifacts in file
74
+ # attachment logging tables
75
+ before(:all) do
76
+ IiifPrint::DerivativeAttachment.all.delete_all
77
+ IiifPrint::IngestFileRelation.all.delete_all
78
+ end
79
+
80
+ let(:work) do
81
+ work = NewspaperPage.create(title: ["Hello"])
82
+ work
83
+ end
84
+
85
+ let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
86
+ let(:ocr_alto_path) do
87
+ File.join(fixture_path, 'ocr_alto_scaled_4pts_per_px.xml')
88
+ end
89
+
90
+ def attach_primary_file(work)
91
+ IiifPrint::Data::WorkFiles.assign!(to: work, path: tiff_path)
92
+ work.reload
93
+ pcdm_file = IiifPrint::Data::WorkFiles.of(work).values[0].unwrapped
94
+ expect(pcdm_file).not_to be_nil
95
+ # we have image dimensions (px) to work with:
96
+ expect(pcdm_file.width[0].to_i).to be_an Integer
97
+ expect(pcdm_file.height[0].to_i).to be_an Integer
98
+ end
99
+
100
+ def derivatives_of(work)
101
+ IiifPrint::Data::WorkFiles.of(work).derivatives
102
+ end
103
+
104
+ def attach_alto(work)
105
+ derivatives = derivatives_of(work)
106
+ derivatives.attach(ocr_alto_path, 'xml')
107
+ # has a path to now-stored derivative:
108
+ expect(derivatives.path('xml')).not_to be_nil
109
+ end
110
+
111
+ xit "scales ALTO points to original image", perform_enqueued: do_now_jobs do
112
+ attach_primary_file(work)
113
+ attach_alto(work)
114
+ work.reload
115
+ file_set = work.ordered_members.to_a.find { |m| m.is_a? FileSet }
116
+ service = described_class.new(file_set)
117
+ service.create_derivatives('/a/path/here/needed/but/will/not/matter')
118
+ coords = JSON.parse(derivatives_of(work).data('json'))
119
+ word = coords['coords'].select { |k, _v| k == 'Bethesda' }
120
+ # test against known scaled coordinate of OCR data:
121
+ # This roughly matches unscaled ALTO data for token 'Bethesda'
122
+ # in spec/fixtures/files/ocr_alto.xml, with the disclaimer that
123
+ # round-trip rounding error of 1px is noted for VPOS.
124
+ expect(word['Bethesda']).to eq [[16, 665, 78, 16]]
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+ RSpec.describe IiifPrint::TIFFDerivativeService do
3
+ let(:valid_file_set) do
4
+ file_set = FileSet.new
5
+ file_set.save!(validate: false)
6
+ file_set
7
+ end
8
+
9
+ let(:fixture_path) do
10
+ File.join(
11
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
12
+ )
13
+ end
14
+
15
+ describe "Creates TIFF derivatives" do
16
+ def source_image(name)
17
+ File.join(fixture_path, name)
18
+ end
19
+
20
+ def expected_path(file_set)
21
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'tiff')
22
+ end
23
+
24
+ def get_res(path)
25
+ tool = IiifPrint::ImageTool.new(path)
26
+ "#{tool.metadata[:width]}x#{tool.metadata[:height]}"
27
+ end
28
+
29
+ def check_dpi_match(orig, dest)
30
+ # compare the "WIDTHxHEIGHT" strings from get_res (not ppi), but skip pdf to avoid ghostscript warnings to stderr
31
+ expect(get_res(orig)).to eq get_res(dest) unless orig.end_with?('pdf')
32
+ end
33
+
34
+ def makes_tiff(filename)
35
+ path = source_image(filename)
36
+ expected = expected_path(valid_file_set)
37
+ expect(File.exist?(expected)).to be false
38
+ svc = described_class.new(valid_file_set)
39
+ svc.create_derivatives(path)
40
+ expect(File.exist?(expected)).to be true
41
+ mime = IiifPrint::ImageTool.new(expected).metadata[:content_type]
42
+ expect(mime).to eq 'image/tiff'
43
+ check_dpi_match(path, expected)
44
+ svc.cleanup_derivatives
45
+ end
46
+
47
+ # for cases where primary file is TIFF already
48
+ def avoids_duplicative_creation(filename)
49
+ expected = expected_path(valid_file_set)
50
+ expect(File.exist?(expected)).to be false
51
+ svc = described_class.new(valid_file_set)
52
+ svc.create_derivatives(source_image(filename))
53
+ expect(File.exist?(expected)).not_to be true
54
+ end
55
+
56
+ it "Does not make TIFF derivatives when primary is TIFF" do
57
+ avoids_duplicative_creation('ocr_mono.tiff')
58
+ avoids_duplicative_creation('ocr_gray.tiff')
59
+ end
60
+
61
+ it "creates TIFF from PDF source, robust to multi-page" do
62
+ makes_tiff('sample-color-newsletter.pdf')
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,181 @@
1
+ require 'json'
2
+
3
+ # testing environment:
4
+ ENV['RAILS_ENV'] ||= 'test'
5
+
6
+ require 'coveralls'
7
+ Coveralls.wear!
8
+
9
+ require 'shoulda/matchers'
10
+ Shoulda::Matchers.configure do |config|
11
+ config.integrate do |with|
12
+ with.test_framework :rspec
13
+ end
14
+ end
15
+
16
+ # engine_cart:
17
+ require 'bundler/setup'
18
+ require 'engine_cart'
19
+ EngineCart.load_application!
20
+
21
+ require 'rspec/rails'
22
+ require 'support/iiif_print_models'
23
+ require 'support/controller_level_helpers'
24
+ require 'rspec/active_model/mocks'
25
+
26
+ ActiveJob::Base.queue_adapter = :test
27
+
28
+ RSpec.configure do |config|
29
+ # enable FactoryBot:
30
+ require 'factory_bot'
31
+ config.include FactoryBot::Syntax::Methods
32
+ # auto-detect and load all factories in spec/factories:
33
+ FactoryBot.find_definitions
34
+
35
+ config.infer_spec_type_from_file_location!
36
+
37
+ # Transactional
38
+ config.use_transactional_fixtures = false
39
+ config.include Devise::Test::ControllerHelpers, type: :controller
40
+
41
+ # ensure Hyrax has active sipity workflow for default admin set:
42
+ config.before(:suite) do
43
+ require 'active_fedora/cleaner'
44
+ require 'database_cleaner'
45
+
46
+ # By default, Hyrax uses a database minter class. That's the preferred pathway (because you are
47
+ # tracking minting state in the database). However, for testing purposes we don't need to / nor
48
+ # want to install the minter migrations. Hence we're favoring this approach.
49
+ minter_class = ::Noid::Rails::Minter::File
50
+ ::Noid::Rails.config.minter_class = minter_class
51
+ Hyrax.config.noid_minter_class = minter_class
52
+
53
+ ActiveFedora::Cleaner.clean!
54
+ DatabaseCleaner.clean_with(:truncation)
55
+
56
+ begin
57
+ # TODO: switch the below methods to use the appropriate services
58
+ # rather than the deprecated methods currently being used.
59
+ # ensure permission template actually exists in RDBMS:
60
+ id = 'admin_set/default'
61
+ no_template = Hyrax::PermissionTemplate.find_by(source_id: id).nil?
62
+ Hyrax::PermissionTemplate.create!(source_id: id) if no_template
63
+ # ensure workflows exist, presumes permission template does first:
64
+ Hyrax::Workflow::WorkflowImporter.load_workflows
65
+ # Default admin set needs to exist in Fedora, with relation to its
66
+ # PermissionTemplate object:
67
+ begin
68
+ admin_set = AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
69
+ admin_set.save!
70
+ rescue ActiveRecord::RecordNotUnique
71
+ admin_set = AdminSet.find(AdminSet::DEFAULT_ID)
72
+ end
73
+ permission_template = admin_set.permission_template
74
+ workflow = permission_template.available_workflows.where(
75
+ name: 'default'
76
+ ).first
77
+ Sipity::Workflow.activate!(
78
+ permission_template: permission_template,
79
+ workflow_id: workflow.id
80
+ )
81
+ rescue Faraday::ConnectionFailed
82
+ STDERR.puts "Attempting to run test suite without Fedora and/or Solr..."
83
+ end
84
+ end
85
+
86
+ # :perform_enqueued config setting below copied from Hyrax spec_helper.rb
87
+ config.before(:example, :perform_enqueued) do |example|
88
+ ActiveJob::Base.queue_adapter.filter = example.metadata[:perform_enqueued].try(:to_a)
89
+ ActiveJob::Base.queue_adapter.perform_enqueued_jobs = true
90
+ ActiveJob::Base.queue_adapter.perform_enqueued_at_jobs = true
91
+ end
92
+ config.after(:example, :perform_enqueued) do
93
+ ActiveJob::Base.queue_adapter.filter = nil
94
+ ActiveJob::Base.queue_adapter.enqueued_jobs = []
95
+ ActiveJob::Base.queue_adapter.performed_jobs = []
96
+ ActiveJob::Base.queue_adapter.perform_enqueued_jobs = false
97
+ ActiveJob::Base.queue_adapter.perform_enqueued_at_jobs = false
98
+ end
99
+ config.after(:suite) do # or :each or :all
100
+ FileUtils.rm_rf(Dir[Rails.root.join('tmp', 'derivatives', '*')])
101
+ end
102
+
103
+ # rspec-expectations config goes here. You can use an alternate
104
+ # assertion/expectation library such as wrong or the stdlib/minitest
105
+ # assertions if you prefer.
106
+ config.expect_with :rspec do |expectations|
107
+ # This option will default to `true` in RSpec 4. It makes the `description`
108
+ # and `failure_message` of custom matchers include text for helper methods
109
+ # defined using `chain`, e.g.:
110
+ # be_bigger_than(2).and_smaller_than(4).description
111
+ # # => "be bigger than 2 and smaller than 4"
112
+ # ...rather than:
113
+ # # => "be bigger than 2"
114
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
115
+ end
116
+
117
+ # rspec-mocks config goes here. You can use an alternate test double
118
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
119
+ config.mock_with :rspec do |mocks|
120
+ # Prevents you from mocking or stubbing a method that does not exist on
121
+ # a real object. This is generally recommended, and will default to
122
+ # `true` in RSpec 4.
123
+ mocks.verify_partial_doubles = true
124
+ end
125
+
126
+ # This option will default to `:apply_to_host_groups` in RSpec 4 (and will
127
+ # have no way to turn it off -- the option exists only for backwards
128
+ # compatibility in RSpec 3). It causes shared context metadata to be
129
+ # inherited by the metadata hash of host groups and examples, rather than
130
+ # triggering implicit auto-inclusion in groups with matching metadata.
131
+ config.shared_context_metadata_behavior = :apply_to_host_groups
132
+
133
+ # The settings below are suggested to provide a good initial experience
134
+ # with RSpec, but feel free to customize to your heart's content.
135
+
136
+ # This allows you to limit a spec run to individual examples or groups
137
+ # you care about by tagging them with `:focus` metadata. When nothing
138
+ # is tagged with `:focus`, all examples get run. RSpec also provides
139
+ # aliases for `it`, `describe`, and `context` that include `:focus`
140
+ # metadata: `fit`, `fdescribe` and `fcontext`, respectively.
141
+ # config.filter_run_when_matching :focus
142
+
143
+ # Allows RSpec to persist some state between runs in order to support
144
+ # the `--only-failures` and `--next-failure` CLI options. We recommend
145
+ # you configure your source control system to ignore this file.
146
+ # config.example_status_persistence_file_path = "spec/examples.txt"
147
+
148
+ # Limits the available syntax to the non-monkey patched syntax that is
149
+ # recommended. For more details, see:
150
+ # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
151
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
152
+ # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
153
+ # config.disable_monkey_patching!
154
+
155
+ # Many RSpec users commonly either run the entire suite or an individual
156
+ # file, and it's useful to allow more verbose output when running an
157
+ # individual spec file.
158
+ # if config.files_to_run.one?
159
+ # Use the documentation formatter for detailed output,
160
+ # unless a formatter has already been configured
161
+ # (e.g. via a command-line flag).
162
+ # config.default_formatter = "doc"
163
+ # end
164
+
165
+ # Print the 10 slowest examples and example groups at the
166
+ # end of the spec run, to help surface which specs are running
167
+ # particularly slow.
168
+ config.profile_examples = 10
169
+
170
+ # Run specs in random order to surface order dependencies. If you find an
171
+ # order dependency and want to debug it, you can fix the order by providing
172
+ # the seed, which is printed after each run.
173
+ # --seed 1234
174
+ config.order = :random
175
+
176
+ # Seed global randomization in this process using the `--seed` CLI option.
177
+ # Setting this allows you to use `--seed` to deterministically reproduce
178
+ # test failures related to randomization by passing the same `--seed` value
179
+ # as the one that triggered the failure.
180
+ Kernel.srand config.seed
181
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+ # copied from Hyrax
3
+
4
+ module ControllerLevelHelpers
5
+ # This provides some common mock methods for view tests.
6
+ # These are normally provided by the controller.
7
+ module ControllerViewHelpers
8
+ def search_state
9
+ @search_state ||= CatalogController.search_state_class.new(params, blacklight_config, controller)
10
+ end
11
+
12
+ # This allows you to set the configuration
13
+ # @example: view.blacklight_config = Blacklight::Configuration.new
14
+ attr_writer :blacklight_config
15
+
16
+ def blacklight_config
17
+ @blacklight_config ||= CatalogController.blacklight_config
18
+ end
19
+
20
+ def blacklight_configuration_context
21
+ @blacklight_configuration_context ||= Blacklight::Configuration::Context.new(controller)
22
+ end
23
+ end
24
+
25
+ def initialize_controller_helpers(helper)
26
+ helper.extend ControllerViewHelpers
27
+ end
28
+ end