iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,178 @@
1
+ require 'fileutils'
2
+ require 'spec_helper'
3
+
4
+ RSpec.describe IiifPrint::PluggableDerivativeService do
5
+ let(:persisted_file_set) do
6
+ fs = FileSet.new
7
+ work.title = ['This is a page!']
8
+ work.members.push(fs)
9
+ fs.instance_variable_set(:@mime_type, 'image/tiff')
10
+ fs.save!(validate: false)
11
+ work.save!(validate: false)
12
+ fs
13
+ end
14
+
15
+ let(:fixture_path) do
16
+ File.join(
17
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
18
+ )
19
+ end
20
+
21
+ describe "service registration" do
22
+ # integration test with Hyrax: verify the service is registered
23
+
24
+ it "is registered with Hyrax" do
25
+ expect(Hyrax::DerivativeService.services).to include described_class
26
+ end
27
+
28
+ it "is the first valid service found" do
29
+ file_set = double(FileSet,
30
+ class: FileSet,
31
+ mime_type: 'application/pdf',
32
+ parent: MyIiifConfiguredWorkWithAllDerivativeServices.new)
33
+ found = Hyrax::DerivativeService.for(file_set)
34
+ expect(found).to be_a described_class
35
+ end
36
+ end
37
+
38
+ context "when the FileSet's parent is not IiifPrint configured" do
39
+ before do
40
+ allow(persisted_file_set).to receive(:in_works).and_return([work])
41
+ end
42
+
43
+ let(:work) { MyWork.new }
44
+
45
+ describe "#plugins" do
46
+ it "uses the default derivatives service" do
47
+ file_set = double(FileSet,
48
+ class: FileSet,
49
+ mime_type: 'application/pdf',
50
+ parent: MyWork.new)
51
+ service = described_class.new(file_set)
52
+ expect(service.plugins).to eq [Hyrax::FileSetDerivativesService]
53
+ end
54
+ end
55
+ end
56
+
57
+ context "when the FileSet's parent is IiifPrint configured" do
58
+ describe "calls the configured derivative plugins" do
59
+ before do
60
+ allow(persisted_file_set).to receive(:in_works).and_return([work])
61
+ allow_any_instance_of(Hyrax::FileSetDerivativesService).to receive(:send)
62
+ end
63
+
64
+ let(:work) { MyIiifConfiguredWork.new }
65
+ let(:plugin) { FakeDerivativeService.new }
66
+
67
+ it "calls each plugin on create" do
68
+ service = described_class.new(persisted_file_set, plugins: [plugin])
69
+ expect do
70
+ service.create_derivatives('not_a_real_filename')
71
+ end.to change(plugin, :create_called).by(1)
72
+ end
73
+
74
+ def touch_fake_derivative_file(file_set, ext)
75
+ path = Hyrax::DerivativePath.derivative_path_for_reference(file_set, ext)
76
+ FileUtils.mkdir_p(File.join(path.split('/')[0..-2]))
77
+ FileUtils.touch(path)
78
+ end
79
+
80
+ it "does not re-create existing derivative" do
81
+ service = described_class.new(persisted_file_set, plugins: [plugin])
82
+ expect(persisted_file_set.id).not_to be_nil
83
+ expect do
84
+ touch_fake_derivative_file(persisted_file_set, plugin.target_extension)
85
+ service.create_derivatives('/nonsense/source/path/ignored ')
86
+ end.not_to change(plugin, :create_called)
87
+ end
88
+
89
+ it "calls each plugin on cleanup" do
90
+ service = described_class.new(persisted_file_set, plugins: [plugin])
91
+ expect { service.cleanup_derivatives }.to change(plugin, :cleanup_called).by(1)
92
+ end
93
+ end
94
+
95
+ context "integration tests for plugins" do
96
+ before do
97
+ allow(persisted_file_set).to receive(:in_works).and_return([work])
98
+ end
99
+
100
+ let(:work) { MyIiifConfiguredWorkWithAllDerivativeServices.new }
101
+
102
+ describe "calls all derivative plugins" do
103
+ def source_image(name)
104
+ File.join(fixture_path, name)
105
+ end
106
+
107
+ def derivatives_for(file_set)
108
+ Hyrax::DerivativePath.derivatives_for_reference(file_set)
109
+ end
110
+
111
+ def expected_plugins
112
+ [
113
+ Hyrax::FileSetDerivativesService,
114
+ IiifPrint::JP2DerivativeService,
115
+ IiifPrint::PDFDerivativeService,
116
+ IiifPrint::TextExtractionDerivativeService,
117
+ IiifPrint::TIFFDerivativeService
118
+ ]
119
+ end
120
+
121
+ # The expected set of Plugins that will run for file set
122
+ it "has expected valid plugins configured" do
123
+ plugins = described_class.new(persisted_file_set).plugins
124
+ fs = persisted_file_set
125
+ services = plugins.map { |plugin| plugin.new(fs) }.select(&:valid?)
126
+ expect(services.length).to eq 5
127
+ used_plugins = services.map(&:class)
128
+ expected_plugins.each do |plugin|
129
+ expect(used_plugins).to include plugin
130
+ end
131
+ end
132
+
133
+ it "creates expected derivatives from TIFF source" do
134
+ svc = described_class.new(persisted_file_set)
135
+ svc.create_derivatives(source_image('4.1.07.tiff'))
136
+ made = derivatives_for(persisted_file_set)
137
+ made.each { |path| expect(File.exist?(path)) }
138
+ extensions = made.map { |path| path.split('.')[-1] }
139
+ expect(extensions).to include 'pdf'
140
+ expect(extensions).to include 'jp2'
141
+ expect(extensions).not_to include 'tiff'
142
+ # Thumbnail, created by Hyrax:
143
+ expect(extensions).to include 'jpeg'
144
+ end
145
+ end
146
+
147
+ describe "ingest integration" do
148
+ def log_attachment(file_set)
149
+ # create a log entry for the fileset given destination name 'jp2'
150
+ IiifPrint::DerivativeAttachment.create(
151
+ fileset_id: file_set.id,
152
+ path: '/some/arbitrary/path/to.jp2',
153
+ destination_name: 'jp2'
154
+ )
155
+ end
156
+
157
+ def jp2_plugin?(plugins)
158
+ r = plugins.select { |p| p.is_a? IiifPrint::JP2DerivativeService }
159
+ !r.empty?
160
+ end
161
+
162
+ it "will not attempt creating over pre-made derivative" do
163
+ service = described_class.new(persisted_file_set)
164
+ # this should be respected, evaluate by obtaining filtered
165
+ # services list, which must omit JP2DerivativeService
166
+ plugins = service.services(:create_derivatives)
167
+ # initially has jp2 plugin
168
+ expect(jp2_plugin?(plugins)).to be true
169
+ # blacklist jp2 by effect of log entry of pre-made attachment
170
+ log_attachment(service.file_set)
171
+ # omits, after logging intent of previous attachment:
172
+ plugins = service.services(:create_derivatives)
173
+ expect(jp2_plugin?(plugins)).to be false
174
+ end
175
+ end
176
+ end
177
+ end
178
+ end
@@ -0,0 +1,82 @@
1
+ require 'nokogiri'
2
+ require 'spec_helper'
3
+ require 'misc_shared'
4
+
5
+ RSpec.describe IiifPrint::TextExtractionDerivativeService do
6
+ include_context "shared setup"
7
+
8
+ let(:valid_file_set) do
9
+ file_set = FileSet.new
10
+ file_set.save!(validate: false)
11
+ file_set
12
+ end
13
+
14
+ let(:work) do
15
+ work = NewspaperPage.create(title: ["Hello"])
16
+ work.members << valid_file_set
17
+ work.save!
18
+ end
19
+
20
+ let(:minimal_alto) do
21
+ File.join(fixture_path, 'minimal-alto.xml')
22
+ end
23
+
24
+ let(:altoxsd) do
25
+ xsdpath = File.join(fixture_path, 'alto-2-0.xsd')
26
+ Nokogiri::XML::Schema(File.read(xsdpath))
27
+ end
28
+
29
+ describe "Creates ALTO derivative" do
30
+ def source_image(name)
31
+ File.join(fixture_path, name)
32
+ end
33
+
34
+ def expected_path(file_set, ext)
35
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, ext)
36
+ end
37
+
38
+ def validate_alto(filename)
39
+ altoxsd.validate(filename)
40
+ end
41
+
42
+ def derivative_exists(ext)
43
+ path = expected_path(valid_file_set, ext)
44
+ expect(File.exist?(path)).to be true
45
+ expect(File.size(path)).to be > 0
46
+ end
47
+
48
+ xit "creates, stores valid ALTO and plain-text derivatives" do
49
+ # these are in same test to avoid duplicate OCR operation
50
+ service = described_class.new(valid_file_set)
51
+ service.create_derivatives(source_image('ocr_mono.tiff'))
52
+ # ALTO derivative file exists at expected path and validates:
53
+ altoxsd.validate(expected_path(valid_file_set, 'xml'))
54
+ # Plain text exists as non-empty file:
55
+ derivative_exists('txt')
56
+ derivative_exists('json')
57
+ json_path = expected_path(valid_file_set, 'json')
58
+ loaded_result = JSON.parse(File.read(json_path))
59
+ expect(loaded_result['coords'].length).to be > 1
60
+ end
61
+
62
+ xit "usually uses OCR, when no existing text" do
63
+ service = described_class.new(valid_file_set)
64
+ # here, service will delegate create_derivatives to OCR impl method:
65
+ expect(service).to receive(:create_derivatives_from_ocr)
66
+ service.create_derivatives(source_image('ocr_mono.tiff'))
67
+ end
68
+
69
+ xit "defers to existing ALTO sources, when present" do
70
+ # Attach some ALTO to a work
71
+ derivatives = IiifPrint::Data::WorkDerivatives.of(
72
+ work,
73
+ valid_file_set
74
+ )
75
+ derivatives.attach(minimal_alto, 'xml')
76
+ # In this case, service will not call the OCR implementation method:
77
+ service = described_class.new(valid_file_set)
78
+ expect(service).not_to receive(:create_derivatives_from_ocr)
79
+ service.create_derivatives(source_image('ocr_mono.tiff'))
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,127 @@
1
+ require 'nokogiri'
2
+ require 'spec_helper'
3
+ require 'misc_shared'
4
+
5
+ RSpec.describe IiifPrint::TextFormatsFromALTOService do
6
+ include_context "shared setup"
7
+
8
+ let(:valid_file_set) do
9
+ file_set = FileSet.new
10
+ file_set.save!(validate: false)
11
+ file_set
12
+ end
13
+
14
+ let(:work) do
15
+ work = NewspaperPage.create(title: ["Hello"])
16
+ work.members << valid_file_set
17
+ work.save!
18
+ work
19
+ end
20
+
21
+ let(:minimal_alto) do
22
+ File.join(fixture_path, 'minimal-alto.xml')
23
+ end
24
+
25
+ def log_incoming_attachment(fsid)
26
+ IiifPrint::DerivativeAttachment.create!(
27
+ fileset_id: fsid,
28
+ path: minimal_alto,
29
+ destination_name: 'xml'
30
+ )
31
+ end
32
+
33
+ def derivatives_of(work, fileset)
34
+ IiifPrint::Data::WorkDerivatives.of(work, fileset)
35
+ end
36
+
37
+ describe "Saves other formats from ALTO" do
38
+ xit "saves JSON, text from existing ALTO derivative" do
39
+ derivatives = derivatives_of(work, valid_file_set)
40
+ expect(derivatives.keys.size).to eq 0
41
+ derivatives.attach(minimal_alto, 'xml')
42
+ expect(derivatives.keys.size).to eq 1
43
+ service = described_class.new(valid_file_set)
44
+ service.create_derivatives('/some/random/primary/path/does_not/matter')
45
+ derivatives.load_paths
46
+ expect(derivatives.keys.size).to eq 3
47
+ expect(derivatives.keys).to include 'json', 'txt'
48
+ end
49
+
50
+ xit "saves JSON, text from incoming ALTO derivative" do
51
+ derivatives = derivatives_of(work, valid_file_set)
52
+ expect(derivatives.keys.size).to eq 0
53
+ log_incoming_attachment(valid_file_set.id)
54
+ service = described_class.new(valid_file_set)
55
+ service.create_derivatives('/some/random/primary/path/does_not/matter')
56
+ # reload keys to check derivatives:
57
+ derivatives.load_paths
58
+ expect(derivatives.keys).to include 'json', 'txt'
59
+ end
60
+ end
61
+
62
+ describe "scaling matters" do
63
+ # we need an ingested, characterized file:
64
+ do_now_jobs = [
65
+ IngestLocalFileJob,
66
+ IngestJob,
67
+ InheritPermissionsJob,
68
+ CharacterizeJob
69
+ ]
70
+ # we omit CreateDerivativesJob from above, as obviously duplicative and
71
+ # therefore potential cause of problems here.
72
+
73
+ # remove any previous test run (development) artifacts in file
74
+ # attachment logging tables
75
+ before(:all) do
76
+ IiifPrint::DerivativeAttachment.all.delete_all
77
+ IiifPrint::IngestFileRelation.all.delete_all
78
+ end
79
+
80
+ let(:work) do
81
+ work = NewspaperPage.create(title: ["Hello"])
82
+ work
83
+ end
84
+
85
+ let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
86
+ let(:ocr_alto_path) do
87
+ File.join(fixture_path, 'ocr_alto_scaled_4pts_per_px.xml')
88
+ end
89
+
90
+ def attach_primary_file(work)
91
+ IiifPrint::Data::WorkFiles.assign!(to: work, path: tiff_path)
92
+ work.reload
93
+ pcdm_file = IiifPrint::Data::WorkFiles.of(work).values[0].unwrapped
94
+ expect(pcdm_file).not_to be_nil
95
+ # we have image dimensions (px) to work with:
96
+ expect(pcdm_file.width[0].to_i).to be_an Integer
97
+ expect(pcdm_file.height[0].to_i).to be_an Integer
98
+ end
99
+
100
+ def derivatives_of(work)
101
+ IiifPrint::Data::WorkFiles.of(work).derivatives
102
+ end
103
+
104
+ def attach_alto(work)
105
+ derivatives = derivatives_of(work)
106
+ derivatives.attach(ocr_alto_path, 'xml')
107
+ # has a path to now-stored derivative:
108
+ expect(derivatives.path('xml')).not_to be_nil
109
+ end
110
+
111
+ xit "scales ALTO points to original image", perform_enqueued: do_now_jobs do
112
+ attach_primary_file(work)
113
+ attach_alto(work)
114
+ work.reload
115
+ file_set = work.ordered_members.to_a.find { |m| m.is_a? FileSet }
116
+ service = described_class.new(file_set)
117
+ service.create_derivatives('/a/path/here/needed/but/will/not/matter')
118
+ coords = JSON.parse(derivatives_of(work).data('json'))
119
+ word = coords['coords'].select { |k, _v| k == 'Bethesda' }
120
+ # test against known scaled coordinate of OCR data:
121
+ # This roughly matches unscaled ALTO data for token 'Bethesda'
122
+ # in spec/fixtures/files/ocr_alto.xml, with the disclaimer that
123
+ # round-trip rounding error of 1px is noted for VPOS.
124
+ expect(word['Bethesda']).to eq [[16, 665, 78, 16]]
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+ RSpec.describe IiifPrint::TIFFDerivativeService do
3
+ let(:valid_file_set) do
4
+ file_set = FileSet.new
5
+ file_set.save!(validate: false)
6
+ file_set
7
+ end
8
+
9
+ let(:fixture_path) do
10
+ File.join(
11
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
12
+ )
13
+ end
14
+
15
+ describe "Creates TIFF derivatives" do
16
+ def source_image(name)
17
+ File.join(fixture_path, name)
18
+ end
19
+
20
+ def expected_path(file_set)
21
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'tiff')
22
+ end
23
+
24
+ def get_res(path)
25
+ tool = IiifPrint::ImageTool.new(path)
26
+ "#{tool.metadata[:width]}x#{tool.metadata[:height]}"
27
+ end
28
+
29
+ def check_dpi_match(orig, dest)
30
+ # compare the width x height strings from get_res for source and derivative; skip pdf to avoid ghostscript warnings to stderr
31
+ expect(get_res(orig)).to eq get_res(dest) unless orig.end_with?('pdf')
32
+ end
33
+
34
+ def makes_tiff(filename)
35
+ path = source_image(filename)
36
+ expected = expected_path(valid_file_set)
37
+ expect(File.exist?(expected)).to be false
38
+ svc = described_class.new(valid_file_set)
39
+ svc.create_derivatives(path)
40
+ expect(File.exist?(expected)).to be true
41
+ mime = IiifPrint::ImageTool.new(expected).metadata[:content_type]
42
+ expect(mime).to eq 'image/tiff'
43
+ check_dpi_match(path, expected)
44
+ svc.cleanup_derivatives
45
+ end
46
+
47
+ # for cases where primary file is TIFF already
48
+ def avoids_duplicative_creation(filename)
49
+ expected = expected_path(valid_file_set)
50
+ expect(File.exist?(expected)).to be false
51
+ svc = described_class.new(valid_file_set)
52
+ svc.create_derivatives(source_image(filename))
53
+ expect(File.exist?(expected)).not_to be true
54
+ end
55
+
56
+ it "Does not make TIFF derivatives when primary is TIFF" do
57
+ avoids_duplicative_creation('ocr_mono.tiff')
58
+ avoids_duplicative_creation('ocr_gray.tiff')
59
+ end
60
+
61
+ it "creates TIFF from PDF source, robust to multi-page" do
62
+ makes_tiff('sample-color-newsletter.pdf')
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,181 @@
1
+ require 'json'
2
+
3
+ # testing environment:
4
+ ENV['RAILS_ENV'] ||= 'test'
5
+
6
+ require 'coveralls'
7
+ Coveralls.wear!
8
+
9
+ require 'shoulda/matchers'
10
+ Shoulda::Matchers.configure do |config|
11
+ config.integrate do |with|
12
+ with.test_framework :rspec
13
+ end
14
+ end
15
+
16
+ # engine_cart:
17
+ require 'bundler/setup'
18
+ require 'engine_cart'
19
+ EngineCart.load_application!
20
+
21
+ require 'rspec/rails'
22
+ require 'support/iiif_print_models'
23
+ require 'support/controller_level_helpers'
24
+ require 'rspec/active_model/mocks'
25
+
26
+ ActiveJob::Base.queue_adapter = :test
27
+
28
+ RSpec.configure do |config|
29
+ # enable FactoryBot:
30
+ require 'factory_bot'
31
+ config.include FactoryBot::Syntax::Methods
32
+ # auto-detect and load all factories in spec/factories:
33
+ FactoryBot.find_definitions
34
+
35
+ config.infer_spec_type_from_file_location!
36
+
37
+ # Transactional
38
+ config.use_transactional_fixtures = false
39
+ config.include Devise::Test::ControllerHelpers, type: :controller
40
+
41
+ # ensure Hyrax has active sipity workflow for default admin set:
42
+ config.before(:suite) do
43
+ require 'active_fedora/cleaner'
44
+ require 'database_cleaner'
45
+
46
+ # By default, Hyrax uses a database minter class. That's the preferred pathway (because you are
47
+ # tracking minting state in the database). However, for testing purposes we don't need to / nor
48
+ # want to install the minter migrations. Hence we're favoring this approach.
49
+ minter_class = ::Noid::Rails::Minter::File
50
+ ::Noid::Rails.config.minter_class = minter_class
51
+ Hyrax.config.noid_minter_class = minter_class
52
+
53
+ ActiveFedora::Cleaner.clean!
54
+ DatabaseCleaner.clean_with(:truncation)
55
+
56
+ begin
57
+ # TODO: switch the below methods to use the appropriate services
58
+ # rather than the deprecated methods currently being used.
59
+ # ensure permission template actually exists in RDBMS:
60
+ id = 'admin_set/default'
61
+ no_template = Hyrax::PermissionTemplate.find_by(source_id: id).nil?
62
+ Hyrax::PermissionTemplate.create!(source_id: id) if no_template
63
+ # ensure workflows exist, presumes permission template does first:
64
+ Hyrax::Workflow::WorkflowImporter.load_workflows
65
+ # Default admin set needs to exist in Fedora, with relation to its
66
+ # PermissionTemplate object:
67
+ begin
68
+ admin_set = AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
69
+ admin_set.save!
70
+ rescue ActiveRecord::RecordNotUnique
71
+ admin_set = AdminSet.find(AdminSet::DEFAULT_ID)
72
+ end
73
+ permission_template = admin_set.permission_template
74
+ workflow = permission_template.available_workflows.where(
75
+ name: 'default'
76
+ ).first
77
+ Sipity::Workflow.activate!(
78
+ permission_template: permission_template,
79
+ workflow_id: workflow.id
80
+ )
81
+ rescue Faraday::ConnectionFailed
82
+ STDERR.puts "Attempting to run test suite without Fedora and/or Solr..."
83
+ end
84
+ end
85
+
86
+ # :perform_enqueued config setting below copied from Hyrax spec_helper.rb
87
+ config.before(:example, :perform_enqueued) do |example|
88
+ ActiveJob::Base.queue_adapter.filter = example.metadata[:perform_enqueued].try(:to_a)
89
+ ActiveJob::Base.queue_adapter.perform_enqueued_jobs = true
90
+ ActiveJob::Base.queue_adapter.perform_enqueued_at_jobs = true
91
+ end
92
+ config.after(:example, :perform_enqueued) do
93
+ ActiveJob::Base.queue_adapter.filter = nil
94
+ ActiveJob::Base.queue_adapter.enqueued_jobs = []
95
+ ActiveJob::Base.queue_adapter.performed_jobs = []
96
+ ActiveJob::Base.queue_adapter.perform_enqueued_jobs = false
97
+ ActiveJob::Base.queue_adapter.perform_enqueued_at_jobs = false
98
+ end
99
+ config.after(:suite) do # or :each or :all
100
+ FileUtils.rm_rf(Dir[Rails.root.join('tmp', 'derivatives', '*')])
101
+ end
102
+
103
+ # rspec-expectations config goes here. You can use an alternate
104
+ # assertion/expectation library such as wrong or the stdlib/minitest
105
+ # assertions if you prefer.
106
+ config.expect_with :rspec do |expectations|
107
+ # This option will default to `true` in RSpec 4. It makes the `description`
108
+ # and `failure_message` of custom matchers include text for helper methods
109
+ # defined using `chain`, e.g.:
110
+ # be_bigger_than(2).and_smaller_than(4).description
111
+ # # => "be bigger than 2 and smaller than 4"
112
+ # ...rather than:
113
+ # # => "be bigger than 2"
114
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
115
+ end
116
+
117
+ # rspec-mocks config goes here. You can use an alternate test double
118
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
119
+ config.mock_with :rspec do |mocks|
120
+ # Prevents you from mocking or stubbing a method that does not exist on
121
+ # a real object. This is generally recommended, and will default to
122
+ # `true` in RSpec 4.
123
+ mocks.verify_partial_doubles = true
124
+ end
125
+
126
+ # This option will default to `:apply_to_host_groups` in RSpec 4 (and will
127
+ # have no way to turn it off -- the option exists only for backwards
128
+ # compatibility in RSpec 3). It causes shared context metadata to be
129
+ # inherited by the metadata hash of host groups and examples, rather than
130
+ # triggering implicit auto-inclusion in groups with matching metadata.
131
+ config.shared_context_metadata_behavior = :apply_to_host_groups
132
+
133
+ # The settings below are suggested to provide a good initial experience
134
+ # with RSpec, but feel free to customize to your heart's content.
135
+
136
+ # This allows you to limit a spec run to individual examples or groups
137
+ # you care about by tagging them with `:focus` metadata. When nothing
138
+ # is tagged with `:focus`, all examples get run. RSpec also provides
139
+ # aliases for `it`, `describe`, and `context` that include `:focus`
140
+ # metadata: `fit`, `fdescribe` and `fcontext`, respectively.
141
+ # config.filter_run_when_matching :focus
142
+
143
+ # Allows RSpec to persist some state between runs in order to support
144
+ # the `--only-failures` and `--next-failure` CLI options. We recommend
145
+ # you configure your source control system to ignore this file.
146
+ # config.example_status_persistence_file_path = "spec/examples.txt"
147
+
148
+ # Limits the available syntax to the non-monkey patched syntax that is
149
+ # recommended. For more details, see:
150
+ # - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
151
+ # - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
152
+ # - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
153
+ # config.disable_monkey_patching!
154
+
155
+ # Many RSpec users commonly either run the entire suite or an individual
156
+ # file, and it's useful to allow more verbose output when running an
157
+ # individual spec file.
158
+ # if config.files_to_run.one?
159
+ # Use the documentation formatter for detailed output,
160
+ # unless a formatter has already been configured
161
+ # (e.g. via a command-line flag).
162
+ # config.default_formatter = "doc"
163
+ # end
164
+
165
+ # Print the 10 slowest examples and example groups at the
166
+ # end of the spec run, to help surface which specs are running
167
+ # particularly slow.
168
+ config.profile_examples = 10
169
+
170
+ # Run specs in random order to surface order dependencies. If you find an
171
+ # order dependency and want to debug it, you can fix the order by providing
172
+ # the seed, which is printed after each run.
173
+ # --seed 1234
174
+ config.order = :random
175
+
176
+ # Seed global randomization in this process using the `--seed` CLI option.
177
+ # Setting this allows you to use `--seed` to deterministically reproduce
178
+ # test failures related to randomization by passing the same `--seed` value
179
+ # as the one that triggered the failure.
180
+ Kernel.srand config.seed
181
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+ # copied from Hyrax
3
+
4
+ module ControllerLevelHelpers
5
+ # This provides some common mock methods for view tests.
6
+ # These are normally provided by the controller.
7
+ module ControllerViewHelpers
8
+ def search_state
9
+ @search_state ||= CatalogController.search_state_class.new(params, blacklight_config, controller)
10
+ end
11
+
12
+ # This allows you to set the configuration
13
+ # @example: view.blacklight_config = Blacklight::Configuration.new
14
+ attr_writer :blacklight_config
15
+
16
+ def blacklight_config
17
+ @blacklight_config ||= CatalogController.blacklight_config
18
+ end
19
+
20
+ def blacklight_configuration_context
21
+ @blacklight_configuration_context ||= Blacklight::Configuration::Context.new(controller)
22
+ end
23
+ end
24
+
25
+ def initialize_controller_helpers(helper)
26
+ helper.extend ControllerViewHelpers
27
+ end
28
+ end