iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,84 @@
1
+ require 'json'
2
+ require 'nokogiri'
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe IiifPrint::TextExtraction::PageOCR do
6
+ let(:fixture_path) do
7
+ File.join(
8
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
9
+ )
10
+ end
11
+
12
+ let(:altoxsd) do
13
+ xsdpath = File.join(fixture_path, 'alto-2-0.xsd')
14
+ Nokogiri::XML::Schema(File.read(xsdpath))
15
+ end
16
+
17
+ # sample "snippet" images for OCR testing:
18
+ let(:example_gray_tiff) { File.join(fixture_path, 'ocr_gray.tiff') }
19
+ let(:example_mono_tiff) { File.join(fixture_path, 'ocr_mono.tiff') }
20
+ let(:example_color_tiff) { File.join(fixture_path, 'ocr_color.tiff') }
21
+ let(:example_gray_jp2) { File.join(fixture_path, 'ocr_gray.jp2') }
22
+ let(:ocr_from_gray_tiff) { described_class.new(example_gray_tiff) }
23
+
24
+ describe "performs OCR" do
25
+ def match_ocr_expectations(words)
26
+ expect(words).to be_an(Array)
27
+ expect(words).not_to be_empty
28
+ expect(words[0]).to be_a(Hash)
29
+ [:word, :coordinates].each do |key|
30
+ expect(words[0].keys).to include key
31
+ end
32
+ end
33
+
34
+ it "gets words and coordinates from grayscale source" do
35
+ match_ocr_expectations(ocr_from_gray_tiff.words)
36
+ end
37
+
38
+ it "gets words and coordinates from one-bit source" do
39
+ ocr = described_class.new(example_mono_tiff)
40
+ match_ocr_expectations(ocr.words)
41
+ end
42
+
43
+ it "gets words and coordinates from color source" do
44
+ ocr = described_class.new(example_color_tiff)
45
+ match_ocr_expectations(ocr.words)
46
+ end
47
+
48
+ it "gets words and coordinates from jp2 source" do
49
+ ocr = described_class.new(example_gray_jp2)
50
+ match_ocr_expectations(ocr.words)
51
+ end
52
+ end
53
+
54
+ describe "turns image into ALTO" do
55
+ xit "takes grayscale tiff, outputs valid ALTO, geometry" do
56
+ alto = ocr_from_gray_tiff.alto
57
+ document = Nokogiri::XML(alto)
58
+ errors = altoxsd.validate(document)
59
+ expect(errors.length).to eq 0
60
+ expect(document.at_css('PrintSpace')['WIDTH']).to eq "418"
61
+ expect(document.at_css('PrintSpace')['HEIGHT']).to eq "1046"
62
+ end
63
+ end
64
+
65
+ describe "plain text" do
66
+ it "makes plain text available for image" do
67
+ plain = ocr_from_gray_tiff.plain
68
+ expect(plain.class).to be String
69
+ expect(plain.length).to be > 0
70
+ end
71
+ end
72
+
73
+ describe "JSON word coordinates" do
74
+ it "passes properly formatted data to WordCoordsBuilder and receives output" do
75
+ parsed = JSON.parse(ocr_from_gray_tiff.word_json)
76
+ expect(parsed['coords'].length).to be > 1
77
+ word = ocr_from_gray_tiff.words[0]
78
+ word1 = parsed['coords'][word[:word]]
79
+ word1_coords = word1[0]
80
+ expect(word1_coords[2]).to eq word[:coordinates][2]
81
+ expect(word1_coords[3]).to eq word[:coordinates][3]
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::TextExtraction::RenderAlto do
4
+ let(:fixture_path) do
5
+ File.join(
6
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
7
+ )
8
+ end
9
+
10
+ let(:altoxsd) do
11
+ xsdpath = File.join(fixture_path, 'alto-2-0.xsd')
12
+ Nokogiri::XML::Schema(File.read(xsdpath))
13
+ end
14
+
15
+ let(:page_prefix) { '<Page ID="ID1" PHYSICAL_IMG_NR="1"' }
16
+
17
+ let(:words) do
18
+ [
19
+ { word: "If", coordinates: [52, 13, 11, 14] },
20
+ { word: "you", coordinates: [69, 17, 31, 14] },
21
+ { word: "are", coordinates: [108, 17, 28, 10] },
22
+ { word: "a", coordinates: [143, 17, 8, 10] },
23
+ { word: "friend,", coordinates: [158, 13, 56, 16] },
24
+ { word: "you", coordinates: [51, 39, 31, 14] },
25
+ { word: "speak", coordinates: [90, 35, 50, 18] },
26
+ { word: "the", coordinates: [146, 35, 28, 14] },
27
+ { word: "password,", coordinates: [182, 35, 85, 18] },
28
+ { word: "and", coordinates: [51, 57, 30, 14] },
29
+ { word: "the", coordinates: [89, 57, 28, 14] },
30
+ { word: "doors", coordinates: [124, 57, 48, 14] },
31
+ { word: "will", coordinates: [180, 57, 28, 14] },
32
+ { word: "open.", coordinates: [216, 61, 47, 14] }
33
+ ]
34
+ end
35
+
36
+ describe "renders alto" do
37
+ it "creates alto given width, height, words" do
38
+ renderer = described_class.new(12_000, 9600)
39
+ output = renderer.to_alto(words)
40
+ expect(output.class).to be String
41
+ expect(output).to include '<alto'
42
+ expect(output).to include '<String'
43
+ expect(output).to include page_prefix + ' HEIGHT="9600" WIDTH="12000"'
44
+ expect(Nokogiri::XML(output).errors.empty?).to be true
45
+ end
46
+
47
+ xit "makes alto 2.0 that validates" do
48
+ renderer = described_class.new(12_000, 9600)
49
+ output = renderer.to_alto(words)
50
+ document = Nokogiri::XML(output)
51
+ altoxsd.validate(document)
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,44 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::TextExtraction::WordCoordsBuilder do
4
+ let(:words) do
5
+ [
6
+ { word: "foo", coordinates: [1, 2, 3, 4] },
7
+ { word: "bar", coordinates: [5, 6, 7, 8] },
8
+ { word: "baz", coordinates: [9, 10, 11, 12] },
9
+ { word: "foo", coordinates: [13, 14, 15, 16] }
10
+ ]
11
+ end
12
+ let(:image_width) { 1_234 }
13
+ let(:image_height) { 5_678 }
14
+
15
+ describe '.json_coordinates_for' do
16
+ let(:wcb_to_json) { JSON.parse(described_class.json_coordinates_for(words: words, width: image_width, height: image_height)) }
17
+ it 'has the correct structure' do
18
+ expect(wcb_to_json['height']).to eq image_height
19
+ expect(wcb_to_json['width']).to eq image_width
20
+ expect(wcb_to_json['coords'].length).to eq 3
21
+ expect(wcb_to_json['coords']['foo']).not_to be_falsey
22
+ end
23
+
24
+ it 'combines coordinates for the same word' do
25
+ expect(wcb_to_json['coords']['foo']).to eq [[1, 2, 3, 4], [13, 14, 15, 16]]
26
+ end
27
+ end
28
+
29
+ describe '#to_json' do
30
+ let(:wcb_to_json) { JSON.parse(wcb.to_json) }
31
+ let(:wcb) { described_class.new(words, image_width, image_height) }
32
+
33
+ it 'has the correct structure' do
34
+ expect(wcb_to_json['height']).to eq image_height
35
+ expect(wcb_to_json['width']).to eq image_width
36
+ expect(wcb_to_json['coords'].length).to eq 3
37
+ expect(wcb_to_json['coords']['foo']).not_to be_falsey
38
+ end
39
+
40
+ it 'combines coordinates for the same word' do
41
+ expect(wcb_to_json['coords']['foo']).to eq [[1, 2, 3, 4], [13, 14, 15, 16]]
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint do
4
+ describe ".manifest_metadata_for" do
5
+ let(:attributes) do
6
+ { "id" => "abc123",
7
+ "title_tesim" => ['My Awesome Title'] }
8
+ end
9
+ let(:solr_document) { SolrDocument.new(attributes) }
10
+ let(:base_url) { "https://my.dev.test" }
11
+
12
+ subject(:manifest_metadata) do
13
+ described_class.manifest_metadata_for(work: solr_document, current_ability: double(Ability), base_url: base_url)
14
+ end
15
+ it { is_expected.not_to be_falsey }
16
+ it "does not contain any nil values" do
17
+ expect(subject).not_to include(nil)
18
+ end
19
+ end
20
+
21
+ describe ".model_configuration" do
22
+ context "default configuration" do
23
+ let(:model) do
24
+ Class.new do
25
+ include IiifPrint.model_configuration(pdf_split_child_model: Class.new)
26
+ end
27
+ end
28
+
29
+ subject(:record) { model.new }
30
+
31
+ it { is_expected.to be_iiif_print_config }
32
+
33
+ it "has a #pdf_splitter_job" do
34
+ expect(record.iiif_print_config.pdf_splitter_job).to be(IiifPrint::Jobs::ChildWorksFromPdfJob)
35
+ end
36
+
37
+ it "has a #pdf_splitter_service" do
38
+ expect(record.iiif_print_config.pdf_splitter_service).to be(IiifPrint::SplitPdfs::PagesIntoImagesService)
39
+ end
40
+
41
+ it "has #derivative_service_plugins" do
42
+ expect(record.iiif_print_config.derivative_service_plugins).to eq(
43
+ [IiifPrint::JP2DerivativeService,
44
+ IiifPrint::PDFDerivativeService,
45
+ IiifPrint::TextExtractionDerivativeService,
46
+ IiifPrint::TIFFDerivativeService]
47
+ )
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,111 @@
1
+ RSpec.shared_context "shared setup", shared_context: :metadata do
2
+ let(:fixture_path) do
3
+ path = File.join(
4
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
5
+ )
6
+ # TODO: NOTE: this has potential timing issues in the specs, because we're adjusting the
7
+ # configured value during the spec run.
8
+ registered = Hyrax.config.registered_ingest_dirs
9
+ registered.push(path) unless registered.include?(path)
10
+ path
11
+ end
12
+
13
+ # shared date to be invariant across all tests in a run:
14
+ date_static = Hyrax::TimeService.time_in_utc
15
+ let(:static_date) { date_static }
16
+
17
+ # path fixtures:
18
+ let(:example_gray_jp2) { File.join(fixture_path, 'ocr_gray.jp2') }
19
+ let(:txt_path) { File.join(fixture_path, 'credits.md') }
20
+ let(:sample_thumbnail) { File.join(fixture_path, 'thumbnail.jpg') }
21
+
22
+ # sample data:
23
+ let(:sample_text) { 'even in a mythical Age there must be some enigmas' }
24
+
25
+ let(:valid_file_set) do
26
+ file_set = FileSet.new
27
+ file_set.save!(validate: false)
28
+ file_set
29
+ end
30
+
31
+ let(:sample_work) do
32
+ work = MyWork.new
33
+ work.title = ['Bombadil']
34
+ work.members.push(valid_file_set)
35
+ work.save!
36
+ work
37
+ end
38
+
39
+ # sample objects:
40
+ let(:work_with_file) do
41
+ # we need a work with not just a valid (but empty) fileset, but also
42
+ # a persisted file, so we use the shared work sample, and expand
43
+ # on it with actual file data/metadata.
44
+ work = sample_work
45
+ fileset = work.members.first
46
+ file = Hydra::PCDM::File.create
47
+ fileset.original_file = file
48
+ # Set binary content on file via ActiveFedora content= mutator method
49
+ # which also makes .size method return valid result for content
50
+ file.content = File.open(txt_path)
51
+ # Set some metadata we would expect to otherwise be set upon an upload
52
+ file.original_name = 'credits.md'
53
+ file.mime_type = 'text/plain'
54
+ file.date_modified = static_date
55
+ file.date_created = static_date
56
+ # saving fileset also saves file content
57
+ fileset.save!
58
+ work
59
+ end
60
+
61
+ def path_factory
62
+ Hyrax::DerivativePath
63
+ end
64
+
65
+ def work_file_set(work)
66
+ work.members.detect { |m| m.is_a? FileSet }
67
+ end
68
+
69
+ def text_path(work)
70
+ path_factory.derivative_path_for_reference(work_file_set(work), 'txt')
71
+ end
72
+
73
+ def jp2_path(work)
74
+ path_factory.derivative_path_for_reference(work_file_set(work), 'jp2')
75
+ end
76
+
77
+ def thumbnail_path(work)
78
+ path_factory.derivative_path_for_reference(work_file_set(work), 'thumbnail')
79
+ end
80
+
81
+ def mkdir_derivative(work, name)
82
+ # make shared path for derivatives to live, Hyrax usually does this
83
+ # for thumbnails, and iiif_print does this in its derivative
84
+ # service plugins; here we do same.
85
+ fsid = work_file_set(work).id
86
+ path = path_factory.derivative_path_for_reference(fsid, name)
87
+ dir = File.join(path.split('/')[0..-2])
88
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
89
+ end
90
+
91
+ def mk_jp2_derivative(work)
92
+ mkdir_derivative(work, 'jp2')
93
+ dst_path = jp2_path(work)
94
+ FileUtils.copy(example_gray_jp2, dst_path)
95
+ expect(File.exist?(dst_path)).to be true
96
+ end
97
+
98
+ def mk_txt_derivative(work)
99
+ mkdir_derivative(work, 'txt')
100
+ dst_path = text_path(work)
101
+ File.open(dst_path, 'w') { |f| f.write(sample_text) }
102
+ expect(File.exist?(dst_path)).to be true
103
+ end
104
+
105
+ def mk_thumbnail_derivative(work)
106
+ mkdir_derivative(work, 'thumbnail')
107
+ dst_path = thumbnail_path(work)
108
+ FileUtils.copy(sample_thumbnail, dst_path)
109
+ expect(File.exist?(dst_path)).to be true
110
+ end
111
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module IiifPrint
4
+ RSpec.describe DerivativeAttachment, type: :model do
5
+ it "requires some columns to be considered complete" do
6
+ model = described_class.create
7
+ # attempt save without required data; expect failure
8
+ expect { model.save! }.to raise_exception(ActiveRecord::RecordInvalid)
9
+ end
10
+
11
+ it "saves when constructed with all field values" do
12
+ model = described_class.create(
13
+ fileset_id: 'a1b2c3d4e5',
14
+ path: '/path/to/somefile',
15
+ destination_name: 'txt'
16
+ )
17
+ # attempt save with all required data; expect success
18
+ expect { model.save! }.not_to raise_exception
19
+ end
20
+
21
+ it "saves when all fields completely set" do
22
+ model = described_class.create
23
+ model.fileset_id = 'someid123'
24
+ model.path = '/path/to/somefile'
25
+ model.destination_name = 'txt'
26
+ expect { model.save! }.not_to raise_exception
27
+ end
28
+
29
+ it "saves when only path, destination_name set" do
30
+ model = described_class.create
31
+ model.fileset_id = nil
32
+ model.path = '/path/to/somefile'
33
+ model.destination_name = 'txt'
34
+ expect { model.save! }.not_to raise_exception
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,56 @@
1
+ require 'spec_helper'
2
+
3
+ module IiifPrint
4
+ RSpec.describe IngestFileRelation, type: :model do
5
+ def make_test_records
6
+ # two unique values
7
+ described_class.create(
8
+ file_path: '/some/path/to/this',
9
+ derivative_path: '/some/path/to/that'
10
+ )
11
+ described_class.create(
12
+ file_path: '/some/path/to/this',
13
+ derivative_path: '/some/path/to/other_thing'
14
+ )
15
+ # a duplicate will save, presumption is that dupes are filtered on query:
16
+ described_class.create(
17
+ file_path: '/some/path/to/this',
18
+ derivative_path: '/some/path/to/other_thing'
19
+ )
20
+ end
21
+
22
+ it "will not save unless record is complete" do
23
+ model = described_class.create
24
+ # attempt save without required data; expect failure
25
+ expect { model.save! }.to raise_exception(ActiveRecord::RecordInvalid)
26
+ model2 = described_class.create
27
+ model2.file_path = '/path/to/sourcefile.tiff'
28
+ expect { model2.save! }.to raise_exception(ActiveRecord::RecordInvalid)
29
+ model3 = described_class.create
30
+ model3.derivative_path = '/path/to/sourcefile.tiff'
31
+ expect { model3.save! }.to raise_exception(ActiveRecord::RecordInvalid)
32
+ end
33
+
34
+ it "will save sufficiently constructed record" do
35
+ model = described_class.create(
36
+ file_path: '/path/to/this',
37
+ derivative_path: '/path/to/that'
38
+ )
39
+ expect { model.save! }.not_to raise_exception
40
+ end
41
+
42
+ it "will save when all fields completely set" do
43
+ model = described_class.create
44
+ model.file_path = '/path/to/sourcefile.tiff'
45
+ model.derivative_path = '/path/to/derived.jp2'
46
+ expect { model.save! }.not_to raise_exception
47
+ end
48
+
49
+ it "can query derivative paths for primary file" do
50
+ make_test_records
51
+ result = described_class.derivatives_for_file('/some/path/to/this')
52
+ expect(result).to be_an Array
53
+ expect(result.size).to eq 2
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
+ RSpec.describe SolrDocument do
3
+ let(:solr_doc) { described_class.new(id: 'foo', file_set_ids_ssim: ['bar']) }
4
+
5
+ describe 'file_set_ids' do
6
+ it 'responds to #file_set_ids' do
7
+ expect(solr_doc).to respond_to(:file_set_ids)
8
+ end
9
+
10
+ it 'returns the correct value' do
11
+ expect(solr_doc.file_set_ids).to eq(['bar'])
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::IiifManifestPresenterBehavior do
4
+ let(:attributes) do
5
+ { "id" => "abc123",
6
+ "title_tesim" => ['Page the first'],
7
+ "description_tesim" => ['A book or something'],
8
+ "creator_tesim" => ['Arthur McAuthor'] }
9
+ end
10
+ let(:solr_document) { SolrDocument.new(attributes) }
11
+ let(:presenter) { Hyrax::IiifManifestPresenter.new(solr_document) }
12
+ let(:test_request) { ActionDispatch::TestRequest.new({}) }
13
+
14
+ describe '#search_service' do
15
+ it 'returns the correct URL for the IIIF Search service' do
16
+ expect(presenter.search_service).to include("#{solr_document.id}/iiif_search")
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,49 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe IiifPrint::IiifManifestPresenterBehavior do
4
+ let(:parent_fs_attributes) do
5
+ { "id" => "parent_fs123",
6
+ "title_tesim" => ["My Parent FileSet"],
7
+ "has_model_ssim" => ["FileSet"] }
8
+ end
9
+ let(:child_work_attributes) do
10
+ { "id" => "child_work123",
11
+ "title_tesim" => ["My Child Image"],
12
+ "has_model_ssim" => ["Image"],
13
+ "file_set_ids_ssim" => ["child_image_fs123"] }
14
+ end
15
+ let(:child_fs_attributes) do
16
+ { "id" => "child_fs123",
17
+ "title_tesim" => ["My Child FileSet"],
18
+ "has_model_ssim" => ["FileSet"] }
19
+ end
20
+ let(:parent_fs_solr_doc) { SolrDocument.new(parent_fs_attributes) }
21
+ let(:child_work_solr_doc) { SolrDocument.new(child_work_attributes) }
22
+ let(:child_fs_solr_doc) { SolrDocument.new(child_fs_attributes) }
23
+ let(:ids) { [parent_fs_solr_doc.id, child_work_solr_doc.id] }
24
+ let(:presenter_class) { Hyrax::IiifManifestPresenter }
25
+
26
+ subject(:presenter_factory) do
27
+ Hyrax::IiifManifestPresenter::Factory.new(
28
+ ids: ids,
29
+ presenter_class: presenter_class,
30
+ presenter_args: []
31
+ )
32
+ end
33
+
34
+ describe "#build" do
35
+ it "returns an Array of DisplayImagePresenters" do
36
+ allow_any_instance_of(Hyrax::IiifManifestPresenter::Factory)
37
+ .to receive(:load_docs).and_return([parent_fs_solr_doc, child_work_solr_doc])
38
+ allow_any_instance_of(IiifPrint::IiifManifestPresenterFactoryBehavior)
39
+ .to receive(:load_file_set_docs).and_return([child_fs_solr_doc])
40
+ allow(child_work_solr_doc).to receive(:hydra_model).and_return(MyWork)
41
+ allow(Hyrax.config).to receive(:curation_concerns).and_return([MyWork])
42
+
43
+ expect(subject.build).to be_an Array
44
+ expect(subject.build.size).to eq ids.size
45
+ expect(subject.build.map(&:class).uniq.size).to eq 1
46
+ expect(subject.build.first.class).to eq Hyrax::IiifManifestPresenter::DisplayImagePresenter
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,59 @@
1
+ require 'spec_helper'
2
+ RSpec.describe IiifPrint::JP2DerivativeService do
3
+ let(:valid_file_set) do
4
+ file_set = FileSet.new
5
+ file_set.save!(validate: false)
6
+ file_set
7
+ end
8
+
9
+ let(:fixture_path) do
10
+ File.join(
11
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
12
+ )
13
+ end
14
+
15
+ describe "Creates JP2 derivatives" do
16
+ def source_image(name)
17
+ File.join(fixture_path, name)
18
+ end
19
+
20
+ def expected_path(file_set)
21
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'jp2')
22
+ end
23
+
24
+ def metadata_match_checker(source, target)
25
+ target_meta = IiifPrint::ImageTool.new(target).metadata
26
+ source_meta = IiifPrint::ImageTool.new(source).metadata
27
+ expect(target_meta[:content_type]).to eq 'image/jp2'
28
+ expect(target_meta[:width]).to eq source_meta[:width]
29
+ expect(target_meta[:height]).to eq source_meta[:height]
30
+ end
31
+
32
+ def makes_jp2(filename)
33
+ expected = expected_path(valid_file_set)
34
+ expect(File.exist?(expected)).to be false
35
+ svc = described_class.new(valid_file_set)
36
+ source_path = source_image(filename)
37
+ svc.create_derivatives(source_path)
38
+ expect(File.exist?(expected)).to be true
39
+ metadata_match_checker(source_path, expected)
40
+ svc.cleanup_derivatives
41
+ end
42
+
43
+ it "creates gray JP2 derivative from one-bit source" do
44
+ makes_jp2('ocr_mono.tiff')
45
+ end
46
+
47
+ it "creates gray JP2 from grayscale source" do
48
+ makes_jp2('lowres-gray-via-ndnp-sample.tiff')
49
+ end
50
+
51
+ it "creates color JP2 from color source" do
52
+ makes_jp2('4.1.07.tiff')
53
+ end
54
+
55
+ it "creates JP2 from PDF source, robust to multi-page" do
56
+ makes_jp2('sample-color-newsletter.pdf')
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,66 @@
1
+ require 'spec_helper'
2
+ RSpec.describe IiifPrint::PDFDerivativeService do
3
+ let(:valid_file_set) do
4
+ file_set = FileSet.new
5
+ file_set.save!(validate: false)
6
+ file_set
7
+ end
8
+
9
+ let(:fixture_path) do
10
+ File.join(
11
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
12
+ )
13
+ end
14
+
15
+ describe "Creates PDF derivatives" do
16
+ def source_image(name)
17
+ File.join(fixture_path, name)
18
+ end
19
+
20
+ def expected_path(file_set)
21
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'pdf')
22
+ end
23
+
24
+ # given output file name, check DPI is 150
25
+ def check_dpi(expected)
26
+ metadata = IiifPrint::ImageTool.new(expected).metadata
27
+ # get width of pdf in points (via imagemagick), should be 864pt == 12in
28
+ page_width = metadata[:width]
29
+ expect(page_width).to eq 864
30
+ # get total width of image in pixels from pdfimages -list, ==> 1800
31
+ image_width = 1800
32
+ im_list = `pdfimages -list #{expected}`
33
+ expect(im_list.lines[-1].split(' ')[3]).to eq image_width.to_s
34
+ # this combination of page pt width, image px width ==> 150ppi
35
+ expect(image_width / (page_width / 72.0)).to eq 150.0
36
+ end
37
+
38
+ def makes_pdf(filename)
39
+ expected = expected_path(valid_file_set)
40
+ expect(File.exist?(expected)).to be false
41
+ svc = described_class.new(valid_file_set)
42
+ svc.create_derivatives(source_image(filename))
43
+ expect(File.exist?(expected)).to be true
44
+ metadata = IiifPrint::ImageTool.new(expected).metadata
45
+ expect(metadata[:content_type]).to eq 'application/pdf'
46
+ check_dpi(expected)
47
+ svc.cleanup_derivatives
48
+ end
49
+
50
+ it "creates gray PDF derivative from one-bit source" do
51
+ makes_pdf('ocr_mono.tiff')
52
+ end
53
+
54
+ it "creates gray PDF from grayscale source" do
55
+ makes_pdf('lowres-gray-via-ndnp-sample.tiff')
56
+ end
57
+
58
+ it "creates color PDF from color source" do
59
+ makes_pdf('4.1.07.tiff')
60
+ end
61
+
62
+ it "creates color PDF from color JP2 source" do
63
+ makes_pdf('4.1.07.jp2')
64
+ end
65
+ end
66
+ end