iiif_print 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,84 @@
1
+ require 'json'
2
+ require 'nokogiri'
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe IiifPrint::TextExtraction::PageOCR do
6
+ let(:fixture_path) do
7
+ File.join(
8
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
9
+ )
10
+ end
11
+
12
+ let(:altoxsd) do
13
+ xsdpath = File.join(fixture_path, 'alto-2-0.xsd')
14
+ Nokogiri::XML::Schema(File.read(xsdpath))
15
+ end
16
+
17
+ # sample "snippet" images for OCR testing:
18
+ let(:example_gray_tiff) { File.join(fixture_path, 'ocr_gray.tiff') }
19
+ let(:example_mono_tiff) { File.join(fixture_path, 'ocr_mono.tiff') }
20
+ let(:example_color_tiff) { File.join(fixture_path, 'ocr_color.tiff') }
21
+ let(:example_gray_jp2) { File.join(fixture_path, 'ocr_gray.jp2') }
22
+ let(:ocr_from_gray_tiff) { described_class.new(example_gray_tiff) }
23
+
24
+ describe "performs OCR" do
25
+ def match_ocr_expectations(words)
26
+ expect(words).to be_an(Array)
27
+ expect(words).not_to be_empty
28
+ expect(words[0]).to be_a(Hash)
29
+ [:word, :coordinates].each do |key|
30
+ expect(words[0].keys).to include key
31
+ end
32
+ end
33
+
34
+ it "gets words and coordinates from grayscale source" do
35
+ match_ocr_expectations(ocr_from_gray_tiff.words)
36
+ end
37
+
38
+ it "gets words and coordinates from one-bit source" do
39
+ ocr = described_class.new(example_mono_tiff)
40
+ match_ocr_expectations(ocr.words)
41
+ end
42
+
43
+ it "gets words and coordinates from color source" do
44
+ ocr = described_class.new(example_color_tiff)
45
+ match_ocr_expectations(ocr.words)
46
+ end
47
+
48
+ it "gets words and coordinates from jp2 source" do
49
+ ocr = described_class.new(example_gray_jp2)
50
+ match_ocr_expectations(ocr.words)
51
+ end
52
+ end
53
+
54
+ describe "turns image into ALTO" do
55
+ xit "takes grayscale tiff, outputs valid ALTO, geometry" do
56
+ alto = ocr_from_gray_tiff.alto
57
+ document = Nokogiri::XML(alto)
58
+ errors = altoxsd.validate(document)
59
+ expect(errors.length).to eq 0
60
+ expect(document.at_css('PrintSpace')['WIDTH']).to eq "418"
61
+ expect(document.at_css('PrintSpace')['HEIGHT']).to eq "1046"
62
+ end
63
+ end
64
+
65
+ describe "plain text" do
66
+ it "makes plain text available for image" do
67
+ plain = ocr_from_gray_tiff.plain
68
+ expect(plain.class).to be String
69
+ expect(plain.length).to be > 0
70
+ end
71
+ end
72
+
73
+ describe "JSON word coordinates" do
74
+ it "passes properly formatted data to WordCoordsBuilder and receives output" do
75
+ parsed = JSON.parse(ocr_from_gray_tiff.word_json)
76
+ expect(parsed['coords'].length).to be > 1
77
+ word = ocr_from_gray_tiff.words[0]
78
+ word1 = parsed['coords'][word[:word]]
79
+ word1_coords = word1[0]
80
+ expect(word1_coords[2]).to eq word[:coordinates][2]
81
+ expect(word1_coords[3]).to eq word[:coordinates][3]
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::TextExtraction::RenderAlto do
4
+ let(:fixture_path) do
5
+ File.join(
6
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
7
+ )
8
+ end
9
+
10
+ let(:altoxsd) do
11
+ xsdpath = File.join(fixture_path, 'alto-2-0.xsd')
12
+ Nokogiri::XML::Schema(File.read(xsdpath))
13
+ end
14
+
15
+ let(:page_prefix) { '<Page ID="ID1" PHYSICAL_IMG_NR="1"' }
16
+
17
+ let(:words) do
18
+ [
19
+ { word: "If", coordinates: [52, 13, 11, 14] },
20
+ { word: "you", coordinates: [69, 17, 31, 14] },
21
+ { word: "are", coordinates: [108, 17, 28, 10] },
22
+ { word: "a", coordinates: [143, 17, 8, 10] },
23
+ { word: "friend,", coordinates: [158, 13, 56, 16] },
24
+ { word: "you", coordinates: [51, 39, 31, 14] },
25
+ { word: "speak", coordinates: [90, 35, 50, 18] },
26
+ { word: "the", coordinates: [146, 35, 28, 14] },
27
+ { word: "password,", coordinates: [182, 35, 85, 18] },
28
+ { word: "and", coordinates: [51, 57, 30, 14] },
29
+ { word: "the", coordinates: [89, 57, 28, 14] },
30
+ { word: "doors", coordinates: [124, 57, 48, 14] },
31
+ { word: "will", coordinates: [180, 57, 28, 14] },
32
+ { word: "open.", coordinates: [216, 61, 47, 14] }
33
+ ]
34
+ end
35
+
36
+ describe "renders alto" do
37
+ it "creates alto given width, height, words" do
38
+ renderer = described_class.new(12_000, 9600)
39
+ output = renderer.to_alto(words)
40
+ expect(output.class).to be String
41
+ expect(output).to include '<alto'
42
+ expect(output).to include '<String'
43
+ expect(output).to include page_prefix + ' HEIGHT="9600" WIDTH="12000"'
44
+ expect(Nokogiri::XML(output).errors.empty?).to be true
45
+ end
46
+
47
+ xit "makes alto 2.0 that validates" do
48
+ renderer = described_class.new(12_000, 9600)
49
+ output = renderer.to_alto(words)
50
+ document = Nokogiri::XML(output)
51
+ altoxsd.validate(document)
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,44 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::TextExtraction::WordCoordsBuilder do
4
+ let(:words) do
5
+ [
6
+ { word: "foo", coordinates: [1, 2, 3, 4] },
7
+ { word: "bar", coordinates: [5, 6, 7, 8] },
8
+ { word: "baz", coordinates: [9, 10, 11, 12] },
9
+ { word: "foo", coordinates: [13, 14, 15, 16] }
10
+ ]
11
+ end
12
+ let(:image_width) { 1_234 }
13
+ let(:image_height) { 5_678 }
14
+
15
+ describe '.json_coordinates_for' do
16
+ let(:wcb_to_json) { JSON.parse(described_class.json_coordinates_for(words: words, width: image_width, height: image_height)) }
17
+ it 'has the correct structure' do
18
+ expect(wcb_to_json['height']).to eq image_height
19
+ expect(wcb_to_json['width']).to eq image_width
20
+ expect(wcb_to_json['coords'].length).to eq 3
21
+ expect(wcb_to_json['coords']['foo']).not_to be_falsey
22
+ end
23
+
24
+ it 'combines coordinates for the same word' do
25
+ expect(wcb_to_json['coords']['foo']).to eq [[1, 2, 3, 4], [13, 14, 15, 16]]
26
+ end
27
+ end
28
+
29
+ describe '#to_json' do
30
+ let(:wcb_to_json) { JSON.parse(wcb.to_json) }
31
+ let(:wcb) { described_class.new(words, image_width, image_height) }
32
+
33
+ it 'has the correct structure' do
34
+ expect(wcb_to_json['height']).to eq image_height
35
+ expect(wcb_to_json['width']).to eq image_width
36
+ expect(wcb_to_json['coords'].length).to eq 3
37
+ expect(wcb_to_json['coords']['foo']).not_to be_falsey
38
+ end
39
+
40
+ it 'combines coordinates for the same word' do
41
+ expect(wcb_to_json['coords']['foo']).to eq [[1, 2, 3, 4], [13, 14, 15, 16]]
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint do
4
+ describe ".manifest_metadata_for" do
5
+ let(:attributes) do
6
+ { "id" => "abc123",
7
+ "title_tesim" => ['My Awesome Title'] }
8
+ end
9
+ let(:solr_document) { SolrDocument.new(attributes) }
10
+ let(:base_url) { "https://my.dev.test" }
11
+
12
+ subject(:manifest_metadata) do
13
+ described_class.manifest_metadata_for(work: solr_document, current_ability: double(Ability), base_url: base_url)
14
+ end
15
+ it { is_expected.not_to be_falsey }
16
+ it "does not contain any nil values" do
17
+ expect(subject).not_to include(nil)
18
+ end
19
+ end
20
+
21
+ describe ".model_configuration" do
22
+ context "default configuration" do
23
+ let(:model) do
24
+ Class.new do
25
+ include IiifPrint.model_configuration(pdf_split_child_model: Class.new)
26
+ end
27
+ end
28
+
29
+ subject(:record) { model.new }
30
+
31
+ it { is_expected.to be_iiif_print_config }
32
+
33
+ it "has a #pdf_splitter_job" do
34
+ expect(record.iiif_print_config.pdf_splitter_job).to be(IiifPrint::Jobs::ChildWorksFromPdfJob)
35
+ end
36
+
37
+ it "has a #pdf_splitter_service" do
38
+ expect(record.iiif_print_config.pdf_splitter_service).to be(IiifPrint::SplitPdfs::PagesIntoImagesService)
39
+ end
40
+
41
+ it "has #derivative_service_plugins" do
42
+ expect(record.iiif_print_config.derivative_service_plugins).to eq(
43
+ [IiifPrint::JP2DerivativeService,
44
+ IiifPrint::PDFDerivativeService,
45
+ IiifPrint::TextExtractionDerivativeService,
46
+ IiifPrint::TIFFDerivativeService]
47
+ )
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,111 @@
1
+ RSpec.shared_context "shared setup", shared_context: :metadata do
2
+ let(:fixture_path) do
3
+ path = File.join(
4
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
5
+ )
6
+ # TODO: NOTE: this has potential timing issues in the specs, because we're adjusting the
7
+ # configured value during the spec run.
8
+ registered = Hyrax.config.registered_ingest_dirs
9
+ registered.push(path) unless registered.include?(path)
10
+ path
11
+ end
12
+
13
+ # shared date to be invariant across all tests in a run:
14
+ date_static = Hyrax::TimeService.time_in_utc
15
+ let(:static_date) { date_static }
16
+
17
+ # path fixtures:
18
+ let(:example_gray_jp2) { File.join(fixture_path, 'ocr_gray.jp2') }
19
+ let(:txt_path) { File.join(fixture_path, 'credits.md') }
20
+ let(:sample_thumbnail) { File.join(fixture_path, 'thumbnail.jpg') }
21
+
22
+ # sample data:
23
+ let(:sample_text) { 'even in a mythical Age there must be some enigmas' }
24
+
25
+ let(:valid_file_set) do
26
+ file_set = FileSet.new
27
+ file_set.save!(validate: false)
28
+ file_set
29
+ end
30
+
31
+ let(:sample_work) do
32
+ work = MyWork.new
33
+ work.title = ['Bombadil']
34
+ work.members.push(valid_file_set)
35
+ work.save!
36
+ work
37
+ end
38
+
39
+ # sample objects:
40
+ let(:work_with_file) do
41
+ # we need a work with not just a valid (but empty) fileset, but also
42
+ # a persisted file, so we use the shared work sample, and expand
43
+ # on it with actual file data/metadata.
44
+ work = sample_work
45
+ fileset = work.members.first
46
+ file = Hydra::PCDM::File.create
47
+ fileset.original_file = file
48
+ # Set binary content on file via ActiveFedora content= mutator method
49
+ # which also makes .size method return valid result for content
50
+ file.content = File.open(txt_path)
51
+ # Set some metadata we would expect to otherwise be set upon an upload
52
+ file.original_name = 'credits.md'
53
+ file.mime_type = 'text/plain'
54
+ file.date_modified = static_date
55
+ file.date_created = static_date
56
+ # saving fileset also saves file content
57
+ fileset.save!
58
+ work
59
+ end
60
+
61
+ def path_factory
62
+ Hyrax::DerivativePath
63
+ end
64
+
65
+ def work_file_set(work)
66
+ work.members.detect { |m| m.is_a? FileSet }
67
+ end
68
+
69
+ def text_path(work)
70
+ path_factory.derivative_path_for_reference(work_file_set(work), 'txt')
71
+ end
72
+
73
+ def jp2_path(work)
74
+ path_factory.derivative_path_for_reference(work_file_set(work), 'jp2')
75
+ end
76
+
77
+ def thumbnail_path(work)
78
+ path_factory.derivative_path_for_reference(work_file_set(work), 'thumbnail')
79
+ end
80
+
81
+ def mkdir_derivative(work, name)
82
+ # make shared path for derivatives to live, Hyrax usually does this
83
+ # for thumbnails, and iiif_print does this in its derivative
84
+ # service plugins; here we do same.
85
+ fsid = work_file_set(work).id
86
+ path = path_factory.derivative_path_for_reference(fsid, name)
87
+ dir = File.join(path.split('/')[0..-2])
88
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
89
+ end
90
+
91
+ def mk_jp2_derivative(work)
92
+ mkdir_derivative(work, 'jp2')
93
+ dst_path = jp2_path(work)
94
+ FileUtils.copy(example_gray_jp2, dst_path)
95
+ expect(File.exist?(dst_path)).to be true
96
+ end
97
+
98
+ def mk_txt_derivative(work)
99
+ mkdir_derivative(work, 'txt')
100
+ dst_path = text_path(work)
101
+ File.open(dst_path, 'w') { |f| f.write(sample_text) }
102
+ expect(File.exist?(dst_path)).to be true
103
+ end
104
+
105
+ def mk_thumbnail_derivative(work)
106
+ mkdir_derivative(work, 'thumbnail')
107
+ dst_path = thumbnail_path(work)
108
+ FileUtils.copy(sample_thumbnail, dst_path)
109
+ expect(File.exist?(dst_path)).to be true
110
+ end
111
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module IiifPrint
4
+ RSpec.describe DerivativeAttachment, type: :model do
5
+ it "requires some columns to be considered complete" do
6
+ model = described_class.create
7
+ # attempt save without required data; expect failure
8
+ expect { model.save! }.to raise_exception(ActiveRecord::RecordInvalid)
9
+ end
10
+
11
+ it "saves when constructed with all field values" do
12
+ model = described_class.create(
13
+ fileset_id: 'a1b2c3d4e5',
14
+ path: '/path/to/somefile',
15
+ destination_name: 'txt'
16
+ )
17
+ # attempt save with all required data; expect success
18
+ expect { model.save! }.not_to raise_exception
19
+ end
20
+
21
+ it "saves when all fields completely set" do
22
+ model = described_class.create
23
+ model.fileset_id = 'someid123'
24
+ model.path = '/path/to/somefile'
25
+ model.destination_name = 'txt'
26
+ expect { model.save! }.not_to raise_exception
27
+ end
28
+
29
+ it "saves when only path, destination_name set" do
30
+ model = described_class.create
31
+ model.fileset_id = nil
32
+ model.path = '/path/to/somefile'
33
+ model.destination_name = 'txt'
34
+ expect { model.save! }.not_to raise_exception
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,56 @@
1
+ require 'spec_helper'
2
+
3
+ module IiifPrint
4
+ RSpec.describe IngestFileRelation, type: :model do
5
+ def make_test_records
6
+ # two unique values
7
+ described_class.create(
8
+ file_path: '/some/path/to/this',
9
+ derivative_path: '/some/path/to/that'
10
+ )
11
+ described_class.create(
12
+ file_path: '/some/path/to/this',
13
+ derivative_path: '/some/path/to/other_thing'
14
+ )
15
+ # a duplicate will save, presumption is that dupes are filtered on query:
16
+ described_class.create(
17
+ file_path: '/some/path/to/this',
18
+ derivative_path: '/some/path/to/other_thing'
19
+ )
20
+ end
21
+
22
+ it "will not save unless record is complete" do
23
+ model = described_class.create
24
+ # attempt save without required data; expect failure
25
+ expect { model.save! }.to raise_exception(ActiveRecord::RecordInvalid)
26
+ model2 = described_class.create
27
+ model2.file_path = '/path/to/sourcefile.tiff'
28
+ expect { model2.save! }.to raise_exception(ActiveRecord::RecordInvalid)
29
+ model3 = described_class.create
30
+ model3.derivative_path = '/path/to/sourcefile.tiff'
31
+ expect { model3.save! }.to raise_exception(ActiveRecord::RecordInvalid)
32
+ end
33
+
34
+ it "will save sufficiently constructed record" do
35
+ model = described_class.create(
36
+ file_path: '/path/to/this',
37
+ derivative_path: '/path/to/that'
38
+ )
39
+ expect { model.save! }.not_to raise_exception
40
+ end
41
+
42
+ it "will save when all fields completely set" do
43
+ model = described_class.create
44
+ model.file_path = '/path/to/sourcefile.tiff'
45
+ model.derivative_path = '/path/to/derived.jp2'
46
+ expect { model.save! }.not_to raise_exception
47
+ end
48
+
49
+ it "can query derivative paths for primary file" do
50
+ make_test_records
51
+ result = described_class.derivatives_for_file('/some/path/to/this')
52
+ expect(result).to be_an Array
53
+ expect(result.size).to eq 2
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
+ RSpec.describe SolrDocument do
3
+ let(:solr_doc) { described_class.new(id: 'foo', file_set_ids_ssim: ['bar']) }
4
+
5
+ describe 'file_set_ids' do
6
+ it 'responds to #file_set_ids' do
7
+ expect(solr_doc).to respond_to(:file_set_ids)
8
+ end
9
+
10
+ it 'returns the correct value' do
11
+ expect(solr_doc.file_set_ids).to eq(['bar'])
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe IiifPrint::IiifManifestPresenterBehavior do
4
+ let(:attributes) do
5
+ { "id" => "abc123",
6
+ "title_tesim" => ['Page the first'],
7
+ "description_tesim" => ['A book or something'],
8
+ "creator_tesim" => ['Arthur McAuthor'] }
9
+ end
10
+ let(:solr_document) { SolrDocument.new(attributes) }
11
+ let(:presenter) { Hyrax::IiifManifestPresenter.new(solr_document) }
12
+ let(:test_request) { ActionDispatch::TestRequest.new({}) }
13
+
14
+ describe '#search_service' do
15
+ it 'returns the correct URL for the IIIF Search service' do
16
+ expect(presenter.search_service).to include("#{solr_document.id}/iiif_search")
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,49 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe IiifPrint::IiifManifestPresenterBehavior do
4
+ let(:parent_fs_attributes) do
5
+ { "id" => "parent_fs123",
6
+ "title_tesim" => ["My Parent FileSet"],
7
+ "has_model_ssim" => ["FileSet"] }
8
+ end
9
+ let(:child_work_attributes) do
10
+ { "id" => "child_work123",
11
+ "title_tesim" => ["My Child Image"],
12
+ "has_model_ssim" => ["Image"],
13
+ "file_set_ids_ssim" => ["child_image_fs123"] }
14
+ end
15
+ let(:child_fs_attributes) do
16
+ { "id" => "child_fs123",
17
+ "title_tesim" => ["My Child FileSet"],
18
+ "has_model_ssim" => ["FileSet"] }
19
+ end
20
+ let(:parent_fs_solr_doc) { SolrDocument.new(parent_fs_attributes) }
21
+ let(:child_work_solr_doc) { SolrDocument.new(child_work_attributes) }
22
+ let(:child_fs_solr_doc) { SolrDocument.new(child_fs_attributes) }
23
+ let(:ids) { [parent_fs_solr_doc.id, child_work_solr_doc.id] }
24
+ let(:presenter_class) { Hyrax::IiifManifestPresenter }
25
+
26
+ subject(:presenter_factory) do
27
+ Hyrax::IiifManifestPresenter::Factory.new(
28
+ ids: ids,
29
+ presenter_class: presenter_class,
30
+ presenter_args: []
31
+ )
32
+ end
33
+
34
+ describe "#build" do
35
+ it "returns an Array of DisplayImagePresenters" do
36
+ allow_any_instance_of(Hyrax::IiifManifestPresenter::Factory)
37
+ .to receive(:load_docs).and_return([parent_fs_solr_doc, child_work_solr_doc])
38
+ allow_any_instance_of(IiifPrint::IiifManifestPresenterFactoryBehavior)
39
+ .to receive(:load_file_set_docs).and_return([child_fs_solr_doc])
40
+ allow(child_work_solr_doc).to receive(:hydra_model).and_return(MyWork)
41
+ allow(Hyrax.config).to receive(:curation_concerns).and_return([MyWork])
42
+
43
+ expect(subject.build).to be_an Array
44
+ expect(subject.build.size).to eq ids.size
45
+ expect(subject.build.map(&:class).uniq.size).to eq 1
46
+ expect(subject.build.first.class).to eq Hyrax::IiifManifestPresenter::DisplayImagePresenter
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,59 @@
1
+ require 'spec_helper'
2
+ RSpec.describe IiifPrint::JP2DerivativeService do
3
+ let(:valid_file_set) do
4
+ file_set = FileSet.new
5
+ file_set.save!(validate: false)
6
+ file_set
7
+ end
8
+
9
+ let(:fixture_path) do
10
+ File.join(
11
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
12
+ )
13
+ end
14
+
15
+ describe "Creates JP2 derivatives" do
16
+ def source_image(name)
17
+ File.join(fixture_path, name)
18
+ end
19
+
20
+ def expected_path(file_set)
21
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'jp2')
22
+ end
23
+
24
+ def metadata_match_checker(source, target)
25
+ target_meta = IiifPrint::ImageTool.new(target).metadata
26
+ source_meta = IiifPrint::ImageTool.new(source).metadata
27
+ expect(target_meta[:content_type]).to eq 'image/jp2'
28
+ expect(target_meta[:width]).to eq source_meta[:width]
29
+ expect(target_meta[:height]).to eq source_meta[:height]
30
+ end
31
+
32
+ def makes_jp2(filename)
33
+ expected = expected_path(valid_file_set)
34
+ expect(File.exist?(expected)).to be false
35
+ svc = described_class.new(valid_file_set)
36
+ source_path = source_image(filename)
37
+ svc.create_derivatives(source_path)
38
+ expect(File.exist?(expected)).to be true
39
+ metadata_match_checker(source_path, expected)
40
+ svc.cleanup_derivatives
41
+ end
42
+
43
+ it "creates gray JP2 derivative from one-bit source" do
44
+ makes_jp2('ocr_mono.tiff')
45
+ end
46
+
47
+ it "creates gray JP2 from grayscale source" do
48
+ makes_jp2('lowres-gray-via-ndnp-sample.tiff')
49
+ end
50
+
51
+ it "creates color JP2 from color source" do
52
+ makes_jp2('4.1.07.tiff')
53
+ end
54
+
55
+ it "creates JP2 from PDF source, robust to multi-page" do
56
+ makes_jp2('sample-color-newsletter.pdf')
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,66 @@
1
+ require 'spec_helper'
2
+ RSpec.describe IiifPrint::PDFDerivativeService do
3
+ let(:valid_file_set) do
4
+ file_set = FileSet.new
5
+ file_set.save!(validate: false)
6
+ file_set
7
+ end
8
+
9
+ let(:fixture_path) do
10
+ File.join(
11
+ IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files'
12
+ )
13
+ end
14
+
15
+ describe "Creates PDF derivatives" do
16
+ def source_image(name)
17
+ File.join(fixture_path, name)
18
+ end
19
+
20
+ def expected_path(file_set)
21
+ Hyrax::DerivativePath.derivative_path_for_reference(file_set, 'pdf')
22
+ end
23
+
24
+ # given output file name, check DPI is 150
25
+ def check_dpi(expected)
26
+ metadata = IiifPrint::ImageTool.new(expected).metadata
27
+ # get width of pdf in points (via imagemagick), should be 864pt == 12in
28
+ page_width = metadata[:width]
29
+ expect(page_width).to eq 864
30
+ # get total width of image in pixels from pdfimages -list, ==> 1800
31
+ image_width = 1800
32
+ im_list = `pdfimages -list #{expected}`
33
+ expect(im_list.lines[-1].split(' ')[3]).to eq image_width.to_s
34
+ # this combination of page pt width, image px width ==> 150ppi
35
+ expect(image_width / (page_width / 72.0)).to eq 150.0
36
+ end
37
+
38
+ def makes_pdf(filename)
39
+ expected = expected_path(valid_file_set)
40
+ expect(File.exist?(expected)).to be false
41
+ svc = described_class.new(valid_file_set)
42
+ svc.create_derivatives(source_image(filename))
43
+ expect(File.exist?(expected)).to be true
44
+ metadata = IiifPrint::ImageTool.new(expected).metadata
45
+ expect(metadata[:content_type]).to eq 'application/pdf'
46
+ check_dpi(expected)
47
+ svc.cleanup_derivatives
48
+ end
49
+
50
+ it "creates gray PDF derivative from one-bit source" do
51
+ makes_pdf('ocr_mono.tiff')
52
+ end
53
+
54
+ it "creates gray PDF from grayscale source" do
55
+ makes_pdf('lowres-gray-via-ndnp-sample.tiff')
56
+ end
57
+
58
+ it "creates color PDF from color source" do
59
+ makes_pdf('4.1.07.tiff')
60
+ end
61
+
62
+ it "creates color PDF from color JP2 source" do
63
+ makes_pdf('4.1.07.jp2')
64
+ end
65
+ end
66
+ end