newspaper_works 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (461)
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,155 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::IssueIngester do
5
+ include_context "ndnp fixture setup"
6
+
7
+ # Source data:
8
+ let(:issue_data) do
9
+ NewspaperWorks::Ingest::NDNP::IssueIngest.new(issue1)
10
+ end
11
+
12
+ let(:metadata) { issue_data.metadata }
13
+
14
+ # IssueIngester adapter does the work we are testing:
15
+ let(:adapter) { described_class.new(issue_data) }
16
+
17
+ describe "adapter and asset construction" do
18
+ def expect_issue_import_logging(adapter)
19
+ expect(adapter).to receive(:write_log).with(
20
+ satisfy { |v| v.include?('Saved metadata to new NewspaperIssue') }
21
+ ).once
22
+ end
23
+
24
+ # remove publication asset from repository for LCCN, when re-creating
25
+ # is desired test behavior
26
+ def clear_publication(lccn)
27
+ NewspaperTitle.where(lccn: lccn).delete_all
28
+ end
29
+
30
+ it "constructs adapter with issue source" do
31
+ expect(adapter.issue).to be issue_data
32
+ expect(adapter.path).to eq issue_data.path
33
+ # initially nil target:
34
+ expect(adapter.target).to be_nil
35
+ end
36
+
37
+ it "constructs adapter with hash options" do
38
+ user = User.batch_user.user_key
39
+ adapter = described_class.new(
40
+ issue_data,
41
+ depositor: user
42
+ )
43
+ expect(adapter.opts[:depositor]).to eq user
44
+ end
45
+
46
+ it "constructs NewspaperIssue with adapter" do
47
+ # construct_issue is only the first part of ingest, create issue
48
+ # and find-or-link publication NewspaperTitle;
49
+ # this does not trigger creation of child pages.
50
+ clear_publication(issue_data.metadata.lccn)
51
+ expect_issue_import_logging(adapter)
52
+ expect(adapter).to receive(:write_log).with(
53
+ satisfy do |v|
54
+ v.include?('Created NewspaperTitle work') ||
55
+ v.include?('Found existing NewspaperTitle')
56
+ end
57
+ ).once
58
+ expect(adapter).to receive(:write_log).with(
59
+ satisfy { |v| v.include?('Linked NewspaperIssue') }
60
+ ).once
61
+ adapter.construct_issue
62
+ issue = adapter.target
63
+ expect(issue).to be_a NewspaperIssue
64
+ expect(issue.id).not_to be_nil
65
+ # check parent publication
66
+ publication = issue.publication
67
+ expect(publication.lccn).to eq issue_data.metadata.lccn
68
+ expect(publication.title).to contain_exactly 'The Park Record'
69
+ end
70
+
71
+ it "creates new NewspaperTitle without place of publication" do
72
+ # clear any existing publications from previous testing
73
+ lccn = issue_data.metadata.lccn
74
+ clear_publication(lccn)
75
+ # construct with title, this time no username set for geonames:
76
+ Qa::Authorities::Geonames.username = ''
77
+ adapter.construct_issue
78
+ expect(adapter.target.publication.place_of_publication).to be_empty
79
+ Qa::Authorities::Geonames.username = 'newspaper_works'
80
+ end
81
+
82
+ it "creates new NewspaperTitle with place of publication" do
83
+ # clear any existing publications from previous testing
84
+ lccn = issue_data.metadata.lccn
85
+ clear_publication(lccn)
86
+ # construct with title, this time with username set for geonames:
87
+ Qa::Authorities::Geonames.username = 'newspaper_works'
88
+ adapter.construct_issue
89
+ pop = adapter.target.publication.place_of_publication.map do |v|
90
+ v.to_uri.to_s
91
+ end
92
+ expect(pop).not_to be_empty
93
+ expect(pop[0]).to start_with 'http://sws.geonames.org/'
94
+ end
95
+ end
96
+
97
+ describe "metadata access/setting" do
98
+ def normalized_pubtitle(issue_data)
99
+ issue_data.metadata.publication_title.strip.split(/ \(/)[0]
100
+ end
101
+
102
+ def expected_title(issue_data)
103
+ metadata = issue_data.metadata
104
+ d = DateTime.iso8601(metadata.publication_date).strftime('%B %-d, %Y')
105
+ "#{normalized_pubtitle(issue_data)}: #{d}"
106
+ end
107
+
108
+ it "copies metadata to NewspaperIssue" do
109
+ adapter.construct_issue
110
+ issue = adapter.target
111
+ metadata = issue_data.metadata
112
+ expect(issue.title).to contain_exactly expected_title(issue_data)
113
+ expect(issue.lccn).to eq metadata.lccn
114
+ expect(issue.volume).to eq metadata.volume
115
+ expect(issue.publication_date).to eq metadata.publication_date
116
+ expect(issue.issue_number).to eq metadata.issue_number
117
+ end
118
+
119
+ it "sets default administrative metadata with default construction" do
120
+ adapter.construct_issue
121
+ issue_asset = adapter.target
122
+ expect(issue_asset.depositor).to eq User.batch_user.user_key
123
+ expect(issue_asset.admin_set).to eq AdminSet.find(AdminSet::DEFAULT_ID)
124
+ expect(issue_asset.visibility).to eq 'open'
125
+ end
126
+
127
+ it "sets custom administrative metadata for issue" do
128
+ # test one exemplary/representative option:
129
+ adapter = described_class.new(issue_data, visibility: 'open')
130
+ adapter.construct_issue
131
+ expect(adapter.target.visibility).to eq 'open'
132
+ end
133
+
134
+ it "sets custom administrative metadata for constructed publication" do
135
+ # test one exemplary/representative option:
136
+ adapter = described_class.new(issue_data, visibility: 'open')
137
+ adapter.construct_issue
138
+ publication_asset = adapter.target.publication
139
+ expect(publication_asset).not_to be_nil
140
+ expect(publication_asset.visibility).to eq 'open'
141
+ end
142
+ end
143
+
144
+ describe "child page creation" do
145
+ it "creates child pages on ingest of issue" do
146
+ # calling ingest without invoking the usual async jobs should
147
+ # create child pages without additional work of attaching files
148
+ # to them, which we don't need to test here (tested elsewhere).
149
+ adapter.ingest
150
+ adapter.target.pages.each do |page|
151
+ expect(page.issue.id).to eq adapter.target.id
152
+ end
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,84 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::IssueMetadata do
5
+ include_context "ndnp fixture setup"
6
+
7
+ describe "sample fixture 'batch_local'" do
8
+ let(:issue) { described_class.new(issue1) }
9
+
10
+ it "gets lccn" do
11
+ expect(issue.lccn).to eq "sn85058233"
12
+ end
13
+
14
+ it "gets volume" do
15
+ expect(issue.volume).to eq "56"
16
+ end
17
+
18
+ it "gets issue" do
19
+ expect(issue.issue_number).to eq "27"
20
+ end
21
+
22
+ it "gets edition fields" do
23
+ expect(issue.edition_name).to eq "Main Edition"
24
+ expect(issue.edition_number).to eq "1"
25
+ end
26
+
27
+ it "gets publication date" do
28
+ expect(issue.publication_date).to eq "1935-08-02"
29
+ end
30
+
31
+ it "gets publication title via //mets/@LABEL" do
32
+ expect(issue.publication_title).to eq 'The Park Record (Park City, UT)'
33
+ end
34
+
35
+ it "gets held_by" do
36
+ expect(issue.held_by).to eq "University of Utah; Salt Lake City, UT"
37
+ end
38
+ end
39
+
40
+ describe "sample fixture 'batch_test_ver01" do
41
+ let(:issue) { described_class.new(issue2) }
42
+ let(:issue_ingest) do
43
+ NewspaperWorks::Ingest::NDNP::IssueIngest.new(issue2)
44
+ end
45
+
46
+ it "gets lccn" do
47
+ expect(issue.lccn).to eq "sn85025202"
48
+ end
49
+
50
+ it "gets volume" do
51
+ expect(issue.volume).to eq "2"
52
+ end
53
+
54
+ it "gets issue" do
55
+ expect(issue.issue_number).to eq "4"
56
+ end
57
+
58
+ it "gets edition fields" do
59
+ expect(issue.edition_name).to be_nil
60
+ expect(issue.edition_number).to eq "1"
61
+ end
62
+
63
+ it "gets publication date" do
64
+ expect(issue.publication_date).to eq "1857-02-14"
65
+ end
66
+
67
+ it "gets publication title via label, when reel unavailable" do
68
+ expect(issue.publication_title).to \
69
+ eq 'Weekly Trinity journal (Weaverville, Calif.)'
70
+ end
71
+
72
+ # integration test for reel context publication title:
73
+ it "gets publication title via label, from reel" do
74
+ expect(issue_ingest.metadata.publication_title).to \
75
+ eq 'Weekly Trinity journal (Weaverville, Calif.)'
76
+ expect(issue_ingest.metadata.publication_title).to \
77
+ eq issue_ingest.container.metadata.title
78
+ end
79
+
80
+ it "gets held_by" do
81
+ expect(issue.held_by).to eq "University of Utah, Salt Lake City, UT"
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,79 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::PageIngest do
5
+ include_context "ndnp fixture setup"
6
+
7
+ def file_type?(path, ext)
8
+ path.split('/')[-1].split('.')[-1].casecmp(ext).zero?
9
+ end
10
+
11
+ def includes_file_type(files, ext)
12
+ files.any? { |path| file_type?(path, ext) }
13
+ end
14
+
15
+ def check_expected_files(page, extensions)
16
+ files = page.files
17
+ expect(files.size).to eq extensions.size
18
+ files.each do |filepath|
19
+ # each path is normalized to absolute path
20
+ expect(filepath.start_with?('/')).to be true
21
+ end
22
+ extensions.each do |ext|
23
+ expect(includes_file_type(files, ext)).to be true
24
+ end
25
+ end
26
+
27
+ describe "sample fixture 'batch_local'" do
28
+ let(:page) { described_class.new(issue1, 'pageModsBib8') }
29
+
30
+ it "gets metadata" do
31
+ expect(page.metadata).to be_a NewspaperWorks::Ingest::NDNP::PageMetadata
32
+ # uses same Nokogiri document context:
33
+ expect(page.metadata.doc).to be page.doc
34
+ end
35
+
36
+ it "gets expected files" do
37
+ check_expected_files(page, ['tif', 'jp2', 'pdf', 'xml'])
38
+ end
39
+
40
+ it "gets nil container for page without reel XML" do
41
+ reel = page.container
42
+ expect(reel).to be_nil
43
+ end
44
+ end
45
+
46
+ describe "sample fixture 'batch_test_ver01'" do
47
+ let(:page) { described_class.new(issue2, 'pageModsBib1') }
48
+
49
+ it "gets metadata" do
50
+ expect(page.metadata).to be_a NewspaperWorks::Ingest::NDNP::PageMetadata
51
+ # uses same Nokogiri document context:
52
+ expect(page.metadata.doc).to be page.doc
53
+ end
54
+
55
+ it "gets expected files" do
56
+ check_expected_files(page, ['tif', 'jp2', 'pdf', 'xml'])
57
+ end
58
+
59
+ it "gets a ContainerIngest for reel providing page" do
60
+ reel = page.container
61
+ expect(reel).to be_a NewspaperWorks::Ingest::NDNP::ContainerIngest
62
+ expect(reel.path).to end_with '_1.xml'
63
+ end
64
+ end
65
+
66
+ describe "sample fixture reel xml (for control images)" do
67
+ let(:page) { described_class.new(reel1, 'targetModsBib1') }
68
+
69
+ it "gets metadata" do
70
+ expect(page.metadata).to be_a NewspaperWorks::Ingest::NDNP::PageMetadata
71
+ # uses same Nokogiri document context:
72
+ expect(page.metadata.doc).to be page.doc
73
+ end
74
+
75
+ it "gets expected files" do
76
+ check_expected_files(page, ['tif', 'jp2', 'pdf', 'xml'])
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,184 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+ require 'misc_shared'
4
+
5
+ RSpec.describe NewspaperWorks::Ingest::NDNP::PageIngester do
6
+ include_context "ndnp fixture setup"
7
+ include_context "shared setup"
8
+
9
+ # use FactoryBot issue factory for a NewspaperIssue object for page:
10
+ let(:issue) { create(:newspaper_issue) }
11
+
12
+ # We need page source data as PageIngest
13
+ let(:page_data) do
14
+ NewspaperWorks::Ingest::NDNP::PageIngest.new(issue1, 'pageModsBib8')
15
+ end
16
+
17
+ let(:metadata) { page_data.metadata }
18
+
19
+ # PageIngester adapter does the work we are testing:
20
+ let(:adapter) { described_class.new(page_data, issue) }
21
+
22
+ describe "adapter and asset construction" do
23
+ it "constructs adapter with page source, issue context" do
24
+ expect(adapter.page).to be page_data
25
+ expect(adapter.issue).to be issue
26
+ expect(adapter.path).to eq page_data.path
27
+ end
28
+
29
+ it "constructs NewspaperPage with adapter" do
30
+ # construct_page is ingest of metadata only, without importing files:
31
+ adapter.construct_page
32
+ page = adapter.target
33
+ expect(page).to be_a NewspaperPage
34
+ expect(page.id).not_to be_nil
35
+ expect(issue.members).to include page
36
+ expect(issue.ordered_members.to_a).to include page
37
+ end
38
+
39
+ it "constructs adapter with hash options" do
40
+ user = User.batch_user.user_key
41
+ adapter = described_class.new(
42
+ page_data,
43
+ issue,
44
+ depositor: user
45
+ )
46
+ expect(adapter.opts[:depositor]).to eq user
47
+ end
48
+ end
49
+
50
+ describe "metadata access/setting" do
51
+ let(:expected_title) do
52
+ "#{issue.title.first}: Page #{metadata.page_number}"
53
+ end
54
+
55
+ it "sets default administrative metadata with default construction" do
56
+ adapter.construct_page
57
+ asset = adapter.target
58
+ expect(asset.depositor).to eq User.batch_user.user_key
59
+ expect(asset.admin_set).to eq AdminSet.find(AdminSet::DEFAULT_ID)
60
+ expect(asset.visibility).to eq 'open'
61
+ end
62
+
63
+ it "sets custom administrative metadata" do
64
+ # test one exemplary/representative option:
65
+ adapter = described_class.new(page_data, issue, visibility: 'open')
66
+ adapter.construct_page
67
+ expect(adapter.target.visibility).to eq 'open'
68
+ end
69
+
70
+ it "copies metadata to NewspaperPage" do
71
+ adapter.construct_page
72
+ page = adapter.target
73
+ expect(page.title).to contain_exactly expected_title
74
+ expect(page.width).to eq metadata.width
75
+ expect(page.height).to eq metadata.height
76
+ expect(page.page_number).to eq metadata.page_number
77
+ expect(page.identifier).to contain_exactly metadata.identifier
78
+ end
79
+ end
80
+
81
+ describe "reel/container linking" do
82
+ # need publication, title, and reel to use for page data context:
83
+ let(:publication) { create(:newspaper_title) }
84
+
85
+ let(:issue) do
86
+ issue = create(:newspaper_issue)
87
+ publication.members << issue
88
+ publication.save!
89
+ issue
90
+ end
91
+
92
+ let(:issue_data) do
93
+ NewspaperWorks::Ingest::NDNP::IssueIngest.new(issue2)
94
+ end
95
+
96
+ let(:page_data) do
97
+ data = issue_data.to_a[0]
98
+ # some NDNP samples missing TIFF, put dummy in place of missing, as needed
99
+ data.files = data.files.map do |path|
100
+ File.exist?(path) ? path : File.join(fixture_path, 'ocr_gray.tiff')
101
+ end
102
+ data
103
+ end
104
+
105
+ let(:adapter) { described_class.new(page_data, issue) }
106
+
107
+ it "links page to reel" do
108
+ # construct_page + link_reel ~= ingest without files import:
109
+ adapter.construct_page
110
+ adapter.link_reel
111
+ page = adapter.target
112
+ page.reload
113
+ expect(page.container).not_to be_nil
114
+ expect(page.container.ordered_members.to_a.map(&:id)).to include page.id
115
+ end
116
+ end
117
+
118
+ describe "file import integration" do
119
+ do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]
120
+
121
+ let(:issue_data) do
122
+ NewspaperWorks::Ingest::NDNP::IssueIngest.new(issue2)
123
+ end
124
+
125
+ let(:page_data_minus_tiff) { issue_data.to_a[0] }
126
+
127
+ def check_fileset(page)
128
+ fileset = page.members.select { |m| m.class == FileSet }[0]
129
+ # Reload fileset because jobs have modified:
130
+ fileset.reload
131
+ expect(fileset).not_to be_nil
132
+ expect(fileset.original_file).not_to be_nil
133
+ expect(fileset.original_file.mime_type).to eq 'image/tiff'
134
+ expect(fileset.original_file.size).to be > 0
135
+ end
136
+
137
+ def expect_file_assignment_logging(adapter)
138
+ expect(adapter).to receive(:write_log).with(
139
+ satisfy { |v| v.include?('Assigned primary file to work') }
140
+ ).once
141
+ expect(adapter).to receive(:write_log).with(
142
+ satisfy { |v| v.include?('Assigned derivative file to work') }
143
+ ).exactly(3).times
144
+ expect(adapter).to receive(:write_log).with(
145
+ satisfy { |v| v.include?('Beginning file attachment') }
146
+ ).once
147
+ end
148
+
149
+ def expect_page_import_logging(adapter)
150
+ expect(adapter).to receive(:write_log).with(
151
+ satisfy { |v| v.include?('Created NewspaperPage work') }
152
+ ).once
153
+ expect(adapter).to receive(:write_log).with(
154
+ satisfy { |v| v.include?('Saved metadata to NewspaperPage work') }
155
+ ).once
156
+ expect(adapter).to receive(:write_log).with(
157
+ satisfy { |v| v.include?('Linked NewspaperIssue') }
158
+ ).once
159
+ end
160
+
161
+ it "attaches primary, derivative files", perform_enqueued: do_now_jobs do
162
+ expect_page_import_logging(adapter)
163
+ expect_file_assignment_logging(adapter)
164
+ adapter.ingest
165
+ page = adapter.target
166
+ check_fileset(page)
167
+ derivatives = NewspaperWorks::Data::WorkDerivatives.new(page)
168
+ expect(derivatives.keys).to match_array ["jp2", "xml", "pdf"]
169
+ end
170
+
171
+ # support this use-case for evaluation purposes
172
+ it "generates TIFF when missing from page", perform_enqueued: do_now_jobs do
173
+ adapter = described_class.new(page_data_minus_tiff, issue)
174
+ expect_page_import_logging(adapter)
175
+ expect(adapter).to receive(:write_log).with(
176
+ satisfy { |arg| arg.include?('Creating TIFF') },
177
+ Logger::WARN
178
+ ).exactly(1).times
179
+ expect_file_assignment_logging(adapter)
180
+ expect { adapter.ingest }.not_to raise_error
181
+ check_fileset(adapter.target)
182
+ end
183
+ end
184
+ end
@@ -0,0 +1,85 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::PageMetadata do
5
+ include_context "ndnp fixture setup"
6
+
7
+ describe "sample fixture 'batch_local'" do
8
+ let(:page1) { described_class.new(issue1, nil, 'pageModsBib8') }
9
+ let(:page2) { described_class.new(issue1, nil, 'pageModsBib6') }
10
+
11
+ it "gets expected page number as String" do
12
+ expect(page1.page_number).to eq "1"
13
+ expect(page2.page_number).to eq "2"
14
+ end
15
+
16
+ it "gets expected sequence number as Integer" do
17
+ expect(page1.page_sequence_number).to eq 1
18
+ expect(page2.page_sequence_number).to eq 2
19
+ end
20
+
21
+ it "gets expected width from ALTO as Integer " do
22
+ expect(page1.width).to eq 18_352
23
+ expect(page2.width).to eq 18_200
24
+ end
25
+
26
+ it "gets expected height from ALTO as Integer " do
27
+ expect(page1.height).to eq 28_632
28
+ expect(page2.height).to eq 28_872
29
+ end
30
+
31
+ it "gets identifier from ALTO as primary file name" do
32
+ expect(page1.identifier).to eq "0657b"
33
+ expect(page2.identifier).to eq "0656a"
34
+ end
35
+ end
36
+
37
+ describe "sample fixture 'batch_test_ver01" do
38
+ let(:page) { described_class.new(issue2, nil, 'pageModsBib1') }
39
+
40
+ it "fallback to sequence number on page without page number" do
41
+ expect(page.page_number).to eq page.page_sequence_number.to_s
42
+ end
43
+
44
+ it "gets expected sequence number as Integer" do
45
+ expect(page.page_sequence_number).to eq 1
46
+ end
47
+
48
+ it "gets expected width from ALTO as Integer " do
49
+ expect(page.width).to eq 21_464
50
+ end
51
+
52
+ it "gets expected height from ALTO as Integer " do
53
+ expect(page.height).to eq 30_268
54
+ end
55
+
56
+ it "gets identifier from ALTO as primary file name" do
57
+ expect(page.identifier).to eq "0225"
58
+ end
59
+ end
60
+
61
+ describe "sample fixture via Reel XML" do
62
+ let(:page) { described_class.new(reel1, nil, 'targetModsBib1') }
63
+
64
+ it "return nil page number when page and sequence missing" do
65
+ expect(page.page_number).to eq nil
66
+ expect(page.page_sequence_number).to eq nil
67
+ end
68
+
69
+ it "gets expected sequence number as Integer" do
70
+ expect(page.page_sequence_number).to eq nil
71
+ end
72
+
73
+ it "gets expected width from ALTO as Integer " do
74
+ expect(page.width).to eq 30_176
75
+ end
76
+
77
+ it "gets expected height from ALTO as Integer " do
78
+ expect(page.height).to eq 29_152
79
+ end
80
+
81
+ it "gets identifier from ALTO as primary file name" do
82
+ expect(page.identifier).to eq "0001"
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,83 @@
1
+ require 'spec_helper'
2
+
3
+ # test NewspaperIssueIngest against a NewspaperIssue
4
+ RSpec.describe NewspaperWorks::Ingest::NewspaperIssueIngest do
5
+ # define the path to the file we will use for multiple examples
6
+ let(:path) do
7
+ fixtures = File.join(NewspaperWorks::GEM_PATH, 'spec/fixtures/files')
8
+ Hyrax.config.whitelisted_ingest_dirs.push(fixtures)
9
+ File.join(fixtures, 'sample-4page-issue.pdf')
10
+ end
11
+
12
+ let(:path2) do
13
+ fixtures = File.join(NewspaperWorks::GEM_PATH, 'spec/fixtures/files')
14
+ File.join(fixtures, 'ndnp-sample1.pdf')
15
+ end
16
+
17
+ it_behaves_like('ingest adapter IO')
18
+
19
+ describe "file import and attachment" do
20
+ do_now_jobs = [
21
+ IngestJob,
22
+ IngestLocalFileJob,
23
+ InheritPermissionsJob,
24
+ VisibilityCopyJob
25
+ ]
26
+
27
+ PERMISSION_METHODS = [
28
+ :edit_users,
29
+ :read_users,
30
+ :discover_users,
31
+ :edit_groups,
32
+ :read_groups,
33
+ :discover_groups
34
+ ].freeze
35
+
36
+ def check_equivalent_permissions(obj1, obj2)
37
+ PERMISSION_METHODS.each do |m|
38
+ expect(obj1.send(m)).to match_array obj2.send(m)
39
+ end
40
+ expect(obj1.visibility).to eq obj2.visibility
41
+ end
42
+
43
+ def check_page_metadata(page)
44
+ expect(page.date_uploaded).not_to be nil
45
+ expect(page.date_modified).not_to be nil
46
+ # title: issue title plus page qualifier expected:
47
+ expect(page.title).to contain_exactly "Here and There: Page 1"
48
+ # page number is sequence number, expressed as String
49
+ expect(page.page_number).to be_a String
50
+ expect(page.page_number).to match(/^[0-9]+$/)
51
+ end
52
+
53
+ def assign_custom_permissions(work)
54
+ # read_groups ['public'] <==> "open" visibility
55
+ work.read_groups = ['public']
56
+ # add a permission to issue, that is not default:
57
+ work.read_users = ['peanutbutter@example.com']
58
+ work.save!
59
+ end
60
+
61
+ it "ingests work and creates child page works" do
62
+ adapter = build(:newspaper_issue_ingest)
63
+ adapter.ingest(path)
64
+ child_pages = adapter.work.members.select { |w| w.class == NewspaperPage }
65
+ expect(child_pages.length).to eq 4
66
+ end
67
+
68
+ # For created child pages, date and permission attributes are side-effect
69
+ # of file attachment process (`NewspaperWorks::Data::WorkFiles`)
70
+ # manipulating the work through the Hyrax actor stack create pipeline.
71
+ it "sets work attributes on created pages via file attachment",
72
+ peform_enqueued: do_now_jobs do
73
+ adapter = build(:newspaper_issue_ingest)
74
+ assign_custom_permissions(adapter.work)
75
+ adapter.ingest(path2)
76
+ child_pages = adapter.work.members.select { |w| w.class == NewspaperPage }
77
+ page = child_pages[0]
78
+ check_page_metadata(page)
79
+ # permissions:
80
+ check_equivalent_permissions(adapter.work, page)
81
+ end
82
+ end
83
+ end