newspaper_works 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (461) hide show
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,224 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Data::WorkFiles do
5
+ include_context "shared setup"
6
+
7
+ let(:work) { work_with_file }
8
+ let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
9
+ let(:tiff_uri) { 'file://' + File.expand_path(tiff_path) }
10
+
11
+ describe "adapter composition" do
12
+ it "adapts work" do
13
+ adapter = described_class.new(work)
14
+ expect(adapter.work).to be work
15
+ end
16
+
17
+ it "adapts work with 'of' alt constructor" do
18
+ adapter = described_class.of(work)
19
+ expect(adapter.work).to be work
20
+ end
21
+ end
22
+
23
+ describe "path assignment queueing" do
24
+ it "queues assigned file path" do
25
+ adapter = described_class.of(work)
26
+ expect(adapter.assigned).to be_empty
27
+ # assign a valid source path
28
+ adapter.assign(tiff_path)
29
+ expect(adapter.assigned).to include tiff_path
30
+ end
31
+
32
+ it "will fail to assign file in non-whitelisted dir" do
33
+ adapter = described_class.new(work)
34
+ # need a non-whitelisted file that exists:
35
+ bad_path = File.expand_path("../../spec_helper.rb", fixture_path)
36
+ expect { adapter.assign(bad_path) }.to raise_error(SecurityError)
37
+ end
38
+
39
+ it "queues a file:/// URI" do
40
+ adapter = described_class.of(work)
41
+ expect(adapter.assigned).to be_empty
42
+ adapter.assign(tiff_uri)
43
+ expect(adapter.assigned).to include tiff_uri
44
+ end
45
+
46
+ it "queues a Pathname, normalized to string" do
47
+ adapter = described_class.of(work)
48
+ expect(adapter.assigned).to be_empty
49
+ adapter.assign(Pathname.new(tiff_path))
50
+ expect(adapter.assigned).to include tiff_path
51
+ end
52
+
53
+ it "unqueues a queued path" do
54
+ adapter = described_class.of(work)
55
+ adapter.assign(tiff_path)
56
+ expect(adapter.assigned).to include tiff_path
57
+ adapter.unassign(tiff_path)
58
+ expect(adapter.assigned).to be_empty
59
+ end
60
+ end
61
+
62
+ describe "hash/mapping-like file enumeration" do
63
+ it "has expected WorkFile in values for work" do
64
+ adapter = described_class.of(work)
65
+ values = adapter.values
66
+ expect(values).to be_an Array
67
+ expect(values.size).to eq 1
68
+ expect(values[0]).to be_an NewspaperWorks::Data::WorkFile
69
+ expect(values[0].parent).to be adapter
70
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
71
+ expect(values[0].fileset).to eq first_fileset
72
+ expect(values[0].unwrapped).to be_a Hydra::PCDM::File
73
+ end
74
+
75
+ it "has expected fileset keys for work" do
76
+ adapter = described_class.of(work)
77
+ keys = adapter.keys
78
+ expect(keys).to be_an Array
79
+ expect(keys[0]).to be_a String
80
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
81
+ expect(keys[0]).to eq first_fileset.id
82
+ end
83
+
84
+ it "has expected entries for work" do
85
+ adapter = described_class.of(work)
86
+ entries = adapter.entries
87
+ expect(entries).to be_an Array
88
+ expect(entries[0]).to be_an Array
89
+ expect(entries[0].size).to eq 2
90
+ expect(entries[0][0]).to eq adapter.keys[0]
91
+ expect(entries[0][1]).to eq adapter.values[0]
92
+ end
93
+
94
+ it "gets work file by fileset id" do
95
+ adapter = described_class.of(work)
96
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
97
+ fsid = adapter.keys[0]
98
+ expect(fsid).to eq first_fileset.id
99
+ work_file = adapter.get(fsid)
100
+ expect(work_file.unwrapped).to eq first_fileset.original_file
101
+ work_file = adapter[fsid]
102
+ expect(work_file.unwrapped).to eq first_fileset.original_file
103
+ end
104
+
105
+ it "gets work file by work-local filename" do
106
+ adapter = described_class.of(work)
107
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
108
+ name = first_fileset.original_file.original_name
109
+ work_file = adapter.get(name)
110
+ expect(work_file).to eq adapter.get(first_fileset.id)
111
+ end
112
+
113
+ it "verifies inclusion of fileset id key" do
114
+ adapter = described_class.of(work)
115
+ fsid = adapter.keys[0]
116
+ expect(adapter.include?(fsid)).to be true
117
+ end
118
+ end
119
+
120
+ describe "assignment state" do
121
+ it "has empty state for work with no files" do
122
+ bare_work = NewspaperPage.new
123
+ bare_work.title = ['No files to see here']
124
+ bare_work.save!
125
+ adapter = described_class.of(bare_work)
126
+ expect(adapter.keys.empty?).to be true
127
+ expect(adapter.state).to eq 'empty'
128
+ end
129
+
130
+ it "has 'dirty' state when files assigned" do
131
+ adapter = described_class.of(work)
132
+ expect(adapter.state).to eq 'saved'
133
+ adapter.assign(tiff_path)
134
+ # changes to dirty
135
+ expect(adapter.state).to eq 'dirty'
136
+ # unassign path again to empty assigned queue:
137
+ adapter.unassign(tiff_path)
138
+ # now we are back to 'saved' since no changes are queued now:
139
+ expect(adapter.state).to eq 'saved'
140
+ end
141
+ end
142
+
143
+ describe "commits changes" do
144
+ # These jobs we need whitelisted to run now, at minimum:
145
+ do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]
146
+ # These we skip: [CharacterizeJob, CreateDerivativesJob]
147
+ # -- skipping these saves 10-15 seconds on attachment example
148
+
149
+ permission_methods = [
150
+ :edit_users,
151
+ :read_users,
152
+ :discover_users,
153
+ :edit_groups,
154
+ :read_groups,
155
+ :discover_groups
156
+ ]
157
+
158
+ let(:bare_work) do
159
+ bare_work = NewspaperPage.new
160
+ bare_work.title = ['No files to see here']
161
+ bare_work.save!
162
+ bare_work
163
+ end
164
+
165
+ it "commits unassign (file deletions)" do
166
+ adapter = described_class.of(work)
167
+ expect(adapter.keys.size).to eq 1
168
+ adapter.unassign(adapter.keys[0])
169
+ adapter.commit!
170
+ expect(adapter.keys.size).to eq 0
171
+ expect(work.members.select { |m| m.class == FileSet }.size).to eq 0
172
+ end
173
+
174
+ it "commit for assignment invokes actor stack" do
175
+ work = bare_work
176
+ adapter = described_class.of(work)
177
+ adapter.assign(tiff_path)
178
+ allow(Hyrax::CurationConcern.actor).to receive(:create).and_return(true)
179
+ expect(Hyrax::CurationConcern.actor).to receive(:create)
180
+ expect(adapter.commit!).to be true
181
+ end
182
+
183
+ it "commits successful file attachment", perform_enqueued: do_now_jobs do
184
+ work = bare_work
185
+ adapter = described_class.of(work)
186
+ adapter.assign(tiff_path)
187
+ adapter.commit!
188
+ # whitelisted jobs (do_now_jobs) performed as effect of commit!
189
+ # are configured to effectively run inline. Reloading work
190
+ # should refresh the work.members, and by consequence adapter.keys
191
+ work.reload
192
+ expect(adapter.keys.size).to eq 1
193
+ expect(work.members.select { |m| m.class == FileSet }.size).to eq 1
194
+ expect(adapter.names).to include 'ocr_gray.tiff'
195
+ end
196
+
197
+ it "copies work perimssions to fileset", perform_enqueued: do_now_jobs do
198
+ adapter = described_class.of(bare_work)
199
+ adapter.assign(tiff_path)
200
+ adapter.commit!
201
+ bare_work.reload
202
+ fileset = bare_work.members.select { |w| w.class == FileSet }[0]
203
+ permission_methods.each do |m|
204
+ expect(fileset.send(m)).to match_array bare_work.send(m)
205
+ end
206
+ expect(fileset.visibility).to eq bare_work.visibility
207
+ end
208
+ end
209
+
210
+ describe "derivative access" do
211
+ it "gets derivatives for first fileset" do
212
+ fileset = work.members.select { |m| m.class == FileSet }[0]
213
+ adapter = described_class.of(work)
214
+ # adapts same context(s):
215
+ expect(adapter.derivatives.fileset.id).to eq fileset.id
216
+ expect(adapter.derivatives.work).to be work
217
+ expect(adapter.derivatives.class).to eq \
218
+ NewspaperWorks::Data::WorkDerivatives
219
+ # transitive parent/child relationship, can traverse to adapter from
220
+ # derivatives:
221
+ expect(adapter.derivatives.parent.parent).to be adapter
222
+ end
223
+ end
224
+ end
@@ -0,0 +1,158 @@
1
+ require 'spec_helper'
2
+ require 'newspaper_works_fixtures'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::BatchIssueIngester do
5
+ include_context "ingest test fixtures"
6
+
7
+ # lccn, paths for respective media:
8
+ let(:pdf_lccn) { 'sn93059126' }
9
+ let(:tiff_lccn) { 'sn93059126' }
10
+ let(:jp2_lccn) { 'sn85058233' }
11
+ let(:pdf_path) { File.join(pdf_fixtures, pdf_lccn) }
12
+ let(:tiff_path) { File.join(tiff_fixtures, tiff_lccn) }
13
+ let(:jp2_path) { File.join(jp2_fixtures, jp2_lccn) }
14
+
15
+ describe "ingester construction and composition" do
16
+ it "constructs ingester from PDF with expected metadata" do
17
+ # given path to single batch
18
+ ingester = described_class.new(pdf_path)
19
+ # correctly parses LCCN from path:
20
+ expect(ingester.lccn).to eq pdf_lccn
21
+ expect(ingester.path).to eq pdf_path
22
+ end
23
+
24
+ it "constructs ingester from TIFF with expected metadata" do
25
+ ingester = described_class.new(tiff_path)
26
+ expect(ingester.lccn).to eq tiff_lccn
27
+ expect(ingester.path).to eq tiff_path
28
+ end
29
+
30
+ it "constructs ingester from JP2 with expected metadata" do
31
+ ingester = described_class.new(jp2_path)
32
+ expect(ingester.lccn).to eq jp2_lccn
33
+ expect(ingester.path).to eq jp2_path
34
+ end
35
+
36
+ it "constructs ingester with publication metadata" do
37
+ ingester = described_class.new(pdf_path)
38
+ expect(ingester.publication).to be_a NewspaperWorks::Ingest::PublicationInfo
39
+ expect(ingester.publication.lccn).to eq ingester.lccn
40
+ expect(ingester.publication.title).to eq 'The weekly journal'
41
+ end
42
+
43
+ it "constructs ingester with explicit LCCN" do
44
+ # path is for The weekly journal (Chicopee Mass), pass LCCN for other pub
45
+ sltrib = 'sn83045396'
46
+ ingester = described_class.new(pdf_path, lccn: sltrib)
47
+ expect(ingester.lccn).to eq sltrib
48
+ expect(ingester.publication.lccn).to eq ingester.lccn
49
+ expect(ingester.publication.title).to eq 'Salt Lake tribune'
50
+ end
51
+
52
+ it "constructs ingester enumerating PDF files" do
53
+ ingester = described_class.new(pdf_path)
54
+ pdfs = Dir.entries(pdf_path).select { |name| name.end_with?('.pdf') }
55
+ paths = pdfs.map { |name| File.join(pdf_path, name) }
56
+ issues = ingester.issues
57
+ expect(issues).to be_a NewspaperWorks::Ingest::PDFIssues
58
+ expect(issues.size).to eq pdfs.size
59
+ expect(issues.keys).to match_array paths
60
+ end
61
+
62
+ it "constructs ingester enumerating issues of page images" do
63
+ ingester = described_class.new(tiff_path)
64
+ entries = Dir.entries(tiff_path)
65
+ .map { |name| File.join(tiff_path, name) }
66
+ .select { |v| !v.end_with?('.') && File.directory?(v) }
67
+ issues = ingester.issues
68
+ expect(issues).to be_a NewspaperWorks::Ingest::ImageIngestIssues
69
+ expect(issues.size).to eq 2
70
+ expect(issues.keys).to match_array entries
71
+ end
72
+ end
73
+
74
+ describe "ingester behavior" do
75
+ # Ensure LCCN has no initial publication NewspaperTitle asset:
76
+ let(:pdf_lccn) do
77
+ v = 'sn93059126'
78
+ NewspaperTitle.where(lccn: v).delete_all
79
+ v
80
+ end
81
+
82
+ let(:tiff_lccn) do
83
+ v = 'sn93059126'
84
+ NewspaperTitle.where(lccn: v).delete_all
85
+ v
86
+ end
87
+
88
+ let(:jp2_lccn) do
89
+ v = 'sn85058233'
90
+ NewspaperTitle.where(lccn: v).delete_all
91
+ v
92
+ end
93
+
94
+ let(:pdf_issue_path) { File.join(pdf_path, '1853060401.pdf') }
95
+ let(:tiff_issue_path) { File.join(tiff_path, '1853060401') }
96
+ let(:jp2_issue_path) { File.join(jp2_path, '1935080201') }
97
+
98
+ def single_issue_dir(lccn, target_issue_path)
99
+ Hyrax.config.whitelisted_ingest_dirs |= ['/tmp']
100
+ parent_dir = Dir.mktmpdir
101
+ dir = File.join(parent_dir, lccn)
102
+ FileUtils.mkdir(dir)
103
+ FileUtils.cp_r(target_issue_path, dir)
104
+ dir
105
+ end
106
+
107
+ def job_enqueued?(job)
108
+ jobs = ActiveJob::Base.queue_adapter.enqueued_jobs.map { |j| j[:job] }
109
+ jobs.include?(job)
110
+ end
111
+
112
+ def expect_administrative_metadata(work)
113
+ expect(work.depositor).to eq User.batch_user.user_key
114
+ expect(work.admin_set).to eq AdminSet.find(AdminSet::DEFAULT_ID)
115
+ expect(work.visibility).to eq 'open'
116
+ end
117
+
118
+ # rubocop:disable Metrics/AbcSize
119
+ def issue_ingest(lccn, path, page_count, metadata)
120
+ dir = single_issue_dir(lccn, path)
121
+ ingester = described_class.new(dir)
122
+ ingester.ingest
123
+ # Outcomes tested:
124
+ # 1. NewspaperTitle for Publication created, and contains issue
125
+ issue = NewspaperTitle.where(lccn: lccn).first.members.to_a[0]
126
+ # 2. Metadata:
127
+ expect(issue.publication_date).to eq metadata[:publication_date]
128
+ expect(issue.title).to contain_exactly metadata[:title]
129
+ expect_administrative_metadata(issue)
130
+ if page_count > 0
131
+ # 3. Child pages created
132
+ expect(issue.pages.size).to eq page_count
133
+ expect_administrative_metadata(issue.pages[0])
134
+ # 4. Creation of issue PDF enqueued:
135
+ expect(job_enqueued?(NewspaperWorks::ComposeIssuePDFJob)).to be true
136
+ end
137
+ # clean up after temp dir:
138
+ FileUtils.rmtree(File.dirname(dir))
139
+ end
140
+ # rubocop:enable Metrics/AbcSize
141
+
142
+ it "ingests PDFs" do
143
+ expected_metadata = {
144
+ title: "The weekly journal: June 4, 1853",
145
+ publication_date: "1853-06-04"
146
+ }
147
+ issue_ingest(pdf_lccn, pdf_issue_path, 0, expected_metadata)
148
+ end
149
+
150
+ it "ingests JP2 page images (as TIFF) into an issue with child pages" do
151
+ expected_metadata = {
152
+ title: "The Park record: August 2, 1935",
153
+ publication_date: "1935-08-02"
154
+ }
155
+ issue_ingest(jp2_lccn, jp2_issue_path, 2, expected_metadata)
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Ingest::ChronAmPublicationInfo do
4
+ let(:lccn1) { 'sn94051019' }
5
+ let(:lccn2) { 'sn84038814' }
6
+ let(:bad_lccn) { 'sn99999999' }
7
+
8
+ describe "gets metadata" do
9
+ it "gets simple metadata" do
10
+ meta = described_class.new(lccn1)
11
+ expect(meta.title).to eq 'Marysville daily news'
12
+ expect(meta.issn).to be_nil
13
+ expect(meta.oclcnum).to eq 'ocm30043558'
14
+ expect(meta.place_name).to eq 'Marysville, Calif.'
15
+ expect(meta.place_of_publication).to eq 'http://sws.geonames.org/5370984/'
16
+ end
17
+
18
+ it "gets related item metadata" do
19
+ meta1 = described_class.new(lccn1)
20
+ meta2 = described_class.new(lccn2)
21
+ # lccn2 succeeds lccn1, favors lccn.loc.gov URL as authoritative:
22
+ expect(meta1.succeeded_by).to eq "https://lccn.loc.gov/#{lccn2}"
23
+ # lccn1 precedes lccn2, favors chronam URL as authoritative, since
24
+ # catalog.loc.gov and lccn.loc.gov do not have records for this LCCN:
25
+ expect(meta2.preceded_by).to eq "https://chroniclingamerica.loc.gov/lccn/sn94051019"
26
+ end
27
+ end
28
+
29
+ describe "error handling" do
30
+ it "handles unknown LCCN (404)" do
31
+ meta = described_class.new(bad_lccn)
32
+ expect(meta.empty?).to be true
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+ require 'newspaper_works_fixtures'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::FromCommand do
5
+ include_context "ingest test fixtures"
6
+
7
+ describe "alternate construction" do
8
+ let(:klass) do
9
+ Class.new do
10
+ extend NewspaperWorks::Ingest::FromCommand
11
+
12
+ attr_accessor :path, :opts
13
+
14
+ def initialize(path, opts = {})
15
+ @path = path
16
+ @opts = opts
17
+ end
18
+ end
19
+ end
20
+
21
+ def construct(args)
22
+ klass.from_command(
23
+ args,
24
+ 'rake newspaper_works:ingest_pdf_issues --'
25
+ )
26
+ end
27
+
28
+ let(:lccn) { 'sn93059126' }
29
+
30
+ let(:pdf_path) { File.join(pdf_fixtures, lccn) }
31
+
32
+ let(:fake_argv) do
33
+ [
34
+ 'newspaper_works:ingest_pdf_issues',
35
+ '--',
36
+ "--path=#{pdf_path}"
37
+ ]
38
+ end
39
+
40
+ let(:more_argv) do
41
+ fake_argv + [
42
+ "--lccn=#{lccn}"
43
+ ]
44
+ end
45
+
46
+ let(:most_argv) do
47
+ more_argv + [
48
+ "--admin_set=admin_set/default",
49
+ "--depositor=#{User.batch_user.user_key}",
50
+ "--visibility=open"
51
+ ]
52
+ end
53
+
54
+ it "calls constructor with minimal options parsed" do
55
+ ingester = construct(fake_argv)
56
+ expect(ingester.path).to eq pdf_path
57
+ expect(ingester.opts[:path]).to eq pdf_path
58
+ end
59
+
60
+ it "calls constructor with explict lccn option" do
61
+ ingester = construct(more_argv)
62
+ expect(ingester.path).to eq pdf_path
63
+ expect(ingester.opts[:lccn]).to eq lccn
64
+ end
65
+
66
+ it "calls constructor with all options" do
67
+ ingester = construct(most_argv)
68
+ expect(ingester.path).to eq pdf_path
69
+ expect(ingester.opts[:lccn]).to eq lccn
70
+ expect(ingester.opts[:admin_set]).to eq 'admin_set/default'
71
+ expect(ingester.opts[:depositor]).to eq User.batch_user.user_key
72
+ expect(ingester.opts[:visibility]).to eq 'open'
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Ingest::ImageIngestIssues do
4
+ include_context 'ingest test fixtures'
5
+
6
+ let(:lccn) { 'sn93059126' }
7
+
8
+ let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }
9
+
10
+ let(:pub_path) { File.join(tiff_fixtures, lccn) }
11
+
12
+ let(:expected_paths) do
13
+ entries = Dir.entries(pub_path).map { |p| File.join(pub_path, p) }
14
+ entries.select { |p| File.directory?(p) && !File.basename(p).start_with?('.') }
15
+ end
16
+
17
+ let(:issues) { described_class.new(pub_path, publication) }
18
+
19
+ describe " construction and metadata" do
20
+ it "constructs with path and publication" do
21
+ expect(issues.path).to eq pub_path
22
+ expect(issues.publication).to be publication
23
+ expect(issues.lccn).to eq lccn
24
+ expect(issues.publication.lccn).to eq lccn
25
+ end
26
+
27
+ it "enumerates valid directories as IssueImages objects" do
28
+ expect(issues.size).to eq 2
29
+ enumerated = issues.values
30
+ expect(enumerated.size).to eq issues.size
31
+ sample = enumerated[0]
32
+ expect(sample).to be_a NewspaperWorks::Ingest::IssueImages
33
+ expect(File.dirname(sample.path)).to eq pub_path
34
+ end
35
+
36
+ it "presents hash-like mapping behavior" do
37
+ # Keys are paths to directory containing issue images:
38
+ expect(issues.keys).to match_array expected_paths
39
+ # info and [] methods get IssueImages object for given path key:
40
+ issue1 = issues[issues.keys[0]]
41
+ issue2 = issues.info(issues.keys[1])
42
+ expect(issue1).to be_a NewspaperWorks::Ingest::IssueImages
43
+ expect(issue2).to be_a NewspaperWorks::Ingest::IssueImages
44
+ expect(issue1.path).to eq issues.keys[0]
45
+ end
46
+
47
+ it "enumerates pairs like a hash" do
48
+ issues.each_value do |v|
49
+ expect(v).to be_a NewspaperWorks::Ingest::IssueImages
50
+ end
51
+ issues.each_key do |k|
52
+ expect(expected_paths).to include k
53
+ end
54
+ issues.each do |path, info|
55
+ expect(expected_paths).to include path
56
+ expect(info).to be_a NewspaperWorks::Ingest::IssueImages
57
+ expect(info.path).to eq path
58
+ end
59
+ expect(issues.to_a.size).to eq 2
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+ require 'newspaper_works_fixtures'
3
+
4
+ RSpec.shared_context "ingest test fixtures", shared_context: :metadata do
5
+ # Path to fixtures gem for sample fixtures, whitelisted:
6
+ let(:fixtures_path) do
7
+ path = NewspaperWorksFixtures.file_fixtures
8
+ whitelist = Hyrax.config.whitelisted_ingest_dirs
9
+ whitelist.push(path) unless whitelist.include?(path)
10
+ path
11
+ end
12
+
13
+ # directory containing PDF fixture batch(es)
14
+ let(:pdf_fixtures) { File.join(fixtures_path, 'pdf_batch') }
15
+
16
+ # directory containing TIFF image fixtures batch(es)
17
+ let(:tiff_fixtures) { File.join(fixtures_path, 'tiff_batch') }
18
+
19
+ # directory containing JP2 image fixture batch(es)
20
+ let(:jp2_fixtures) { File.join(fixtures_path, 'jp2_batch') }
21
+ end
22
+
23
+ RSpec.shared_examples 'ingest adapter IO' do
24
+ # define the path to the file we will use for multiple examples
25
+ let(:path) do
26
+ fixtures = File.join(NewspaperWorks::GEM_PATH, 'spec/fixtures/files')
27
+ File.join(fixtures, 'page1.tiff')
28
+ end
29
+
30
+ # DRY for this matcher's use in multiple examples:
31
+ let(:have_io_and_correct_filename) do
32
+ have_attributes(
33
+ filename: 'page1.tiff',
34
+ io: an_object_responding_to(:read)
35
+ )
36
+ end
37
+
38
+ describe "file loading" do
39
+ # the first half of work done by ingest is done by load(); these
40
+ # assertions test load() independent of work done.
41
+
42
+ it "loads stream from path" do
43
+ adapter = build(:newspaper_page_ingest)
44
+ adapter.load(path)
45
+ expect(adapter).to have_io_and_correct_filename
46
+ end
47
+
48
+ it "loads stream from a Pathname object" do
49
+ adapter = build(:newspaper_page_ingest)
50
+ adapter.load(Pathname.new(path))
51
+ expect(adapter).to have_io_and_correct_filename
52
+ end
53
+
54
+ it "loads an File object" do
55
+ adapter = build(:newspaper_page_ingest)
56
+ File.open(path) do |file|
57
+ adapter.load(file)
58
+ expect(adapter).to have_io_and_correct_filename
59
+ end
60
+ end
61
+
62
+ it "loads a StringIO with filename" do
63
+ adapter = build(:newspaper_page_ingest)
64
+ io = StringIO.new('File Content Here, Maybe')
65
+ adapter.load(io, filename: 'page1.tiff')
66
+ expect(adapter).to have_io_and_correct_filename
67
+ end
68
+
69
+ it "raises on missing explicit filename for StringIO" do
70
+ adapter = build(:newspaper_page_ingest)
71
+ io = StringIO.new('File Content Here, Maybe')
72
+ expect { adapter.load(io) }.to raise_error(ArgumentError)
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Ingest::IssueImages do
4
+ include_context 'ingest test fixtures'
5
+
6
+ # LCCN for TIFF fixture examples:
7
+ let(:lccn_tiff) { 'sn93059126' }
8
+
9
+ # LCCN for JP2 fixture examples:
10
+ let(:lccn_jp2) { 'sn85058233' }
11
+
12
+ let(:tiff_issue_path) { File.join(tiff_fixtures, lccn_tiff, '1853060401') }
13
+
14
+ let(:jp2_issue_path) { File.join(jp2_fixtures, lccn_jp2, '1935080201') }
15
+
16
+ # Publication for TIFF fixtures:
17
+ let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn_tiff) }
18
+
19
+ # Publication for JP2 fixtures:
20
+ let(:publication_jp2) { NewspaperWorks::Ingest::PublicationInfo.new(lccn_jp2) }
21
+
22
+ let(:issue) { described_class.new(tiff_issue_path, publication) }
23
+
24
+ describe "issue construction and metadata" do
25
+ it "constructs with path and publication" do
26
+ expect(issue.path).to eq tiff_issue_path
27
+ expect(issue.filename).to eq File.basename(tiff_issue_path)
28
+ expect(issue.publication).to be publication
29
+ expect(issue.lccn).to eq lccn_tiff
30
+ expect(issue.publication.lccn).to eq lccn_tiff
31
+ end
32
+
33
+ it "extracts date, edition, title from filename" do
34
+ expect(issue.publication_date).to eq '1853-06-04'
35
+ expect(issue.edition_number).to eq 1
36
+ expect(issue.title).to contain_exactly 'The weekly journal: June 4, 1853'
37
+ end
38
+
39
+ it "enumerates pages (TIFF)" do
40
+ expect(issue.to_a.size).to eq 4
41
+ expect(issue.keys.size).to eq 4
42
+ # lexical ordering:
43
+ expect(issue.keys).to eq issue.keys.sort
44
+ issue.entries.each_with_index do |pair, idx|
45
+ # PageImage object value:
46
+ page_image = pair[1]
47
+ expect(page_image).to be_a NewspaperWorks::Ingest::PageImage
48
+ expect(page_image.lccn).to eq publication.lccn
49
+ # path key
50
+ expect(page_image.path).to eq pair[0]
51
+ expect(page_image.issue).to be issue
52
+ # Verify lexical ordering (for page_number in file name vs. seq num):
53
+ expect(page_image.page_number.to_i).to eq idx + 1
54
+ # page numbering matches sequence numbering:
55
+ expected_title = "The weekly journal: June 4, 1853: Page #{page_image.page_number}"
56
+ expect(page_image.title).to contain_exactly expected_title
57
+ end
58
+ end
59
+
60
+ it "enumerates pages (JP2)" do
61
+ issue = described_class.new(jp2_issue_path, publication_jp2)
62
+ expect(issue.to_a.size).to eq 2
63
+ end
64
+ end
65
+ end