newspaper_works 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (461) hide show
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,224 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Data::WorkFiles do
5
+ include_context "shared setup"
6
+
7
+ let(:work) { work_with_file }
8
+ let(:tiff_path) { File.join(fixture_path, 'ocr_gray.tiff') }
9
+ let(:tiff_uri) { 'file://' + File.expand_path(tiff_path) }
10
+
11
+ describe "adapter composition" do
12
+ it "adapts work" do
13
+ adapter = described_class.new(work)
14
+ expect(adapter.work).to be work
15
+ end
16
+
17
+ it "adapts work with 'of' alt constructor" do
18
+ adapter = described_class.of(work)
19
+ expect(adapter.work).to be work
20
+ end
21
+ end
22
+
23
+ describe "path assignment queueing" do
24
+ it "queues assigned file path" do
25
+ adapter = described_class.of(work)
26
+ expect(adapter.assigned).to be_empty
27
+ # assign a valid source path
28
+ adapter.assign(tiff_path)
29
+ expect(adapter.assigned).to include tiff_path
30
+ end
31
+
32
+ it "will fail to assign file in non-whitelisted dir" do
33
+ adapter = described_class.new(work)
34
+ # need a non-whitelisted file that exists:
35
+ bad_path = File.expand_path("../../spec_helper.rb", fixture_path)
36
+ expect { adapter.assign(bad_path) }.to raise_error(SecurityError)
37
+ end
38
+
39
+ it "queues a file:/// URI" do
40
+ adapter = described_class.of(work)
41
+ expect(adapter.assigned).to be_empty
42
+ adapter.assign(tiff_uri)
43
+ expect(adapter.assigned).to include tiff_uri
44
+ end
45
+
46
+ it "queues a Pathname, normalized to string" do
47
+ adapter = described_class.of(work)
48
+ expect(adapter.assigned).to be_empty
49
+ adapter.assign(Pathname.new(tiff_path))
50
+ expect(adapter.assigned).to include tiff_path
51
+ end
52
+
53
+ it "unqueues a queued path" do
54
+ adapter = described_class.of(work)
55
+ adapter.assign(tiff_path)
56
+ expect(adapter.assigned).to include tiff_path
57
+ adapter.unassign(tiff_path)
58
+ expect(adapter.assigned).to be_empty
59
+ end
60
+ end
61
+
62
+ describe "hash/mapping-like file enumeration" do
63
+ it "has expected WorkFile in values for work" do
64
+ adapter = described_class.of(work)
65
+ values = adapter.values
66
+ expect(values).to be_an Array
67
+ expect(values.size).to eq 1
68
+ expect(values[0]).to be_an NewspaperWorks::Data::WorkFile
69
+ expect(values[0].parent).to be adapter
70
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
71
+ expect(values[0].fileset).to eq first_fileset
72
+ expect(values[0].unwrapped).to be_a Hydra::PCDM::File
73
+ end
74
+
75
+ it "has expected fileset keys for work" do
76
+ adapter = described_class.of(work)
77
+ keys = adapter.keys
78
+ expect(keys).to be_an Array
79
+ expect(keys[0]).to be_a String
80
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
81
+ expect(keys[0]).to eq first_fileset.id
82
+ end
83
+
84
+ it "has expected entries for work" do
85
+ adapter = described_class.of(work)
86
+ entries = adapter.entries
87
+ expect(entries).to be_an Array
88
+ expect(entries[0]).to be_an Array
89
+ expect(entries[0].size).to eq 2
90
+ expect(entries[0][0]).to eq adapter.keys[0]
91
+ expect(entries[0][1]).to eq adapter.values[0]
92
+ end
93
+
94
+ it "gets work file by fileset id" do
95
+ adapter = described_class.of(work)
96
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
97
+ fsid = adapter.keys[0]
98
+ expect(fsid).to eq first_fileset.id
99
+ work_file = adapter.get(fsid)
100
+ expect(work_file.unwrapped).to eq first_fileset.original_file
101
+ work_file = adapter[fsid]
102
+ expect(work_file.unwrapped).to eq first_fileset.original_file
103
+ end
104
+
105
+ it "gets work file by work-local filename" do
106
+ adapter = described_class.of(work)
107
+ first_fileset = work.members.select { |m| m.class == FileSet }[0]
108
+ name = first_fileset.original_file.original_name
109
+ work_file = adapter.get(name)
110
+ expect(work_file).to eq adapter.get(first_fileset.id)
111
+ end
112
+
113
+ it "verifies inclusion of fileset id key" do
114
+ adapter = described_class.of(work)
115
+ fsid = adapter.keys[0]
116
+ expect(adapter.include?(fsid)).to be true
117
+ end
118
+ end
119
+
120
+ describe "assignment state" do
121
+ it "has empty state for work with no files" do
122
+ bare_work = NewspaperPage.new
123
+ bare_work.title = ['No files to see here']
124
+ bare_work.save!
125
+ adapter = described_class.of(bare_work)
126
+ expect(adapter.keys.empty?).to be true
127
+ expect(adapter.state).to eq 'empty'
128
+ end
129
+
130
+ it "has 'dirty' state when files assigned" do
131
+ adapter = described_class.of(work)
132
+ expect(adapter.state).to eq 'saved'
133
+ adapter.assign(tiff_path)
134
+ # changes to dirty
135
+ expect(adapter.state).to eq 'dirty'
136
+ # unassign path again to empty assigned queue:
137
+ adapter.unassign(tiff_path)
138
+ # now we are back to 'saved', since no changes remain queued:
139
+ expect(adapter.state).to eq 'saved'
140
+ end
141
+ end
142
+
143
+ describe "commits changes" do
144
+ # These jobs we need whitelisted to run now, at minimum:
145
+ do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]
146
+ # These we skip: [CharacterizeJob, CreateDerivativesJob]
147
+ # -- skipping these saves 10-15 seconds on attachment example
148
+
149
+ permission_methods = [
150
+ :edit_users,
151
+ :read_users,
152
+ :discover_users,
153
+ :edit_groups,
154
+ :read_groups,
155
+ :discover_groups
156
+ ]
157
+
158
+ let(:bare_work) do
159
+ bare_work = NewspaperPage.new
160
+ bare_work.title = ['No files to see here']
161
+ bare_work.save!
162
+ bare_work
163
+ end
164
+
165
+ it "commits unassign (file deletions)" do
166
+ adapter = described_class.of(work)
167
+ expect(adapter.keys.size).to eq 1
168
+ adapter.unassign(adapter.keys[0])
169
+ adapter.commit!
170
+ expect(adapter.keys.size).to eq 0
171
+ expect(work.members.select { |m| m.class == FileSet }.size).to eq 0
172
+ end
173
+
174
+ it "commit for assignment invokes actor stack" do
175
+ work = bare_work
176
+ adapter = described_class.of(work)
177
+ adapter.assign(tiff_path)
178
+ allow(Hyrax::CurationConcern.actor).to receive(:create).and_return(true)
179
+ expect(Hyrax::CurationConcern.actor).to receive(:create)
180
+ expect(adapter.commit!).to be true
181
+ end
182
+
183
+ it "commits successful file attachment", perform_enqueued: do_now_jobs do
184
+ work = bare_work
185
+ adapter = described_class.of(work)
186
+ adapter.assign(tiff_path)
187
+ adapter.commit!
188
+ # whitelisted jobs (do_now_jobs) performed as effect of commit!
189
+ # are configured to effectively run inline. Reloading work
190
+ # should refresh the work.members, and by consequence adapter.keys
191
+ work.reload
192
+ expect(adapter.keys.size).to eq 1
193
+ expect(work.members.select { |m| m.class == FileSet }.size).to eq 1
194
+ expect(adapter.names).to include 'ocr_gray.tiff'
195
+ end
196
+
197
+ it "copies work perimssions to fileset", perform_enqueued: do_now_jobs do
198
+ adapter = described_class.of(bare_work)
199
+ adapter.assign(tiff_path)
200
+ adapter.commit!
201
+ bare_work.reload
202
+ fileset = bare_work.members.select { |w| w.class == FileSet }[0]
203
+ permission_methods.each do |m|
204
+ expect(fileset.send(m)).to match_array bare_work.send(m)
205
+ end
206
+ expect(fileset.visibility).to eq bare_work.visibility
207
+ end
208
+ end
209
+
210
+ describe "derivative access" do
211
+ it "gets derivatives for first fileset" do
212
+ fileset = work.members.select { |m| m.class == FileSet }[0]
213
+ adapter = described_class.of(work)
214
+ # adapts same context(s):
215
+ expect(adapter.derivatives.fileset.id).to eq fileset.id
216
+ expect(adapter.derivatives.work).to be work
217
+ expect(adapter.derivatives.class).to eq \
218
+ NewspaperWorks::Data::WorkDerivatives
219
+ # transitive parent/child relationship, can traverse to adapter from
220
+ # derivatives:
221
+ expect(adapter.derivatives.parent.parent).to be adapter
222
+ end
223
+ end
224
+ end
@@ -0,0 +1,158 @@
1
+ require 'spec_helper'
2
+ require 'newspaper_works_fixtures'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::BatchIssueIngester do
5
+ include_context "ingest test fixtures"
6
+
7
+ # lccn, paths for respective media:
8
+ let(:pdf_lccn) { 'sn93059126' }
9
+ let(:tiff_lccn) { 'sn93059126' }
10
+ let(:jp2_lccn) { 'sn85058233' }
11
+ let(:pdf_path) { File.join(pdf_fixtures, pdf_lccn) }
12
+ let(:tiff_path) { File.join(tiff_fixtures, tiff_lccn) }
13
+ let(:jp2_path) { File.join(jp2_fixtures, jp2_lccn) }
14
+
15
+ describe "ingester construction and composition" do
16
+ it "constructs ingester from PDF with expected metadata" do
17
+ # given path to single batch
18
+ ingester = described_class.new(pdf_path)
19
+ # correctly parses LCCN from path:
20
+ expect(ingester.lccn).to eq pdf_lccn
21
+ expect(ingester.path).to eq pdf_path
22
+ end
23
+
24
+ it "constructs ingester from TIFF with expected metadata" do
25
+ ingester = described_class.new(tiff_path)
26
+ expect(ingester.lccn).to eq tiff_lccn
27
+ expect(ingester.path).to eq tiff_path
28
+ end
29
+
30
+ it "constructs ingester from JP2 with expected metadata" do
31
+ ingester = described_class.new(jp2_path)
32
+ expect(ingester.lccn).to eq jp2_lccn
33
+ expect(ingester.path).to eq jp2_path
34
+ end
35
+
36
+ it "constructs ingester with publication metadata" do
37
+ ingester = described_class.new(pdf_path)
38
+ expect(ingester.publication).to be_a NewspaperWorks::Ingest::PublicationInfo
39
+ expect(ingester.publication.lccn).to eq ingester.lccn
40
+ expect(ingester.publication.title).to eq 'The weekly journal'
41
+ end
42
+
43
+ it "constructs ingester with explicit LCCN" do
44
+ # path is for The weekly journal (Chicopee Mass), pass LCCN for other pub
45
+ sltrib = 'sn83045396'
46
+ ingester = described_class.new(pdf_path, lccn: sltrib)
47
+ expect(ingester.lccn).to eq sltrib
48
+ expect(ingester.publication.lccn).to eq ingester.lccn
49
+ expect(ingester.publication.title).to eq 'Salt Lake tribune'
50
+ end
51
+
52
+ it "constructs ingester enumerating PDF files" do
53
+ ingester = described_class.new(pdf_path)
54
+ pdfs = Dir.entries(pdf_path).select { |name| name.end_with?('.pdf') }
55
+ paths = pdfs.map { |name| File.join(pdf_path, name) }
56
+ issues = ingester.issues
57
+ expect(issues).to be_a NewspaperWorks::Ingest::PDFIssues
58
+ expect(issues.size).to eq pdfs.size
59
+ expect(issues.keys).to match_array paths
60
+ end
61
+
62
+ it "constructs ingester enumerating issues of page images" do
63
+ ingester = described_class.new(tiff_path)
64
+ entries = Dir.entries(tiff_path)
65
+ .map { |name| File.join(tiff_path, name) }
66
+ .select { |v| !v.end_with?('.') && File.directory?(v) }
67
+ issues = ingester.issues
68
+ expect(issues).to be_a NewspaperWorks::Ingest::ImageIngestIssues
69
+ expect(issues.size).to eq 2
70
+ expect(issues.keys).to match_array entries
71
+ end
72
+ end
73
+
74
+ describe "ingester behavior" do
75
+ # Ensure LCCN has no initial publication NewspaperTitle asset:
76
+ let(:pdf_lccn) do
77
+ v = 'sn93059126'
78
+ NewspaperTitle.where(lccn: v).delete_all
79
+ v
80
+ end
81
+
82
+ let(:tiff_lccn) do
83
+ v = 'sn93059126'
84
+ NewspaperTitle.where(lccn: v).delete_all
85
+ v
86
+ end
87
+
88
+ let(:jp2_lccn) do
89
+ v = 'sn85058233'
90
+ NewspaperTitle.where(lccn: v).delete_all
91
+ v
92
+ end
93
+
94
+ let(:pdf_issue_path) { File.join(pdf_path, '1853060401.pdf') }
95
+ let(:tiff_issue_path) { File.join(tiff_path, '1853060401') }
96
+ let(:jp2_issue_path) { File.join(jp2_path, '1935080201') }
97
+
98
+ def single_issue_dir(lccn, target_issue_path)
99
+ Hyrax.config.whitelisted_ingest_dirs |= ['/tmp']
100
+ parent_dir = Dir.mktmpdir
101
+ dir = File.join(parent_dir, lccn)
102
+ FileUtils.mkdir(dir)
103
+ FileUtils.cp_r(target_issue_path, dir)
104
+ dir
105
+ end
106
+
107
+ def job_enqueued?(job)
108
+ jobs = ActiveJob::Base.queue_adapter.enqueued_jobs.map { |j| j[:job] }
109
+ jobs.include?(job)
110
+ end
111
+
112
+ def expect_administrative_metadata(work)
113
+ expect(work.depositor).to eq User.batch_user.user_key
114
+ expect(work.admin_set).to eq AdminSet.find(AdminSet::DEFAULT_ID)
115
+ expect(work.visibility).to eq 'open'
116
+ end
117
+
118
+ # rubocop:disable Metrics/AbcSize
119
+ def issue_ingest(lccn, path, page_count, metadata)
120
+ dir = single_issue_dir(lccn, path)
121
+ ingester = described_class.new(dir)
122
+ ingester.ingest
123
+ # Outcomes tested:
124
+ # 1. NewspaperTitle for Publication created, and contains issue
125
+ issue = NewspaperTitle.where(lccn: lccn).first.members.to_a[0]
126
+ # 2. Metadata:
127
+ expect(issue.publication_date).to eq metadata[:publication_date]
128
+ expect(issue.title).to contain_exactly metadata[:title]
129
+ expect_administrative_metadata(issue)
130
+ if page_count > 0
131
+ # 3. Child pages created
132
+ expect(issue.pages.size).to eq page_count
133
+ expect_administrative_metadata(issue.pages[0])
134
+ # 4. Creation of issue PDF enqueued:
135
+ expect(job_enqueued?(NewspaperWorks::ComposeIssuePDFJob)).to be true
136
+ end
137
+ # clean up after temp dir:
138
+ FileUtils.rmtree(File.dirname(dir))
139
+ end
140
+ # rubocop:enable Metrics/AbcSize
141
+
142
+ it "ingests PDFs" do
143
+ expected_metadata = {
144
+ title: "The weekly journal: June 4, 1853",
145
+ publication_date: "1853-06-04"
146
+ }
147
+ issue_ingest(pdf_lccn, pdf_issue_path, 0, expected_metadata)
148
+ end
149
+
150
+ it "ingests JP2 page images (as TIFF) into an issue with child pages" do
151
+ expected_metadata = {
152
+ title: "The Park record: August 2, 1935",
153
+ publication_date: "1935-08-02"
154
+ }
155
+ issue_ingest(jp2_lccn, jp2_issue_path, 2, expected_metadata)
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Ingest::ChronAmPublicationInfo do
4
+ let(:lccn1) { 'sn94051019' }
5
+ let(:lccn2) { 'sn84038814' }
6
+ let(:bad_lccn) { 'sn99999999' }
7
+
8
+ describe "gets metadata" do
9
+ it "gets simple metadata" do
10
+ meta = described_class.new(lccn1)
11
+ expect(meta.title).to eq 'Marysville daily news'
12
+ expect(meta.issn).to be_nil
13
+ expect(meta.oclcnum).to eq 'ocm30043558'
14
+ expect(meta.place_name).to eq 'Marysville, Calif.'
15
+ expect(meta.place_of_publication).to eq 'http://sws.geonames.org/5370984/'
16
+ end
17
+
18
+ it "gets related item metadata" do
19
+ meta1 = described_class.new(lccn1)
20
+ meta2 = described_class.new(lccn2)
21
+ # lccn2 succeeds lccn1, favors lccn.loc.gov URL as authoritative:
22
+ expect(meta1.succeeded_by).to eq "https://lccn.loc.gov/#{lccn2}"
23
+ # lccn1 precedes lccn2, favors chronam URL as authoritative, since
24
+ # catalog.loc.gov and lccn.loc.gov do not have records for this LCCN:
25
+ expect(meta2.preceded_by).to eq "https://chroniclingamerica.loc.gov/lccn/sn94051019"
26
+ end
27
+ end
28
+
29
+ describe "error handling" do
30
+ it "handles unknown LCCN (404)" do
31
+ meta = described_class.new(bad_lccn)
32
+ expect(meta.empty?).to be true
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+ require 'newspaper_works_fixtures'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::FromCommand do
5
+ include_context "ingest test fixtures"
6
+
7
+ describe "alternate construction" do
8
+ let(:klass) do
9
+ Class.new do
10
+ extend NewspaperWorks::Ingest::FromCommand
11
+
12
+ attr_accessor :path, :opts
13
+
14
+ def initialize(path, opts = {})
15
+ @path = path
16
+ @opts = opts
17
+ end
18
+ end
19
+ end
20
+
21
+ def construct(args)
22
+ klass.from_command(
23
+ args,
24
+ 'rake newspaper_works:ingest_pdf_issues --'
25
+ )
26
+ end
27
+
28
+ let(:lccn) { 'sn93059126' }
29
+
30
+ let(:pdf_path) { File.join(pdf_fixtures, lccn) }
31
+
32
+ let(:fake_argv) do
33
+ [
34
+ 'newspaper_works:ingest_pdf_issues',
35
+ '--',
36
+ "--path=#{pdf_path}"
37
+ ]
38
+ end
39
+
40
+ let(:more_argv) do
41
+ fake_argv + [
42
+ "--lccn=#{lccn}"
43
+ ]
44
+ end
45
+
46
+ let(:most_argv) do
47
+ more_argv + [
48
+ "--admin_set=admin_set/default",
49
+ "--depositor=#{User.batch_user.user_key}",
50
+ "--visibility=open"
51
+ ]
52
+ end
53
+
54
+ it "calls constructor with minimal options parsed" do
55
+ ingester = construct(fake_argv)
56
+ expect(ingester.path).to eq pdf_path
57
+ expect(ingester.opts[:path]).to eq pdf_path
58
+ end
59
+
60
+ it "calls constructor with explict lccn option" do
61
+ ingester = construct(more_argv)
62
+ expect(ingester.path).to eq pdf_path
63
+ expect(ingester.opts[:lccn]).to eq lccn
64
+ end
65
+
66
+ it "calls constructor with all options" do
67
+ ingester = construct(most_argv)
68
+ expect(ingester.path).to eq pdf_path
69
+ expect(ingester.opts[:lccn]).to eq lccn
70
+ expect(ingester.opts[:admin_set]).to eq 'admin_set/default'
71
+ expect(ingester.opts[:depositor]).to eq User.batch_user.user_key
72
+ expect(ingester.opts[:visibility]).to eq 'open'
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Ingest::ImageIngestIssues do
4
+ include_context 'ingest test fixtures'
5
+
6
+ let(:lccn) { 'sn93059126' }
7
+
8
+ let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }
9
+
10
+ let(:pub_path) { File.join(tiff_fixtures, lccn) }
11
+
12
+ let(:expected_paths) do
13
+ entries = Dir.entries(pub_path).map { |p| File.join(pub_path, p) }
14
+ entries.select { |p| File.directory?(p) && !File.basename(p).start_with?('.') }
15
+ end
16
+
17
+ let(:issues) { described_class.new(pub_path, publication) }
18
+
19
+ describe " construction and metadata" do
20
+ it "constructs with path and publication" do
21
+ expect(issues.path).to eq pub_path
22
+ expect(issues.publication).to be publication
23
+ expect(issues.lccn).to eq lccn
24
+ expect(issues.publication.lccn).to eq lccn
25
+ end
26
+
27
+ it "enumerates valid directories as IssueImages objects" do
28
+ expect(issues.size).to eq 2
29
+ enumerated = issues.values
30
+ expect(enumerated.size).to eq issues.size
31
+ sample = enumerated[0]
32
+ expect(sample).to be_a NewspaperWorks::Ingest::IssueImages
33
+ expect(File.dirname(sample.path)).to eq pub_path
34
+ end
35
+
36
+ it "presents hash-like mapping behavior" do
37
+ # Keys are paths to directory containing issue images:
38
+ expect(issues.keys).to match_array expected_paths
39
+ # info and [] methods get IssueImages object for given path key:
40
+ issue1 = issues[issues.keys[0]]
41
+ issue2 = issues.info(issues.keys[1])
42
+ expect(issue1).to be_a NewspaperWorks::Ingest::IssueImages
43
+ expect(issue2).to be_a NewspaperWorks::Ingest::IssueImages
44
+ expect(issue1.path).to eq issues.keys[0]
45
+ end
46
+
47
+ it "enumerates pairs like a hash" do
48
+ issues.each_value do |v|
49
+ expect(v).to be_a NewspaperWorks::Ingest::IssueImages
50
+ end
51
+ issues.each_key do |k|
52
+ expect(expected_paths).to include k
53
+ end
54
+ issues.each do |path, info|
55
+ expect(expected_paths).to include path
56
+ expect(info).to be_a NewspaperWorks::Ingest::IssueImages
57
+ expect(info.path).to eq path
58
+ end
59
+ expect(issues.to_a.size).to eq 2
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+ require 'newspaper_works_fixtures'
3
+
4
+ RSpec.shared_context "ingest test fixtures", shared_context: :metadata do
5
+ # Path to fixtures gem for sample fixtures, whitelisted:
6
+ let(:fixtures_path) do
7
+ path = NewspaperWorksFixtures.file_fixtures
8
+ whitelist = Hyrax.config.whitelisted_ingest_dirs
9
+ whitelist.push(path) unless whitelist.include?(path)
10
+ path
11
+ end
12
+
13
+ # directory containing PDF fixture batch(es)
14
+ let(:pdf_fixtures) { File.join(fixtures_path, 'pdf_batch') }
15
+
16
+ # directory containing TIFF image fixtures batch(es)
17
+ let(:tiff_fixtures) { File.join(fixtures_path, 'tiff_batch') }
18
+
19
+ # directory containing JP2 image fixture batch(es)
20
+ let(:jp2_fixtures) { File.join(fixtures_path, 'jp2_batch') }
21
+ end
22
+
23
+ RSpec.shared_examples 'ingest adapter IO' do
24
+ # define the path to the file we will use for multiple examples
25
+ let(:path) do
26
+ fixtures = File.join(NewspaperWorks::GEM_PATH, 'spec/fixtures/files')
27
+ File.join(fixtures, 'page1.tiff')
28
+ end
29
+
30
+ # DRY for this matcher's use in multiple examples:
31
+ let(:have_io_and_correct_filename) do
32
+ have_attributes(
33
+ filename: 'page1.tiff',
34
+ io: an_object_responding_to(:read)
35
+ )
36
+ end
37
+
38
+ describe "file loading" do
39
+ # the first half of work done by ingest is done by load(); these
40
+ # assertions test load() independent of work done.
41
+
42
+ it "loads stream from path" do
43
+ adapter = build(:newspaper_page_ingest)
44
+ adapter.load(path)
45
+ expect(adapter).to have_io_and_correct_filename
46
+ end
47
+
48
+ it "loads stream from a Pathname object" do
49
+ adapter = build(:newspaper_page_ingest)
50
+ adapter.load(Pathname.new(path))
51
+ expect(adapter).to have_io_and_correct_filename
52
+ end
53
+
54
+ it "loads an File object" do
55
+ adapter = build(:newspaper_page_ingest)
56
+ File.open(path) do |file|
57
+ adapter.load(file)
58
+ expect(adapter).to have_io_and_correct_filename
59
+ end
60
+ end
61
+
62
+ it "loads a StringIO with filename" do
63
+ adapter = build(:newspaper_page_ingest)
64
+ io = StringIO.new('File Content Here, Maybe')
65
+ adapter.load(io, filename: 'page1.tiff')
66
+ expect(adapter).to have_io_and_correct_filename
67
+ end
68
+
69
+ it "raises on missing explicit filename for StringIO" do
70
+ adapter = build(:newspaper_page_ingest)
71
+ io = StringIO.new('File Content Here, Maybe')
72
+ expect { adapter.load(io) }.to raise_error(ArgumentError)
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,65 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Ingest::IssueImages do
4
+ include_context 'ingest test fixtures'
5
+
6
+ # LCCN for TIFF fixture examples:
7
+ let(:lccn_tiff) { 'sn93059126' }
8
+
9
+ # LCCN for JP2 fixture examples:
10
+ let(:lccn_jp2) { 'sn85058233' }
11
+
12
+ let(:tiff_issue_path) { File.join(tiff_fixtures, lccn_tiff, '1853060401') }
13
+
14
+ let(:jp2_issue_path) { File.join(jp2_fixtures, lccn_jp2, '1935080201') }
15
+
16
+ # Publication for TIFF fixtures:
17
+ let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn_tiff) }
18
+
19
+ # Publication for JP2 fixtures:
20
+ let(:publication_jp2) { NewspaperWorks::Ingest::PublicationInfo.new(lccn_jp2) }
21
+
22
+ let(:issue) { described_class.new(tiff_issue_path, publication) }
23
+
24
+ describe "issue construction and metadata" do
25
+ it "constructs with path and publication" do
26
+ expect(issue.path).to eq tiff_issue_path
27
+ expect(issue.filename).to eq File.basename(tiff_issue_path)
28
+ expect(issue.publication).to be publication
29
+ expect(issue.lccn).to eq lccn_tiff
30
+ expect(issue.publication.lccn).to eq lccn_tiff
31
+ end
32
+
33
+ it "extracts date, edition, title from filename" do
34
+ expect(issue.publication_date).to eq '1853-06-04'
35
+ expect(issue.edition_number).to eq 1
36
+ expect(issue.title).to contain_exactly 'The weekly journal: June 4, 1853'
37
+ end
38
+
39
+ it "enumerates pages (TIFF)" do
40
+ expect(issue.to_a.size).to eq 4
41
+ expect(issue.keys.size).to eq 4
42
+ # lexical ordering:
43
+ expect(issue.keys).to eq issue.keys.sort
44
+ issue.entries.each_with_index do |pair, idx|
45
+ # PageImage object value:
46
+ page_image = pair[1]
47
+ expect(page_image).to be_a NewspaperWorks::Ingest::PageImage
48
+ expect(page_image.lccn).to eq publication.lccn
49
+ # path key
50
+ expect(page_image.path).to eq pair[0]
51
+ expect(page_image.issue).to be issue
52
+ # Verify lexical ordering (for page_number in file name vs. seq num):
53
+ expect(page_image.page_number.to_i).to eq idx + 1
54
+ # page numbering matches sequence numbering:
55
+ expected_title = "The weekly journal: June 4, 1853: Page #{page_image.page_number}"
56
+ expect(page_image.title).to contain_exactly expected_title
57
+ end
58
+ end
59
+
60
+ it "enumerates pages (JP2)" do
61
+ issue = described_class.new(jp2_issue_path, publication_jp2)
62
+ expect(issue.to_a.size).to eq 2
63
+ end
64
+ end
65
+ end