newspaper_works 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (461)
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Ingest::LCPublicationInfo do
4
+ let(:lccn1) { 'sn83021453' }
5
+ let(:lccn2) { 'sn83045396' }
6
+ let(:bad_lccn) { 'sn99999999' }
7
+
8
+ describe "gets metadata" do
9
+ it "gets simple metadata" do
10
+ meta = described_class.new(lccn1)
11
+ expect(meta.title).to eq 'Salt Lake daily tribune'
12
+ expect(meta.issn).to be_nil
13
+ expect(meta.oclcnum).to eq 'ocm10170377'
14
+ expect(meta.place_name).to eq 'Salt Lake City, Utah'
15
+ expect(meta.place_of_publication).to eq 'http://sws.geonames.org/5780993/'
16
+ end
17
+
18
+ it "gets related item metadata" do
19
+ meta1 = described_class.new(lccn1)
20
+ meta2 = described_class.new(lccn2)
21
+ # lccn2 succeeds lccn1, favors lccn.loc.gov URL as authoritative:
22
+ expect(meta1.succeeded_by).to eq "https://lccn.loc.gov/#{lccn2}"
23
+ # lccn1 precedes lccn2, favors lccn.loc.gov URL as authoritative:
24
+ expect(meta2.preceded_by).to eq "https://lccn.loc.gov/sn83021453"
25
+ end
26
+ end
27
+
28
+ describe "error handling" do
29
+ it "handles unknown LCCN (empty mods)" do
30
+ meta = described_class.new(bad_lccn)
31
+ expect(meta.empty?).to be true
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,131 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::BatchIngester do
5
+ include_context "ndnp fixture setup"
6
+
7
+ describe "adapter construction" do
8
+ it "loads batch to operate on" do
9
+ adapter = described_class.new(batch1)
10
+ expect(adapter.batch).to be_a NewspaperWorks::Ingest::NDNP::BatchXMLIngest
11
+ expect(adapter.batch.path).to eq adapter.path
12
+ end
13
+
14
+ it "finds batch xml, if given path containing batch" do
15
+ parent_path = File.dirname(batch1)
16
+ adapter = described_class.new(parent_path)
17
+ expect(adapter.path).to eq batch1
18
+ expect(adapter.batch.path).to eq adapter.path
19
+ end
20
+
21
+ it "constructs adapter with hash options" do
22
+ user = User.batch_user.user_key
23
+ adapter = described_class.new(
24
+ batch1,
25
+ depositor: user
26
+ )
27
+ expect(adapter.opts[:depositor]).to eq user
28
+ end
29
+ end
30
+
31
+ describe "ingests issues" do
32
+ def expect_start_finish_logging(adapter)
33
+ expect(adapter).to receive(:write_log).with(
34
+ satisfy { |v| v.include?('Beginning NDNP batch ingest') }
35
+ ).once
36
+ expect(adapter).to receive(:write_log).with(
37
+ satisfy { |v| v.include?('NDNP batch ingest complete') }
38
+ ).once
39
+ end
40
+
41
+ it "calls ingest for all issues in batch" do
42
+ adapter = described_class.new(batch1)
43
+ issue_ingest_call_count = 0
44
+ # rubocop:disable RSpec/AnyInstance (we really need to stub this way)
45
+ allow_any_instance_of(NewspaperWorks::Ingest::NDNP::IssueIngester).to \
46
+ receive(:ingest) { issue_ingest_call_count += 1 }
47
+ # rubocop:enable RSpec/AnyInstance
48
+ expect_start_finish_logging(adapter)
49
+ adapter.ingest
50
+ expect(issue_ingest_call_count).to eq 4
51
+ end
52
+ end
53
+
54
+ describe "command invocation" do
55
+ def construct(args)
56
+ described_class.from_command(
57
+ args,
58
+ 'rake newspaper_works:ingest_ndnp --'
59
+ )
60
+ end
61
+
62
+ let(:fake_argv) do
63
+ [
64
+ 'newspaper_works:ingest_ndnp',
65
+ '--',
66
+ "--path=#{batch1}"
67
+ ]
68
+ end
69
+
70
+ let(:fake_argv2) do
71
+ [
72
+ 'newspaper_works:ingest_ndnp',
73
+ '--',
74
+ "--path=#{batch1}",
75
+ "--admin_set=admin_set/default",
76
+ "--depositor=#{User.batch_user.user_key}",
77
+ "--visibility=open"
78
+ ]
79
+ end
80
+
81
+ it "creates ingester from command arguments" do
82
+ adapter = construct(fake_argv)
83
+ expect(adapter).to be_a described_class
84
+ expect(adapter.path).to eq batch1
85
+ end
86
+
87
+ it "creates ingester from expanded command arguments" do
88
+ adapter = construct(fake_argv2)
89
+ expect(adapter).to be_a described_class
90
+ expect(adapter.path).to eq batch1
91
+ expect(adapter.opts[:depositor]).to eq User.batch_user.user_key
92
+ expect(adapter.opts[:visibility]).to eq 'open'
93
+ expect(adapter.opts[:admin_set]).to eq 'admin_set/default'
94
+ end
95
+
96
+ it "creates ingester from command with dir path" do
97
+ # command can accept a parent directory for batch:
98
+ base_path = File.dirname(batch1)
99
+ fake_argv = ['newspaper_works:ingest_ndnp', '--', "--path=#{base_path}"]
100
+ adapter = construct(fake_argv)
101
+ expect(adapter).to be_a described_class
102
+ # adapter.path is path to actual XML
103
+ expect(adapter.path).to eq batch1
104
+ end
105
+
106
+ it "exits on file not found for batch" do
107
+ fake_argv = ['newspaper_works:ingest_ndnp', '--', "--path=123/45/5678"]
108
+ begin
109
+ construct(fake_argv)
110
+ rescue SystemExit => e
111
+ expect(e.status).to eq(1)
112
+ end
113
+ end
114
+
115
+ it "exits on missing path for batch" do
116
+ fake_argv = ['newspaper_works:ingest_ndnp', '--']
117
+ begin
118
+ construct(fake_argv)
119
+ rescue SystemExit => e
120
+ expect(e.status).to eq(1)
121
+ end
122
+ end
123
+
124
+ it "exits on unexpected arguments" do
125
+ fake_argv = ['newspaper_works:ingest_ndnp', '--', '--foo=bar']
126
+ expect { construct(fake_argv) }.to raise_error(
127
+ OptionParser::InvalidOption
128
+ )
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::BatchXMLIngest do
5
+ include_context "ndnp fixture setup"
6
+
7
+ describe "sample batch" do
8
+ let(:batch) { described_class.new(batch1) }
9
+
10
+ it "gets batch name" do
11
+ expect(batch.name).to eq 'batch_test'
12
+ end
13
+
14
+ it "gets issue by path" do
15
+ path = batch.issue_paths[0]
16
+ issue = batch.get(path)
17
+ expect(issue).to be_a NewspaperWorks::Ingest::NDNP::IssueIngest
18
+ expect(issue.path).to eq path
19
+ end
20
+
21
+ it "gets reel/container by path" do
22
+ path = batch.container_paths[0]
23
+ container = batch.get(path)
24
+ expect(container).to be_a NewspaperWorks::Ingest::NDNP::ContainerIngest
25
+ expect(container.path).to eq path
26
+ end
27
+
28
+ it "enumerates container paths" do
29
+ reel_ids = batch.container_paths
30
+ expect(reel_ids).to be_an Array
31
+ expect(reel_ids.size).to eq 3
32
+ end
33
+
34
+ it "enumerates issue paths" do
35
+ issue_ids = batch.issue_paths
36
+ expect(issue_ids).to be_an Array
37
+ expect(issue_ids.size).to eq 4
38
+ end
39
+
40
+ it "enumerates issues via method" do
41
+ issues = batch.issues
42
+ expect(issues).to be_an Array
43
+ expect(issues.size).to eq 4
44
+ expect(issues[0]).to be_a NewspaperWorks::Ingest::NDNP::IssueIngest
45
+ end
46
+
47
+ it "makes batch fixed-size enumerable of issues" do
48
+ expect(batch.size).to eq batch.issue_paths.size
49
+ issues = batch.to_a # implied .each
50
+ expect(issues.size).to eq batch.size
51
+ expect(issues.size).to eq 4
52
+ issues.each do |issue|
53
+ expect(issue).to be_a NewspaperWorks::Ingest::NDNP::IssueIngest
54
+ end
55
+ end
56
+
57
+ it "enumerates containers" do
58
+ reels = batch.containers
59
+ expect(reels).to be_an Array
60
+ expect(reels.size).to eq 3
61
+ expect(reels[0]).to be_a NewspaperWorks::Ingest::NDNP::ContainerIngest
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,44 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::ContainerIngest do
5
+ include_context "ndnp fixture setup"
6
+
7
+ describe "sample fixture 'batch_test_ver01'" do
8
+ let(:reel) { described_class.new(reel1) }
9
+
10
+ it "gets metadata" do
11
+ expect(reel.metadata).to be_a \
12
+ NewspaperWorks::Ingest::NDNP::ContainerMetadata
13
+ # uses same Nokogiri document context:
14
+ expect(reel.metadata.doc).to be reel.doc
15
+ # has identifier method equivalent to reel number
16
+ expect(reel.identifier).to eq reel.metadata.reel_number
17
+ end
18
+
19
+ it "gets control image as PageIngest, by dmdid" do
20
+ page = reel.page_by_dmdid('targetModsBib1')
21
+ expect(page).to be_a NewspaperWorks::Ingest::NDNP::PageIngest
22
+ expect(page.dmdid).to eq 'targetModsBib1'
23
+ end
24
+
25
+ it "shares xml document context with contained pages" do
26
+ page = reel.page_by_dmdid('targetModsBib1')
27
+ expect(page.doc).to be reel.doc
28
+ end
29
+
30
+ it "enumerates expected issues" do
31
+ # enumerate by casting reel to Array
32
+ issues = reel.to_a
33
+ expect(issues.size).to eq 2
34
+ expect(issues[0]).to be_a NewspaperWorks::Ingest::NDNP::IssueIngest
35
+ expect(issues[0].path).to eq reel.issue_paths[0]
36
+ end
37
+
38
+ it "gets size, in issue count" do
39
+ issues = reel.to_a
40
+ expect(reel.size).to eq issues.size
41
+ expect(reel.size).to eq reel.issue_paths.size
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,126 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+ require 'misc_shared'
4
+
5
+ RSpec.describe NewspaperWorks::Ingest::NDNP::ContainerIngester do
6
+ include_context "ndnp fixture setup"
7
+ include_context "shared setup"
8
+
9
+ # use FactoryBot for publication, issue
10
+ let(:publication) { create(:newspaper_title) }
11
+ let(:issue) do
12
+ issue = create(:newspaper_issue)
13
+ publication.members << issue
14
+ publication.save!
15
+ issue
16
+ end
17
+ let(:linked_publication) { issue.publication }
18
+
19
+ let(:issue_data) do
20
+ NewspaperWorks::Ingest::NDNP::IssueIngest.new(issue2)
21
+ end
22
+
23
+ let(:page_data) do
24
+ data = issue_data.to_a[0]
25
+ # some NDNP samples missing TIFF, put dummy in place of missing, as needed
26
+ data.files = data.files.map do |path|
27
+ File.exist?(path) ? path : File.join(fixture_path, 'ocr_gray.tiff')
28
+ end
29
+ data
30
+ end
31
+
32
+ let(:page) do
33
+ ingester = NewspaperWorks::Ingest::NDNP::PageIngester.new(page_data, issue)
34
+ ingester.ingest
35
+ ingester.target
36
+ end
37
+
38
+ # reel data via ContainerIngest, ContainerMetadata objects:
39
+ let(:reel_data) { issue_data.container }
40
+ let(:metadata) { reel_data.metadata }
41
+ let(:sn) { metadata.reel_number }
42
+
43
+ describe "asset construction and linking" do
44
+ before do
45
+ # trick with testing ingest that does find-or-create on reel is
46
+ # that we want to clear previous reel assets left over from
47
+ # other tests, most of the time.
48
+ containers = NewspaperContainer.where(identifier: sn)
49
+ next if containers.size.zero?
50
+ # first, unlink the reel from publication before deleting:
51
+ container = containers[0]
52
+ publication = container.publication
53
+ publication.members.delete(container) unless publication.nil?
54
+ # then delete reel
55
+ containers.delete_all
56
+ end
57
+
58
+ it "constructs adapter wth reel_data, publication asset" do
59
+ adapter = described_class.new(reel_data, linked_publication)
60
+ expect(adapter.source).to be reel_data
61
+ expect(adapter.publication).to be linked_publication
62
+ end
63
+
64
+ it "constructs a publication-linked asset for reel" do
65
+ adapter = described_class.new(reel_data, linked_publication)
66
+ expect(adapter.target).to be_nil
67
+ adapter.ingest
68
+ expect(adapter.target).to be_a NewspaperContainer
69
+ expect(adapter.target.publication).to be_a NewspaperTitle
70
+ end
71
+
72
+ it "links pages to reel" do
73
+ adapter = described_class.new(reel_data, linked_publication)
74
+ adapter.ingest
75
+ adapter.link(page)
76
+ expect(adapter.target.pages.map(&:id)).to include page.id
77
+ expect(adapter.target.pages.size).to eq 1
78
+ end
79
+
80
+ it "copies reel metadata" do
81
+ adapter = described_class.new(reel_data, linked_publication)
82
+ adapter.ingest
83
+ reel = adapter.target
84
+ expect(reel.identifier).to contain_exactly sn
85
+ expect(reel.held_by).to eq metadata.held_by
86
+ expect(reel.title).to contain_exactly "Microform reel (#{sn})"
87
+ expect(reel.publication_date_start).to eq metadata.publication_date_start
88
+ expect(reel.publication_date_end).to eq metadata.publication_date_end
89
+ end
90
+
91
+ it "sets default administrative metadata with default construction" do
92
+ adapter = described_class.new(reel_data, linked_publication)
93
+ adapter.ingest
94
+ asset = adapter.target
95
+ expect(asset.depositor).to eq User.batch_user.user_key
96
+ expect(asset.admin_set).to eq AdminSet.find(AdminSet::DEFAULT_ID)
97
+ expect(asset.visibility).to eq 'open'
98
+ end
99
+
100
+ it "sets custom administrative metadata" do
101
+ # test one exemplary/representative option:
102
+ adapter = described_class.new(
103
+ reel_data,
104
+ linked_publication,
105
+ visibility: 'open'
106
+ )
107
+ adapter.ingest
108
+ expect(adapter.target.visibility).to eq 'open'
109
+ end
110
+
111
+ it "finds or creates container asset for reel" do
112
+ # No initial container for the reel id, per before block above:
113
+ expect(NewspaperContainer.where(identifier: sn).size).to eq 0
114
+ # create it once
115
+ described_class.new(reel_data, linked_publication).ingest
116
+ result = NewspaperContainer.where(identifier: sn)
117
+ expect(result.size).to eq 1
118
+ expect(result.first.identifier).to contain_exactly sn
119
+ # now do this again, expecting to find the existing container asset:
120
+ described_class.new(reel_data, linked_publication).ingest
121
+ result = NewspaperContainer.where(identifier: sn)
122
+ expect(result.size).to eq 1 # still just one asset
123
+ expect(result.first.identifier).to contain_exactly sn
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::ContainerMetadata do
5
+ include_context "ndnp fixture setup"
6
+
7
+ describe "sample fixture 'batch_test_ver01'" do
8
+ let(:meta) { described_class.new(reel1) }
9
+ let(:meta2) { described_class.new(reel2) }
10
+
11
+ it "gets reel_number" do
12
+ expect(meta.reel_number).to eq "00279557177"
13
+ expect(meta2.reel_number).to eq "00279557281"
14
+ end
15
+
16
+ it "gets held_by" do
17
+ expect(meta.held_by).to eq "University of Utah, Salt Lake City, UT"
18
+ end
19
+
20
+ it "gets genre" do
21
+ expect(meta.genre).to eq 'microfilm'
22
+ end
23
+
24
+ it "gets title" do
25
+ expect(meta.title).to eq 'Daily national Democrat (Marysville, Calif.)'
26
+ end
27
+
28
+ it "gets start date" do
29
+ expect(meta.publication_date_start).to eq '1858-08-13'
30
+ end
31
+
32
+ it "gets end date" do
33
+ expect(meta.publication_date_end).to eq '1858-12-31'
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,108 @@
1
+ require 'spec_helper'
2
+ require 'ndnp_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Ingest::NDNP::IssueIngest do
5
+ include_context "ndnp fixture setup"
6
+
7
+ describe "sample fixture 'batch_local'" do
8
+ let(:issue) { described_class.new(issue1) }
9
+
10
+ it "gets metadata" do
11
+ expect(issue.metadata).to be_a NewspaperWorks::Ingest::NDNP::IssueMetadata
12
+ # uses same Nokogiri document context:
13
+ expect(issue.metadata.doc).to be issue.doc
14
+ # has identifier method equivalent to lccn
15
+ expect(issue.identifier).to eq issue.metadata.lccn
16
+ end
17
+
18
+ it "gets nil container for issue without reel XML" do
19
+ reel = issue.container
20
+ expect(reel).to be_nil
21
+ end
22
+
23
+ it "gets page by dmdid" do
24
+ page = issue.page_by_dmdid('pageModsBib8')
25
+ expect(page).to be_a NewspaperWorks::Ingest::NDNP::PageIngest
26
+ expect(page.metadata.page_sequence_number).to eq 1
27
+ expect(page.dmdid).to eq 'pageModsBib8'
28
+ end
29
+
30
+ it "gets page by sequence number" do
31
+ page = issue.page_by_sequence_number(1)
32
+ expect(page.metadata.page_sequence_number).to eq 1
33
+ expect(page.dmdid).to eq 'pageModsBib8'
34
+ page = issue.page_by_sequence_number(2)
35
+ expect(page.metadata.page_sequence_number).to eq 2
36
+ expect(page.dmdid).to eq 'pageModsBib6'
37
+ end
38
+
39
+ it "shares xml document context with contained pages" do
40
+ page = issue.page_by_sequence_number(1)
41
+ expect(page.doc).to be issue.doc
42
+ end
43
+
44
+ it "enumerates expected pages" do
45
+ # enumerate by casting issue to Array
46
+ pages = issue.to_a
47
+ expect(pages.size).to eq 2
48
+ expect(pages[0]).to be_a NewspaperWorks::Ingest::NDNP::PageIngest
49
+ expect(pages[0].metadata.page_sequence_number).to eq 1
50
+ end
51
+
52
+ it "gets size, in page count" do
53
+ pages = issue.to_a
54
+ expect(issue.size).to eq pages.size
55
+ expect(issue.size).to eq issue.dmdids.size
56
+ end
57
+ end
58
+
59
+ describe "sample fixture 'batch_test_ver01'" do
60
+ let(:issue) { described_class.new(issue2) }
61
+
62
+ it "gets a ContainerIngest for reel providing issue" do
63
+ reel = issue.container
64
+ expect(reel).to be_a NewspaperWorks::Ingest::NDNP::ContainerIngest
65
+ expect(reel.path).to end_with '_1.xml'
66
+ end
67
+
68
+ it "gets metadata" do
69
+ expect(issue.metadata).to be_a NewspaperWorks::Ingest::NDNP::IssueMetadata
70
+ # uses same Nokogiri document context:
71
+ expect(issue.metadata.doc).to be issue.doc
72
+ # has identifier method equivalent to lccn
73
+ expect(issue.identifier).to eq issue.metadata.lccn
74
+ end
75
+
76
+ it "gets page by dmdid" do
77
+ page = issue.page_by_dmdid('pageModsBib1')
78
+ expect(page).to be_a NewspaperWorks::Ingest::NDNP::PageIngest
79
+ expect(page.metadata.page_sequence_number).to eq 1
80
+ expect(page.dmdid).to eq 'pageModsBib1'
81
+ end
82
+
83
+ it "shares xml document context with contained pages" do
84
+ page = issue.page_by_sequence_number(1)
85
+ expect(page.doc).to be issue.doc
86
+ end
87
+
88
+ it "gets page by sequence number" do
89
+ page = issue.page_by_sequence_number(1)
90
+ expect(page.metadata.page_sequence_number).to eq 1
91
+ expect(page.dmdid).to eq 'pageModsBib1'
92
+ end
93
+
94
+ it "enumerates expected pages" do
95
+ # enumerate by casting issue to Array
96
+ pages = issue.to_a
97
+ expect(pages.size).to eq 1
98
+ expect(pages[0]).to be_a NewspaperWorks::Ingest::NDNP::PageIngest
99
+ expect(pages[0].metadata.page_sequence_number).to eq 1
100
+ end
101
+
102
+ it "gets size, in page count" do
103
+ pages = issue.to_a
104
+ expect(issue.size).to eq pages.size
105
+ expect(issue.size).to eq issue.dmdids.size
106
+ end
107
+ end
108
+ end