newspaper_works 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (461) hide show
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,77 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+ require 'faraday'
4
+
5
# Specs for NewspaperWorks::Ingest::NewspaperPageIngest: ingest a TIFF fixture
# through the adapter, then check the resulting file set, the file attached to
# the work, and the permissions/visibility present on the file set.
RSpec.describe NewspaperWorks::Ingest::NewspaperPageIngest do
  include_context "shared setup"

  # define the path to the file we will use for multiple examples
  let(:path) do
    File.join(fixture_path, 'page1.tiff')
  end

  it_behaves_like('ingest adapter IO')

  describe "file import and attachment" do
    # Job classes handed to the `perform_enqueued:` example metadata below.
    do_now_jobs = [
      IngestJob,
      IngestLocalFileJob,
      InheritPermissionsJob,
      VisibilityCopyJob
    ]

    # Permission readers whose values must agree between work and file set.
    permission_methods = %i[
      edit_users
      read_users
      discover_users
      edit_groups
      read_groups
      discover_groups
    ]

    # Members of +work+ that are file sets. Uses `is_a?` rather than an exact
    # class comparison, so subclasses of FileSet are also counted.
    def file_sets_of(work)
      work.members.select { |member| member.is_a?(FileSet) }
    end

    # Checks the label on +fileset+ and, after a reload, that its original
    # file is a Hydra::PCDM::File.
    def verify_pcdm_fileset(fileset)
      # Hyrax always sets label (if not title) on fileset:
      expect(fileset.label).to eq 'page1.tiff'
      # reload file set and check on original file
      fileset.reload
      expect(fileset.original_file).to be_a Hydra::PCDM::File
    end

    # Checks that +work+ has exactly one attached file, that the file exists
    # on disk, and that its size equals the size of the source at +path+.
    def verify_attached_file(work, path)
      work.reload
      files = NewspaperWorks::Data::WorkFiles.of(work)
      expect(files.keys.size).to eq 1
      attached = files.values.first
      expect(File.exist?(attached.path)).to be true
      expect(attached.size).to eq File.size(path)
    end

    it "ingests file data and saves", perform_enqueued: do_now_jobs do
      adapter = build(:newspaper_page_ingest)
      adapter.ingest(path)
      file_sets = file_sets_of(adapter.work)
      expect(file_sets.size).to eq 1
      verify_pcdm_fileset(file_sets.first)
      verify_attached_file(adapter.work, path)
    end

    # For (minimal) page objects constructed in tests, PDF ingest,
    # permission setting on fileset happens as result of attachment
    # process (`NewspaperWorks::Data::WorkFiles`), via the
    # `NewspaperWorks::Data::handle_after_create_fileset` method,
    # since the CreateWithRemoteFilesActor in Hyrax unfortunately
    # does not invoke InheritPermissionsJob.
    it "copies work permissions to fileset", perform_enqueued: do_now_jobs do
      adapter = build(:newspaper_page_ingest)
      adapter.ingest(path)
      adapter.work.reload
      file_sets = file_sets_of(adapter.work)
      # Fail with a clear message, not NoMethodError on nil, if nothing attached:
      expect(file_sets.size).to eq 1
      fileset = file_sets.first
      permission_methods.each do |reader|
        expect(fileset.public_send(reader)).to match_array adapter.work.public_send(reader)
      end
      expect(fileset.visibility).to eq adapter.work.visibility
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
# Specs for NewspaperWorks::Ingest::PageImage: constructor path validation
# and the page number / title exposed for a page image of a fixture issue.
RSpec.describe NewspaperWorks::Ingest::PageImage do
  include_context 'ingest test fixtures'

  let(:lccn) { 'sn93059126' }
  let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }
  let(:issue_path) { File.join(tiff_fixtures, lccn, '1853060401') }
  let(:issue) { NewspaperWorks::Ingest::IssueImages.new(issue_path, publication) }

  describe "page construction and metadata" do
    it "validates path to page image file" do
      expect do
        described_class.new('/path/to/nonexistent', issue, 1)
      end.to raise_error(ArgumentError)
    end

    it "extracts page number, title from image filename" do
      # first key of the issue is used as the page image path
      first_image = issue.keys[0]
      page_image = described_class.new(first_image, issue, 1)
      expect(page_image.page_number).to eq("1")
      expected_title = "The weekly journal: June 4, 1853: Page #{page_image.page_number}"
      expect(page_image.title).to contain_exactly(expected_title)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+
3
# Specs for NewspaperWorks::Ingest::PdfImages: image dimensions, ppi and
# color information reported for the four-page sample PDF fixture.
RSpec.describe NewspaperWorks::Ingest::PdfImages do
  let(:path) do
    Pathname.new(NewspaperWorks::GEM_PATH)
            .join('spec/fixtures/files', 'sample-4page-issue.pdf')
            .to_s
  end

  let(:pdfimages) { described_class.new(path) }

  describe "get image sizing from PDF" do
    # One example per reader; each generated description reads "gets <reader>".
    { width: 7200, height: 9600, ppi: 400 }.each do |reader, expected|
      it "gets #{reader}" do
        expect(pdfimages.public_send(reader)).to be expected
      end
    end
  end

  describe "get image info from PDF" do
    it "gets color info" do
      # `color` yields (at least) three values: name, channel count, bit depth
      name, channel_count, bit_depth = pdfimages.color
      expect(name).to eq('gray')
      expect(channel_count).to be(1)
      expect(bit_depth).to be(1)
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
# Specs for NewspaperWorks::Ingest::PDFIssue: attributes set at construction
# and the metadata (date, edition, title) exposed for a fixture issue PDF.
RSpec.describe NewspaperWorks::Ingest::PDFIssue do
  include_context 'ingest test fixtures'

  let(:lccn) { 'sn93059126' }
  let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }
  let(:pdf_path) { File.join(pdf_fixtures, lccn, '1853060401.pdf') }

  # Issue under test, built once per example from the fixture path.
  let(:pdf_issue) { described_class.new(pdf_path, publication) }

  describe "issue construction and metadata" do
    it "constructs with path and publication" do
      expect(pdf_issue.path).to eq(pdf_path)
      expect(pdf_issue.filename).to eq(File.basename(pdf_path))
      # identity, not just equality: the very same publication object is kept
      expect(pdf_issue.publication).to be(publication)
      expect(pdf_issue.lccn).to eq(lccn)
      expect(pdf_issue.publication.lccn).to eq(lccn)
    end

    it "extracts date, edition, title from filename" do
      expect(pdf_issue.publication_date).to eq('1853-06-04')
      expect(pdf_issue.edition_number).to eq(1)
      expect(pdf_issue.title).to contain_exactly('The weekly journal: June 4, 1853')
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+
3
# Specs for NewspaperWorks::Ingest::PDFIssues: a hash-like collection, keyed
# by PDF path, of PDFIssue objects found in a publication's fixture directory.
RSpec.describe NewspaperWorks::Ingest::PDFIssues do
  include_context 'ingest test fixtures'

  let(:lccn) { 'sn93059126' }

  let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }

  let(:pub_path) { File.join(pdf_fixtures, lccn) }

  # Collection under test, built once per example.
  let(:issues) { described_class.new(pub_path, publication) }

  # Every directory entry of pub_path (including '.' and '..') as a joined path.
  let(:entry_paths) do
    Dir.entries(pub_path).map { |name| File.join(pub_path, name) }
  end

  describe " construction and metadata" do
    it "constructs with path and publication" do
      expect(issues.path).to eq pub_path
      expect(issues.publication).to be publication
      expect(issues.lccn).to eq lccn
      expect(issues.publication.lccn).to eq lccn
    end

    it "enumerates valid pdfs as PDFIssue objects" do
      expect(issues.size).to eq 5
      enumerated = issues.values
      expect(enumerated.size).to eq issues.size
      sample = enumerated.first
      expect(sample).to be_a NewspaperWorks::Ingest::PDFIssue
      expect(File.dirname(sample.path)).to eq pub_path
    end

    it "presents hash-like mapping behavior" do
      pdf_paths = entry_paths.select { |p| p.end_with?('.pdf') }
      # Keys are paths to file:
      expect(issues.keys).to match_array pdf_paths
      # info and [] methods get PDFIssue for given path key:
      first_key, second_key = issues.keys
      issue1 = issues[first_key]
      issue2 = issues.info(second_key)
      expect(issue1).to be_a NewspaperWorks::Ingest::PDFIssue
      expect(issue2).to be_a NewspaperWorks::Ingest::PDFIssue
      expect(issue1.path).to eq first_key
      # the `info` lookup must also resolve to the issue for its own key:
      expect(issue2.path).to eq second_key
    end

    it "enumerates pairs like a hash" do
      issues.each_value do |v|
        expect(v).to be_a NewspaperWorks::Ingest::PDFIssue
      end
      issues.each_key do |k|
        expect(entry_paths).to include k
      end
      issues.each do |path, info|
        expect(entry_paths).to include path
        expect(info).to be_a NewspaperWorks::Ingest::PDFIssue
        expect(info.path).to eq path
      end
      expect(issues.to_a.size).to eq 5
    end
  end
end
@@ -0,0 +1,110 @@
1
+ require 'spec_helper'
2
+ require 'mini_magick'
3
+
4
# Specs for NewspaperWorks::Ingest::PdfPages: Ghostscript device selection,
# embedded text, ppi, and the per-page TIFF files enumerated from three
# sample PDFs (one-bit scan, born-digital color, grayscale NDNP).
RSpec.describe NewspaperWorks::Ingest::PdfPages do
  # Path (String) to +name+ under the gem's spec/fixtures/files directory.
  def fixture_file(name)
    Pathname.new(NewspaperWorks::GEM_PATH).join('spec/fixtures/files', name).to_s
  end

  # Standard output of the `identify` command-line tool for +path+.
  # The path is passed as a separate argv element (no shell interpolation),
  # and capture2 waits for the process, so no pipe is left undrained.
  def identify(path)
    output, _status = Open3.capture2('identify', path)
    output
  end

  let(:sample1) { fixture_file('sample-4page-issue.pdf') }
  let(:sample2) { fixture_file('sample-color-newsletter.pdf') }
  let(:sample3) { fixture_file('ndnp-sample1.pdf') }

  let(:onebitpages) { described_class.new(sample1) }
  let(:colorpages) { described_class.new(sample2) }
  let(:graypages) { described_class.new(sample3) }

  describe "implementation details" do
    it "pdfinfo gets PdfImages, memoized" do
      pdfimages = onebitpages.pdfinfo
      expect(pdfimages).to be_a(NewspaperWorks::Ingest::PdfImages)
      pdfimages2 = onebitpages.pdfinfo
      # same object, method only fetches once:
      expect(pdfimages2).to equal pdfimages
    end

    it "gets correct Ghostscript TIFF output" do
      expect(onebitpages.gsdevice).to eq 'tiffg4'
      expect(colorpages.gsdevice).to eq 'tiff24nc'
    end

    it "gets text elements saved in PDF" do
      # should be little to nothing in scanned work, besides
      # output of Ghostscript banner:
      expect(onebitpages.gstext.length).to eq 0
      # the color sample is born-digital and thus has text in PDF;
      # this checks for > 160 (non-trivial) text, though this text
      # stream is at least 6k, if you strip out excess whitespace.
      expect(colorpages.gstext.length).to be > 160
    end

    it "gets reasonable ppi" do
      # 400 ppi native:
      expect(onebitpages.ppi).to eq 400
      # sourced from scan:
      expect(onebitpages.ppi).to eq onebitpages.pdfinfo.ppi
      # digital native content gets forced to 400 ppi...
      expect(colorpages.ppi).to eq 400
      # ...because the images in this sample are not reasonably
      # representative, due to low PPI (not scans of whole pages):
      expect(colorpages.ppi).to be > colorpages.pdfinfo.ppi
    end
  end

  describe "splits PDF into pages with TIFF tmpfiles" do
    it "page filenames of TIFF files are ordered" do
      colorpages.entries.each_with_index do |path, idx|
        n = idx + 1
        # dot escaped so only a literal ".tiff" extension matches
        expect(path).to match(/page#{n}\.tiff/)
      end
    end

    it "color sample splits into color TIFF per page" do
      colorpages.entries.each do |path|
        image = MiniMagick::Image.open(path)
        expect(image.mime_type).to eq 'image/tiff'
        expect(image.colorspace).to start_with 'DirectClass sRGB'
      end
    end

    it "one bit sample splits into Group 4 TIFF per page" do
      onebitpages.entries.each do |path|
        output = identify(path)
        expect(output).to include '1-bit'
        expect(output).to include 'Bilevel'
        expect(output).to include 'TIFF'
      end
    end

    it "one bit sample is 7200x9600 scan, verify" do
      onebitpages.entries.each do |path|
        image = MiniMagick::Image.open(path)
        expect(image.width).to eq 7200
        expect(image.height).to eq 9600
      end
    end

    it "processes Grayscale NDNP PDF correctly" do
      pages = graypages.entries
      expect(pages.length).to eq 1
      pages.each do |path|
        output = identify(path)
        expect(output).to include 'Grayscale'
        expect(output).to include '8-bit'
        expect(output).to include 'TIFF'
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the NewspaperWorks::Ingest::PubFinder mixin: finding an existing
# NewspaperTitle by LCCN, linking an issue to a found-or-created title, and
# the metadata present on a newly created title.
RSpec.describe NewspaperWorks::Ingest::PubFinder do
  describe "mixin publication find-or-create module" do
    # Anonymous class that mixes in the module under test.
    let(:klass) { Class.new.include(NewspaperWorks::Ingest::PubFinder) }

    let(:ingester) { klass.new }

    # use factory for saved NewspaperIssue:
    let(:issue) { create(:newspaper_issue) }

    let(:publication) { create(:newspaper_title) }

    # Remove any titles already stored under these LCCNs before each example.
    before do
      %w[sn2099999999 sn2036999999 sn82014496].each do |stale_lccn|
        NewspaperTitle.where(lccn: stale_lccn).delete_all
      end
    end

    it "finds existing publication, if it exists" do
      found = ingester.find_publication(publication.lccn)
      expect(found).to be_a(NewspaperTitle)
    end

    it "links existing publication on find-or-create" do
      existing_lccn = publication.lccn
      ingester.find_or_create_publication_for_issue(issue, existing_lccn, nil, {})
      publication.reload
      expect(publication.members.to_a).to include(issue)
    end

    it "links issue to new publication" do
      new_lccn = 'sn2099999999'
      expect(ingester.find_publication(new_lccn)).to be_nil
      ingester.find_or_create_publication_for_issue(issue, new_lccn, nil, {})
      created = ingester.find_publication(new_lccn)
      expect(created).to be_a(NewspaperTitle)
      expect(created.members.to_a).to include(issue)
    end

    it "copies metadata for created publication" do
      new_lccn = 'sn82014496'
      expect(ingester.find_publication(new_lccn)).to be_nil
      created = ingester.create_publication(new_lccn, nil, {})
      expect(created.title).to contain_exactly("Rocky Mountain news")
      place_uris = created.place_of_publication.map { |place| place.to_uri.to_s }
      expect(place_uris).to contain_exactly("http://sws.geonames.org/5419384/")
      expect(created.language).to contain_exactly('eng')
      expect(created.oclcnum).to eq('ocm03946163')
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
# Specs for NewspaperWorks::Ingest::PublicationInfo: metadata returned for
# known LCCNs, which backing implementation is selected, and the behavior
# for an unknown LCCN.
RSpec.describe NewspaperWorks::Ingest::PublicationInfo do
  # prefers lccn.loc.gov:
  let(:lccn1) { 'sn83021453' }
  let(:lccn2) { 'sn83045396' }
  # prefers ChronAm:
  let(:lccn3) { 'sn94051019' }
  let(:bad_lccn) { 'sn99999999' }

  describe "gets metadata" do
    it "gets simple metadata" do
      meta = described_class.new(lccn1)
      expect(meta.title).to eq 'Salt Lake daily tribune'
      expect(meta.issn).to be_nil
      expect(meta.oclcnum).to eq 'ocm10170377'
      expect(meta.place_name).to eq 'Salt Lake City, Utah'
      expect(meta.place_of_publication).to eq 'http://sws.geonames.org/5780993/'
    end

    it "gets related item metadata" do
      meta1 = described_class.new(lccn1)
      meta2 = described_class.new(lccn2)
      # lccn2 succeeds lccn1, favors lccn.loc.gov URL as authoritative:
      expect(meta1.succeeded_by).to eq "https://lccn.loc.gov/#{lccn2}"
      # lccn1 precedes lccn2, favors lccn.loc.gov URL as authoritative;
      # built from lccn1 so both expectations follow the `let` values:
      expect(meta2.preceded_by).to eq "https://lccn.loc.gov/#{lccn1}"
    end
  end

  describe "backing authority choice" do
    it "picks default authority of lccn.loc.gov" do
      meta = described_class.new(lccn1)
      expect(meta.implementation).to be_a NewspaperWorks::Ingest::LCPublicationInfo
    end

    it "picks chronam implementation when lccn.loc.gov empty for LCCN" do
      meta = described_class.new(lccn3)
      expect(meta.implementation).to be_a NewspaperWorks::Ingest::ChronAmPublicationInfo
    end

    it "responds to known metadata" do
      meta = described_class.new(lccn3)
      # respond_to passes only if the object responds to every listed name
      expect(meta).to respond_to(
        :lccn,
        :issn,
        :title,
        :oclcnum,
        :place_name,
        :place_of_publication,
        :preceded_by,
        :succeeded_by
      )
    end
  end

  describe "error handling" do
    it "handles unknown LCCN (empty mods)" do
      meta = described_class.new(bad_lccn)
      expect(meta.empty?).to be true
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+
3
# Specs for module-level methods of NewspaperWorks::Ingest: default admin set
# lookup, assignment of administrative metadata to a work, and publication
# metadata lookup by LCCN.
#
# Uses `RSpec.describe` (like the sibling ingest specs) so the file does not
# depend on the globally exposed `describe` DSL.
RSpec.describe NewspaperWorks::Ingest do
  describe "Ingest module methods" do
    it "gets default admin set" do
      admin_set = described_class.find_admin_set
      expect(admin_set).to be_an AdminSet
      expect(admin_set.id).to eq AdminSet::DEFAULT_ID
    end

    # initial expectations of a just-created work without administrative
    # metadata set yet; AKA the "before" picture
    def expect_initial_work_state(work)
      expect(work.admin_set).to be_nil
      expect(work.depositor).to be_nil
      expect(work.visibility).to eq 'restricted'
      expect(work.date_modified).to be_nil
      expect(work.date_uploaded).to be_nil
      expect(work.resource_type).to be_empty
      expect(work.state).to be_nil
    end

    it "sets default assigned metadata for a work" do
      work = NewspaperTitle.create!(title: ["hello"])
      expect_initial_work_state(work)
      described_class.assign_administrative_metadata(work)
      # the "after" picture: every field checked above is now populated
      expect(work.admin_set).to eq AdminSet.find(AdminSet::DEFAULT_ID)
      expect(work.depositor).to eq User.batch_user.user_key
      expect(work.visibility).to eq 'open'
      expect(work.state).to be_an ActiveTriples::Resource
      expect(work.state.to_uri.to_s).to eq(
        'http://fedora.info/definitions/1/0/access/ObjState#active'
      )
      expect(work.date_uploaded).to be_a DateTime
      expect(work.date_modified).to eq work.date_uploaded
      expect(work.resource_type).to match_array ['Newspapers']
    end

    it "has method to get publication metadata for lccn" do
      lccn = 'sn84038814'
      metadata = described_class.publication_metadata(lccn)
      expect(metadata).to be_a NewspaperWorks::Ingest::PublicationInfo
      expect(metadata.lccn).to eq lccn
    end
  end
end
@@ -0,0 +1,101 @@
1
+ require 'spec_helper'
2
+
3
# Specs for NewspaperWorks::IssuePDFComposer: construction, PDF validation,
# the error raised when a page PDF is not usable, and composition of a
# combined issue PDF from per-page PDF derivatives.
RSpec.describe NewspaperWorks::IssuePDFComposer do
  let(:bare_issue) do
    build(:newspaper_issue)
  end

  # Fixture directory; also registered in Hyrax's whitelisted ingest dirs.
  # The `let` runs once per example that uses it, and the config list is
  # global, so register the directory only when it is not already present.
  let(:fixtures_path) do
    fixtures = File.join(NewspaperWorks::GEM_PATH, 'spec/fixtures/files')
    whitelisted = Hyrax.config.whitelisted_ingest_dirs
    whitelisted.push(fixtures) unless whitelisted.include?(fixtures)
    fixtures
  end

  let(:pdf_path) do
    File.join(fixtures_path, 'minimal-1-page.pdf')
  end

  let(:broken_pdf) do
    File.join(fixtures_path, 'broken-truncated.pdf')
  end

  # Creates and returns a saved NewspaperPage titled +name+, with one empty
  # file set as member and the file at +path+ assigned and committed through
  # NewspaperWorks::Data::WorkDerivatives.
  def page_with_pdf(name, path)
    # empty+saved fileset: only need id, no primary file, to attach derivatives
    fs = FileSet.create!
    page = NewspaperPage.create!(title: [name])
    page.members << fs
    page.save!
    derivatives = NewspaperWorks::Data::WorkDerivatives.of(page)
    derivatives.assign(path)
    derivatives.commit!
    page
  end

  let(:page1_with_pdf) { page_with_pdf('Page 1', pdf_path) }
  let(:page2_with_pdf) { page_with_pdf('Page 2', pdf_path) }

  let(:broken_page) { page_with_pdf('Broken Page', broken_pdf) }

  # Saved issue with two ordered pages, each carrying a valid page PDF.
  # `create!` (as used for the pages above) surfaces a failed create at once.
  let(:two_page_issue) do
    issue = NewspaperIssue.create!(title: ['Issue Test'])
    issue.ordered_members << page1_with_pdf
    issue.ordered_members << page2_with_pdf
    issue.save!
    issue
  end

  # Saved issue with an empty file set member and one ordered page whose
  # PDF is the truncated fixture.
  let(:unfinished_issue) do
    issue = NewspaperIssue.create!(title: ['Unfinished issue'])
    issue.members << FileSet.create!
    issue.save!
    issue.ordered_members << broken_page
    issue.save!
    issue
  end

  describe "adapter construction" do
    it "constructs adapter" do
      composer = described_class.new(bare_issue)
      expect(composer.issue).to be bare_issue
      expect(composer.page_pdfs).to match_array []
    end
  end

  describe "Validation and handling of not-yet-ready pages" do
    it "validates PDFs" do
      # we can fake issue context with nil on construction to call validate_pdf
      composer = described_class.new(nil)
      expect(composer.validate_pdf(broken_pdf)).to be false
      expect(composer.validate_pdf(pdf_path)).to be true
    end

    it "raises NewspaperWorks::PagesNotReady on incomplete PDF" do
      composer = described_class.new(unfinished_issue)
      expect { composer.compose }.to raise_error(NewspaperWorks::PagesNotReady)
    end
  end

  describe "Construction, attachment of combined PDF" do
    # Job classes handed to the `perform_enqueued:` example metadata below.
    do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]

    # Shorthand for the WorkFiles adapter of +work+.
    def files_of(work)
      NewspaperWorks::Data::WorkFiles.of(work)
    end

    it "creates issue PDF from sources", perform_enqueued: do_now_jobs do
      composer = described_class.new(two_page_issue)
      # no (primary) files attached to issue yet:
      expect(files_of(two_page_issue).keys.size).to eq 0
      # Make the multi-page PDF with IssuePDFComposer#compose:
      composer.compose
      # reload issue files, as they have been updated; check for PDF:
      two_page_issue.reload
      files = files_of(two_page_issue)
      expect(files.keys.size).to eq 1
      # getting path initiates a repository checkout of file:
      path = files.values.first.path
      # we found a PDF, simple check only extension (not validating);
      # the matcher form reports the actual path on failure:
      expect(path).to end_with('pdf')
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the NewspaperWorks::Logging mixin: logger configuration by the
# consuming class, and message delivery to both Rails.logger and the named
# log via write_log.
#
# Uses `RSpec.describe` (like the sibling specs) so the file does not depend
# on the globally exposed `describe` DSL.
RSpec.describe NewspaperWorks::Logging do
  describe "mixin logging module" do
    # Anonymous class that mixes in the module under test.
    let(:klass) do
      Class.new do
        include NewspaperWorks::Logging
      end
    end

    # Fresh, not-yet-configured instance.
    let(:loggable) { klass.new }

    # The same instance after configure_logger('ingest-test') has been called.
    let(:configured) do
      obj = loggable
      # expectation is that this is called by consuming class constructor:
      obj.configure_logger('ingest-test')
      obj
    end

    it "requires configuration by consuming class" do
      name = 'random_testing_logname'
      expect(loggable.instance_variable_get(:@logger)).to be_nil
      expect(described_class.configured).not_to include name
      loggable.configure_logger(name)
      expect(loggable.instance_variable_get(:@logger)).not_to be_nil
      expect(described_class.configured).to include name
    end

    it "logs formatted message to rails logger with write_log" do
      message = "FYI: heads-up, this is a message"
      expect(Rails.logger).to receive(:add).with(
        Logger::INFO,
        configured.message_format(message),
        nil
      )
      configured.write_log(message)
    end

    it "writes to named log file" do
      # need to reset global de-dupe state for additional logger, just for
      # purposes of this test
      # NOTE(review): the module-level list is not restored afterwards;
      # confirm that later examples do not depend on its previous contents.
      described_class.configured = []
      message = "Instant coffee"
      named_log = configured.instance_variable_get(:@named_log)
      expect(named_log).to receive(:add).with(
        Logger::INFO,
        configured.message_format(message),
        nil
      )
      configured.write_log(message)
    end
  end
end