newspaper_works 0.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (461)
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,86 @@
1
+ require 'spec_helper'
2
+ require 'model_shared'
3
+
4
+ RSpec.describe NewspaperWorks::IndexesRelationships do
5
+ # use an instance var so we can create fixtures only once
6
+ before(:all) { @page_for_indexrel, @page2 = model_fixtures(:newspaper_pages) }
7
+ let(:page_indexer) { NewspaperPageIndexer.new(@page_for_indexrel) }
8
+ let(:solr_doc) { {} }
9
+
10
+ describe '#index_relationships' do
11
+ before { page_indexer.index_relationships(@page_for_indexrel, solr_doc) }
12
+ it 'sets the relationship fields correctly' do
13
+ expect(solr_doc['publication_id_ssi']).not_to be_falsey
14
+ expect(solr_doc['container_id_ssi']).not_to be_falsey
15
+ expect(solr_doc['issue_id_ssi']).not_to be_falsey
16
+ expect(solr_doc['article_ids_ssim']).not_to be_falsey
17
+ end
18
+ end
19
+
20
+ describe '#index_publication' do
21
+ before { page_indexer.index_publication(@page_for_indexrel, solr_doc) }
22
+ it 'sets the publication title fields correctly' do
23
+ expect(solr_doc['publication_id_ssi']).not_to be_falsey
24
+ expect(solr_doc['publication_title_ssi']).to eq("Yesterday's News")
25
+ expect(solr_doc['publication_unique_id_ssi']).to eq("sn1234567")
26
+ end
27
+ end
28
+
29
+ describe '#index_container' do
30
+ before { page_indexer.index_container(@page_for_indexrel, solr_doc) }
31
+ it 'sets the container fields correctly' do
32
+ expect(solr_doc['container_id_ssi']).not_to be_falsey
33
+ expect(solr_doc['container_title_ssi']).to eq('Reel123a')
34
+ end
35
+ end
36
+
37
+ describe '#index_issue' do
38
+ before { page_indexer.index_issue(@page_for_indexrel, solr_doc) }
39
+ it 'sets the issue fields correctly' do
40
+ expect(solr_doc['issue_id_ssi']).not_to be_falsey
41
+ expect(solr_doc['issue_title_ssi']).to eq('December 7, 1941')
42
+ expect(solr_doc['issue_edition_number_ssi']).to eq('1')
43
+ expect(solr_doc['publication_date_dtsi']).to eq('1941-12-07T00:00:00Z')
44
+ end
45
+ end
46
+
47
+ describe '#index_pages' do
48
+ let(:article) { @page_for_indexrel.articles.first }
49
+ let(:article_indexer) { NewspaperArticleIndexer.new(article) }
50
+ before { page_indexer.index_pages(article, solr_doc) }
51
+ it 'sets the issue fields correctly' do
52
+ expect(solr_doc['page_ids_ssim'].first).to eq(@page_for_indexrel.id)
53
+ expect(solr_doc['page_titles_ssim'].first).to eq('Page 1')
54
+ end
55
+ end
56
+
57
+ describe '#index_siblings' do
58
+ let(:solr_doc_2) { {} }
59
+ before do
60
+ page_indexer.index_siblings(@page_for_indexrel, solr_doc)
61
+ page_indexer.index_siblings(@page2, solr_doc_2)
62
+ end
63
+ it 'sets the prev/next fields correctly' do
64
+ expect(solr_doc['is_preceding_page_of_ssi']).not_to be_falsey
65
+ expect(solr_doc['is_following_page_of_ssi']).to be_nil
66
+ expect(solr_doc['first_page_bsi']).to be_truthy
67
+ expect(solr_doc_2['is_preceding_page_of_ssi']).to be_nil
68
+ expect(solr_doc_2['is_following_page_of_ssi']).not_to be_falsey
69
+ end
70
+ end
71
+
72
+ describe '#index_articles' do
73
+ before { page_indexer.index_articles(@page_for_indexrel, solr_doc) }
74
+ it 'sets the article fields correctly' do
75
+ expect(solr_doc['article_ids_ssim']).not_to be_blank
76
+ expect(solr_doc['article_titles_ssim'].first).to eq('Happening now')
77
+ end
78
+ end
79
+
80
+ describe '#index_parent_facets' do
81
+ before { page_indexer.index_parent_facets(@page_for_indexrel.issue, solr_doc) }
82
+ it 'sets the facet fields correctly' do
83
+ expect(solr_doc['language_sim']).not_to be_blank
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperArticleIndexer do
4
+ let(:article) do
5
+ NewspaperArticle.new(
6
+ id: 'foo1234',
7
+ title: ['Whatever'],
8
+ genre: %w[http://id.loc.gov/vocabulary/graphicMaterials/tgm000098 FOO]
9
+ )
10
+ end
11
+ let(:indexer) { described_class.new(article) }
12
+
13
+ describe '#generate_solr_document' do
14
+ subject { indexer.generate_solr_document }
15
+
16
+ it 'adds the correct fields to the Solr document' do
17
+ expect(subject['genre_tesim']).not_to be_falsey
18
+ expect(subject['genre_sim']).not_to be_falsey
19
+ end
20
+
21
+ it 'indexes genre terms with a URI correctly' do
22
+ expect(subject['genre_tesim']).to include('Advertisement')
23
+ end
24
+
25
+ it 'indexes genre terms without a URI correctly' do
26
+ expect(subject['genre_sim']).to include('FOO')
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,19 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperIssueIndexer do
4
+ let(:issue) do
5
+ NewspaperIssue.new(
6
+ id: 'foo1234',
7
+ title: ['Whatever']
8
+ )
9
+ end
10
+ let(:indexer) { described_class.new(issue) }
11
+
12
+ describe '#generate_solr_document' do
13
+ subject { indexer.generate_solr_document }
14
+
15
+ it 'adds the default edition field to the Solr document' do
16
+ expect(subject['edition_number_tesim']).to eq('1')
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperTitleIndexer do
4
+ let(:ntitle) do
5
+ NewspaperTitle.new(
6
+ id: 'foo1234',
7
+ title: ['Whatever'],
8
+ publication_date_start: '1975',
9
+ publication_date_end: '1995'
10
+ )
11
+ end
12
+ let(:indexer) { described_class.new(ntitle) }
13
+
14
+ describe '#generate_solr_document' do
15
+ subject { indexer.generate_solr_document }
16
+
17
+ it 'indexes date ranges correctly' do
18
+ expect(subject['publication_date_start_dtsi']).to eq('1975-01-01T00:00:00Z')
19
+ expect(subject['publication_date_end_dtsi']).to eq('1995-12-31T23:59:59Z')
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,23 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::NewspaperCoreIndexer do
4
+ let(:geonames_uri) { 'http://sws.geonames.org/4950065/' }
5
+ let(:pop) { Hyrax::ControlledVocabularies::Location.new(geonames_uri) }
6
+ let(:article) do
7
+ NewspaperArticle.new(
8
+ id: 'foo1234',
9
+ title: ['Whatever'],
10
+ place_of_publication: pop
11
+ )
12
+ end
13
+ let(:indexer) { described_class.new(article) }
14
+
15
+ describe '#generate_solr_document' do
16
+ subject { indexer.generate_solr_document }
17
+ it 'processes place_of_publication field' do
18
+ expect(subject['place_of_publication_tesim']).to include(geonames_uri)
19
+ expect(subject['place_of_publication_city_sim']).to include('Salem')
20
+ expect(subject['place_of_publication_state_sim']).to include('Massachusetts')
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe NewspaperWorks::Configuration do
4
+ subject { described_class.new }
5
+
6
+ it { is_expected.to respond_to(:publication_unique_id_property) }
7
+ it { is_expected.to respond_to(:publication_unique_id_field) }
8
+
9
+ describe '#publicationunique_id_property' do
10
+ subject { described_class.new.publication_unique_id_property }
11
+ it { is_expected.to eq(:lccn) }
12
+ end
13
+
14
+ describe '#publication_unique_id_field' do
15
+ subject { described_class.new.publication_unique_id_field }
16
+ it { is_expected.to eq('lccn_tesim') }
17
+ end
18
+ end
@@ -0,0 +1,245 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+ require 'misc_shared'
5
+
6
+ RSpec.describe NewspaperWorks::Data::WorkDerivatives do
7
+ include_context "shared setup"
8
+
9
+ let(:bare_work) do
10
+ work = NewspaperPage.new
11
+ work.title = ['Another one']
12
+ work.save!
13
+ work
14
+ end
15
+
16
+ let(:work) do
17
+ # sample work comes from shared setup, but we need derivative, save...
18
+ mk_txt_derivative(sample_work)
19
+ sample_work.save!(validate: false)
20
+ sample_work
21
+ end
22
+
23
+ let(:adapter) { described_class.new(work) }
24
+
25
+ let(:txt1) do
26
+ whitelist = Hyrax.config.whitelisted_ingest_dirs
27
+ whitelist.push('/tmp') unless whitelist.include?('/tmp')
28
+ file = Tempfile.new(['txt1', '.txt'])
29
+ file.write('hello')
30
+ file.flush
31
+ file
32
+ end
33
+
34
+ let(:txt2) do
35
+ file = Tempfile.new('txt2.txt')
36
+ file.write('bye')
37
+ file.flush
38
+ file
39
+ end
40
+
41
+ let(:encoded_text) do
42
+ file = Tempfile.new('txt_encoded.txt', encoding: 'UTF-8')
43
+ file.write('Gorgonzola Dolce® — on sale for £12.50/kg')
44
+ file.flush
45
+ file
46
+ end
47
+
48
+ describe "enumerates available derivatives like hash" do
49
+ it "includes expected derivative path for work" do
50
+ expect(adapter.keys).to include 'txt'
51
+ end
52
+
53
+ it "can be introspected for quantity of derivatives" do
54
+ # `size` method without argument is count of derivatives,
55
+ # functions equivalently to adapter.keys.size
56
+ expect(adapter.size).to eq adapter.keys.size
57
+ end
58
+
59
+ it "enumerates expected derivative extension for work" do
60
+ ext_found = adapter.keys
61
+ expect(ext_found).to include 'txt'
62
+ end
63
+
64
+ it "enumerates expected derivative extension for file set" do
65
+ file_set = work.members.select { |m| m.class == FileSet }[0]
66
+ adapter = described_class.new(file_set)
67
+ ext_found = adapter.keys
68
+ expect(ext_found).to include 'txt'
69
+ end
70
+
71
+ it "enumerates expected derivative extension for file set id" do
72
+ file_set = work.members.select { |m| m.class == FileSet }[0]
73
+ adapter = described_class.new(file_set.id)
74
+ ext_found = adapter.keys
75
+ expect(ext_found).to include 'txt'
76
+ end
77
+ end
78
+
79
+ describe "loads derivatives for a work" do
80
+ it "Loads text derivative path" do
81
+ expect(File.exist?(adapter.path('txt'))).to be true
82
+ expect(adapter.exist?('txt')).to be true
83
+ end
84
+
85
+ it "Loads text derivative data" do
86
+ expect(adapter.data('txt')).to include 'mythical'
87
+ end
88
+
89
+ it "Handles character encoding on read" do
90
+ # replace fixture text derivative for work with encoded text
91
+ adapter.attach(encoded_text.path, 'txt')
92
+ data = adapter.data('txt')
93
+ expect(data).to include '—' # em-dash
94
+ expect(data).to include '£' # gb-pound sign
95
+ expect(data.encoding.to_s).to eq 'UTF-8'
96
+ end
97
+
98
+ it "Loads thumbnail derivative data" do
99
+ mk_thumbnail_derivative(work)
100
+ # get size by loading data
101
+ expect(adapter.data('thumbnail').bytes.size).to eq 16_743
102
+ # get size by File.size via .size method
103
+ expect(adapter.size('thumbnail')).to eq 16_743
104
+ end
105
+
106
+ it "Can access jp2 derivative" do
107
+ mk_jp2_derivative(work)
108
+ expect(File.exist?(adapter.path('jp2'))).to be true
109
+ expect(adapter.exist?('jp2')).to be true
110
+ end
111
+ end
112
+
113
+ describe "create, update, delete derivatives" do
114
+ it "will queue derivative file assignment" do
115
+ adapter = described_class.new(bare_work)
116
+ adapter.assign(example_gray_jp2)
117
+ expect(adapter.assigned).to include example_gray_jp2
118
+ end
119
+
120
+ it "will fail to assign file in non-whitelisted dir" do
121
+ adapter = described_class.new(bare_work)
122
+ # need a non-whitelisted file that exists:
123
+ bad_path = File.expand_path("../../spec_helper.rb", fixture_path)
124
+ expect { adapter.assign(bad_path) }.to raise_error(SecurityError)
125
+ end
126
+
127
+ it "will remove file assignment from queue" do
128
+ adapter = described_class.new(bare_work)
129
+ expect(adapter.state).to eq 'empty'
130
+ adapter.assign(example_gray_jp2)
131
+ expect(adapter.assigned).to include example_gray_jp2
132
+ expect(adapter.state).to eq 'dirty'
133
+ adapter.unassign(example_gray_jp2)
134
+ expect(adapter.assigned).not_to include example_gray_jp2
135
+ expect(adapter.state).to eq 'empty'
136
+ end
137
+
138
+ it "will queue a deletion" do
139
+ # Given a work with a derivative (txt) already assigned
140
+ expect(adapter.state).to eq 'saved'
141
+ # unassigning path...
142
+ adapter.unassign('txt')
143
+ # will lead to queued unassignment (intent to delete)...
144
+ expect(adapter.unassigned).to include 'txt'
145
+ # and a 'dirty' adapter state (unflushed changes):
146
+ expect(adapter.state).to eq 'dirty'
147
+ end
148
+
149
+ it "will flush a removal and addition on commit!" do
150
+ # Given a work with a derivative (txt) already assigned
151
+ expect(adapter.keys).to include 'txt'
152
+ expect(adapter.keys).not_to include 'jp2'
153
+ # unassigning path...
154
+ adapter.unassign('txt')
155
+ # and assigning another attachment:
156
+ adapter.assign(example_gray_jp2)
157
+ # ...committing these will flush the changes (synchronously):
158
+ adapter.commit!
159
+ expect(adapter.keys).not_to include 'txt'
160
+ expect(adapter.keys).to include 'jp2'
161
+ expect(adapter.size('jp2')).to eq 27_703
162
+ end
163
+
164
+ it "can attach derivative from file" do
165
+ expect(adapter.keys).not_to include 'jp2'
166
+ adapter.attach(example_gray_jp2, 'jp2')
167
+ expect(adapter.exist?('jp2')).to be true
168
+ expect(adapter.path('jp2')).not_to be nil
169
+ expect(File.size(adapter.path('jp2'))).to eq File.size(example_gray_jp2)
170
+ expect(adapter.keys).to include 'jp2'
171
+ d_path = path_factory.derivative_path_for_reference(adapter.fileset_id, 'jp2')
172
+ expect(adapter.values).to include d_path
173
+ end
174
+
175
+ it "can replace aderivative with new attachment" do
176
+ adapter.attach(txt1.path, 'txt')
177
+ expect(adapter.data('txt')).to eq 'hello'
178
+ adapter.attach(txt2.path, 'txt')
179
+ expect(adapter.data('txt')).to eq 'bye'
180
+ end
181
+
182
+ it "can delete an attached derivative" do
183
+ adapter.attach(txt1.path, 'txt')
184
+ expect(adapter.keys).to include 'txt'
185
+ expect(adapter.data('txt')).to eq 'hello'
186
+ adapter.delete('txt')
187
+ expect(adapter.path('txt')).to be nil
188
+ expect(adapter.keys).not_to include 'txt'
189
+ end
190
+
191
+ it "persists log of attachment to RDBMS" do
192
+ adapter.assign(txt1.path)
193
+ result = NewspaperWorks::DerivativeAttachment.find_by(
194
+ fileset_id: adapter.fileset.id,
195
+ path: txt1.path,
196
+ destination_name: 'txt'
197
+ )
198
+ expect(result).not_to be_nil
199
+ end
200
+
201
+ it "persists a log of path relation to primary file" do
202
+ # this is an integration test by practical necessity, with
203
+ # WorkFiles adapting a bare work with no fileset.
204
+ work_files = NewspaperWorks::Data::WorkFiles.of(bare_work)
205
+ work_files.assign(example_gray_jp2)
206
+ adapter = work_files.derivatives
207
+ adapter.assign(txt1.path)
208
+ result = NewspaperWorks::IngestFileRelation.find_by(
209
+ derivative_path: txt1.path,
210
+ file_path: example_gray_jp2
211
+ )
212
+ expect(result).not_to be_nil
213
+ end
214
+
215
+ it "commits queued derivatives" do
216
+ NewspaperWorks::IngestFileRelation.where(file_path: example_gray_jp2).delete_all
217
+ work_files = NewspaperWorks::Data::WorkFiles.of(bare_work)
218
+ work_files.assign(example_gray_jp2)
219
+ adapter = work_files.derivatives
220
+ adapter.assign(txt1.path)
221
+ expect(File.exist?(txt1.path)).to be true
222
+ expect(adapter.keys.size).to eq 0
223
+ # we need a fileset, saved with import_url, attached to work:
224
+ fileset = valid_file_set
225
+ fileset.import_url = 'file://' + example_gray_jp2
226
+ fileset.save!
227
+ bare_work.members.push(fileset)
228
+ bare_work.save!
229
+ fileset.reload
230
+ expect(fileset.member_of[0].id).to eq bare_work.id
231
+ # with a new adapter instance...
232
+ adapter2 = described_class.of(bare_work)
233
+ # call .commit_queued! with our fileset...
234
+ expect(File.exist?(txt1.path)).to be true
235
+ adapter2.commit_queued!(fileset)
236
+ # ...which should result in saved, reloaded derivative...
237
+ expect(adapter2.keys.size).to eq 1
238
+ expect(File.size(adapter2.values[0])).to eq File.size(txt1.path)
239
+ # ...also found via Hyrax::DerivativePath:
240
+ found = Hyrax::DerivativePath.derivatives_for_reference(fileset.id)
241
+ expect(found.size).to eq 1
242
+ expect(File.size(found[0])).to eq File.size(txt1.path)
243
+ end
244
+ end
245
+ end
@@ -0,0 +1,99 @@
1
+ require 'spec_helper'
2
+ require 'misc_shared'
3
+
4
+ RSpec.describe NewspaperWorks::Data::WorkFile do
5
+ include_context "shared setup"
6
+
7
+ # sample objects:
8
+ let(:work) { work_with_file }
9
+
10
+ describe "adapter composition" do
11
+ it "adapts work with nil fileset" do
12
+ adapter = described_class.new(work)
13
+ expect(adapter.work).to be work
14
+ expect(adapter.fileset).to be_nil
15
+ end
16
+
17
+ it "adapts work with 'of' alt constructor" do
18
+ adapter = described_class.of(work)
19
+ expect(adapter.work).to be work
20
+ end
21
+
22
+ it "adapts work and explicitly provided fileset" do
23
+ fileset = work.members.select { |m| m.class == FileSet }[0]
24
+ adapter = described_class.of(work, fileset)
25
+ expect(adapter.work).to be work
26
+ expect(adapter.fileset).to be fileset
27
+ end
28
+
29
+ it "constructs with a parent object, if provided" do
30
+ fileset = work.members.select { |m| m.class == FileSet }[0]
31
+ parent = double('parent')
32
+ adapter = described_class.of(work, fileset, parent)
33
+ expect(adapter.parent).to be parent
34
+ end
35
+ end
36
+
37
+ describe "read file metadata" do
38
+ it "gets original filename" do
39
+ fileset = work.members.select { |m| m.class == FileSet }[0]
40
+ adapter = described_class.of(work, fileset)
41
+ expect(adapter.name).to eq fileset.original_file.original_name
42
+ expect(adapter.name).to eq 'credits.md'
43
+ end
44
+
45
+ it "gets miscellaneous metadata field values" do
46
+ fileset = work.members.select { |m| m.class == FileSet }[0]
47
+ adapter = described_class.of(work, fileset)
48
+ # expectations for accessors of size, date_*, mime_type
49
+ expect(adapter.size).to eq File.size(txt_path)
50
+ expect(adapter.name).to eq 'credits.md'
51
+ expect(adapter.mime_type).to eq 'text/plain'
52
+ # getting actual value for date fields requires digging through
53
+ # multiple layers of ActiveTriples indirection...
54
+ expect(adapter.date_created.to_a[0].to_s).to eq static_date.to_s
55
+ expect(adapter.date_modified.to_a[0].to_s).to eq static_date.to_s
56
+ end
57
+ end
58
+
59
+ describe "read binary via transparent repository checkout" do
60
+ it "gets path (from checkout)" do
61
+ fileset = work.members.select { |m| m.class == FileSet }[0]
62
+ adapter = described_class.of(work, fileset)
63
+ # Get a path to a working copy
64
+ path = adapter.path
65
+ expect(path).to be_a String
66
+ expect(File.exist?(path)).to be true
67
+ # size of working copy binary checkout matches size in computed metadata
68
+ expect(File.size(path)).to eq fileset.original_file.size
69
+ end
70
+
71
+ it "gets data as bytes" do
72
+ fileset = work.members.select { |m| m.class == FileSet }[0]
73
+ adapter = described_class.of(work, fileset)
74
+ # Get data from the working copy
75
+ data = adapter.data
76
+ expect(data).to be_a String
77
+ # size of working copy binary checkout matches size in computed metadata
78
+ expect(data.size).to eq fileset.original_file.size
79
+ end
80
+
81
+ it "runs block on data as IO" do
82
+ fileset = work.members.select { |m| m.class == FileSet }[0]
83
+ adapter = described_class.of(work, fileset)
84
+ adapter.with_io { |io| expect(io.read.size).to eq File.size(txt_path) }
85
+ end
86
+ end
87
+
88
+ describe "derivative access" do
89
+ it "gets derivatives for file" do
90
+ fileset = work.members.select { |m| m.class == FileSet }[0]
91
+ adapter = described_class.of(work, fileset)
92
+ expect(adapter.derivatives.class).to eq \
93
+ NewspaperWorks::Data::WorkDerivatives
94
+ expect(adapter.derivatives.fileset).to be fileset
95
+ expect(adapter.derivatives.work).to be work
96
+ expect(adapter.derivatives.parent).to be adapter
97
+ end
98
+ end
99
+ end