newspaper_works 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (461) hide show
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,314 @@
1
+ require 'hyrax'
2
+
3
+ module NewspaperWorks
4
+ module Data
5
+ # TODO: consider compositional refactoring (not mixins), but this
6
+ # may make readability/comprehensibility higher, and yield
7
+ # higher applied/practical complexity.
8
+ class WorkDerivatives
9
+ include NewspaperWorks::Data::FilesetHelper
10
+ include NewspaperWorks::Data::PathHelper
11
+
12
+ # Work is primary adapted context
13
+ # @return [ActiveFedora::Base] Hyrax work-type object
14
+ attr_accessor :work
15
+
16
+ # FileSet is secondary adapted context
17
+ # @return [FileSet] fileset for work, with regard to these derivatives
18
+ attr_accessor :fileset
19
+
20
+ # Parent pointer to WorkFile object representing fileset
21
+ # @return [NewspaperWorks::Data::WorkFile] WorkFile for fileset, work pair
22
+ attr_accessor :parent
23
+
24
+ # Assigned attachment queue (of paths)
25
+ # @return [Array<String>] list of paths queued for attachment
26
+ attr_accessor :assigned
27
+
28
+ # Assigned deletion queue (of destination names)
29
+ # @return [Array<String>] list of destination names queued for deletion
30
+ attr_accessor :unassigned
31
+
32
+ # mapping of special names Hyrax uses for derivatives, not extension:
33
+ @remap_names = {
34
+ 'jpeg' => 'thumbnail'
35
+ }
36
+ class << self
37
+ attr_accessor :remap_names
38
+ end
39
+
40
+ # alternate constructor spelling:
41
+ def self.of(work, fileset = nil, parent = nil)
42
+ new(work, fileset, parent)
43
+ end
44
+
45
+ # Adapt work and either specific or first fileset
46
+ def initialize(work, fileset = nil, parent = nil)
47
+ # adapted context usually work, may be string id of FileSet
48
+ @work = work
49
+ @fileset = fileset.nil? ? first_fileset : fileset
50
+ # computed name-to-path mapping, initially nil as sentinel for JIT load
51
+ @paths = nil
52
+ # assignments for attachment
53
+ @assigned = []
54
+ # un-assignments for deletion
55
+ @unassigned = []
56
+ # parent is NewspaperWorks::Data::WorkFile object for derivatives
57
+ @parent = parent
58
+ end
59
+
60
+ # Assignment state
61
+ # @return [String] A label describing the state of assignment queues
62
+ def state
63
+ load_paths
64
+ return 'dirty' unless @unassigned.empty? && @assigned.empty?
65
+ return 'empty' if @paths.keys.empty?
66
+ 'saved'
67
+ end
68
+
69
+ # Assign a path to assigned queue for attachment
70
+ # @param path [String] Path to source file
71
+ def assign(path)
72
+ path = normalize_path(path)
73
+ validate_path(path)
74
+ @assigned.push(path)
75
+ # We are keeping assignment both in ephemeral, transient @assigned
76
+ # and mirroring to db to share context with other components:
77
+ log_assignment(path, path_destination_name(path))
78
+ end
79
+
80
+ # Assign a destination name to unassigned queue for deletion -- OR --
81
+ # remove a path from queue of assigned items
82
+ # @param name [String] Destination name (file extension), or source path
83
+ def unassign(name)
84
+ # if name is queued path, remove from @assigned queue:
85
+ if @assigned.include?(name)
86
+ @assigned.delete(name)
87
+ unlog_assignment(name, path_destination_name(name))
88
+ end
89
+ # if name is known destination name, remove
90
+ @unassigned.push(name) if exist?(name)
91
+ end
92
+
93
+ # commit pending changes to work files
94
+ # beginning with removals, then with new assignments
95
+ def commit!
96
+ @unassigned.each { |name| delete(name) }
97
+ @assigned.each do |path|
98
+ attach(path, path_destination_name(path))
99
+ end
100
+ # reset queues after work is complete
101
+ @assigned = []
102
+ @unassigned = []
103
+ end
104
+
105
+ # Given a fileset meeting both of the following conditions:
106
+ # 1. a non-nil import_url value;
107
+ # 2. is attached to a work (persisted in Fedora, if not yet in Solr)...
108
+ # ...this method gets the queued derivative paths and attaches them all.
109
+ # @param file_set [FileSet] saved file set, attached to work,
110
+ # with identifier, and a non-nil import_url
111
+ def commit_queued!(file_set)
112
+ raise ArgumentError, 'No FileSet import_url' if file_set.import_url.nil?
113
+ import_path = file_url_to_path(file_set.import_url)
114
+ work = file_set.member_of.select(&:work?)[0]
115
+ raise ArgumentError, 'Work not found for fileset' if work.nil?
116
+ derivatives = WorkDerivatives.of(work, file_set)
117
+ IngestFileRelation.derivatives_for_file(import_path).each do |path|
118
+ next unless File.exist?(path)
119
+ attachment_record = DerivativeAttachment.where(path: path).first
120
+ derivatives.attach(path, attachment_record.destination_name)
121
+ # update previously nil fileset id
122
+ attachment_record.fileset_id = file_set.id
123
+ attachment_record.save!
124
+ end
125
+ @fileset ||= file_set
126
+ load_paths
127
+ end
128
+
129
+ # attach a single derivative file to work
130
+ # @param file [String, IO] path to file or IO object
131
+ # @param name [String] destination name, usually file extension
132
+ def attach(file, name)
133
+ raise 'Cannot save for nil fileset' if fileset.nil?
134
+ mkdir_pairtree
135
+ path = path_factory.derivative_path_for_reference(fileset, name)
136
+ # if file argument is path, copy file
137
+ if file.class == String
138
+ FileUtils.copy(file, path)
139
+ else
140
+ # otherwise, presume file is an IO, read, write it
141
+ # note: does not close input file/IO, presume that is caller's
142
+ # responsibility.
143
+ orig_pos = file.tell
144
+ file.seek(0)
145
+ File.open(path, 'w') { |dstfile| dstfile.write(file.read) }
146
+ file.seek(orig_pos)
147
+ end
148
+ # finally, reload @paths after mutation
149
+ load_paths
150
+ end
151
+
152
+ # Delete a derivative file from work, by destination name
153
+ # @param name [String] destination name, usually file extension
154
+ def delete(name, force: nil)
155
+ raise 'Cannot save for nil fileset' if fileset.nil?
156
+ path = path_factory.derivative_path_for_reference(fileset, name)
157
+ # will remove file, if it exists; won't remove pairtree, even
158
+ # if it becomes empty, as that is excess scope.
159
+ FileUtils.rm(path, force: force) if File.exist?(path)
160
+ # finally, reload @paths after mutation
161
+ load_paths
162
+ end
163
+
164
+ # (Re)load all paths/names into @paths; invoked lazily and after mutation
165
+ def load_paths
166
+ fsid = fileset_id
167
+ if fsid.nil?
168
+ @paths = {}
169
+ return
170
+ end
171
+ # list of paths
172
+ paths = path_factory.derivatives_for_reference(fsid)
173
+ # names from paths
174
+ @paths = paths.map { |e| [path_destination_name(e), e] }.to_h
175
+ end
176
+
177
+ # path to existing derivative file for destination name
178
+ # @param name [String] destination name, usually file extension
179
+ # @return [String, NilClass] path (or nil)
180
+ def path(name)
181
+ load_paths if @paths.nil?
182
+ result = @paths[name]
183
+ return if result.nil?
184
+ File.exist?(result) ? result : nil
185
+ end
186
+
187
+ # Run a block in context of the opened derivative file for reading
188
+ # @param name [String] destination name, usually file extension
189
+ # @param block [Proc] block/proc to run in context of file IO
190
+ def with_io(name, &block)
191
+ mode = ['xml', 'txt', 'html'].include?(name) ? 'rb:UTF-8' : 'rb'
192
+ filepath = path(name)
193
+ return if filepath.nil?
194
+ File.open(filepath, mode, &block)
195
+ end
196
+
197
+ # Get number of derivatives or, if a destination name argument
198
+ # is provided, the size of derivative file
199
+ # @param name [String] optional destination name, usually file extension
200
+ # @return [Integer] size in bytes
201
+ def size(name = nil)
202
+ load_paths if @paths.nil?
203
+ return @paths.size if name.nil?
204
+ File.size(@paths[name])
205
+ end
206
+
207
+ # Check if derivative file exists for destination name
208
+ # @param name [String] optional destination name, usually file extension
209
+ # @return [TrueClass, FalseClass] boolean
210
+ def exist?(name)
211
+ keys.include?(name) && File.exist?(self[name])
212
+ end
213
+
214
+ # Get raw binary or encoded text data of file as a String
215
+ # @param name [String] destination name, usually file extension
216
+ # @return [String] Raw bytes, or if text file, a UTF-8 encoded String
217
+ def data(name)
218
+ result = ''
219
+ with_io(name) do |io|
220
+ result += io.read
221
+ end
222
+ result
223
+ end
224
+
225
+ private
226
+
227
+ def primary_file_path
228
+ if fileset.nil?
229
+ # if there is a nil fileset, we look for *intent* in the form
230
+ # of the first assigned file path for single-file work.
231
+ work_file = parent
232
+ return if work_file.nil?
233
+ work_files = work_file.parent
234
+ return if work_files.nil?
235
+ work_files.assigned[0]
236
+ else
237
+ file_url_to_path(fileset.import_url) unless fileset.import_url.nil?
238
+ end
239
+ end
240
+
241
+ def file_url_to_path(url)
242
+ url.gsub('file://', '')
243
+ end
244
+
245
+ def log_primary_file_relation(path)
246
+ file_path = primary_file_path
247
+ return if file_path.nil?
248
+ NewspaperWorks::IngestFileRelation.create!(
249
+ file_path: file_path,
250
+ derivative_path: path
251
+ )
252
+ end
253
+
254
+ def log_assignment(path, name)
255
+ NewspaperWorks::DerivativeAttachment.create!(
256
+ fileset_id: fileset_id,
257
+ path: path,
258
+ destination_name: name
259
+ )
260
+ log_primary_file_relation(path)
261
+ end
262
+
263
+ def unlog_assignment(path, name)
264
+ if fileset_id.nil?
265
+ NewspaperWorks::DerivativeAttachment.where(
266
+ path: path,
267
+ destination_name: name
268
+ ).destroy_all
269
+ else
270
+ NewspaperWorks::DerivativeAttachment.where(
271
+ fileset_id: fileset_id,
272
+ path: path,
273
+ destination_name: name
274
+ ).destroy_all
275
+ end
276
+ # note: there is deliberately no attempt to "unlog" primary
277
+ # file relation, as leaving it should have no side-effect.
278
+ end
279
+
280
+ def path_destination_name(path)
281
+ ext = path.split('.')[-1]
282
+ self.class.remap_names[ext] || ext
283
+ end
284
+
285
+ def respond_to_missing?(symbol, include_priv = false)
286
+ {}.respond_to?(symbol, include_priv)
287
+ end
288
+
289
+ def method_missing(method, *args, &block)
290
+ # if we proxy mapping/hash enumeration methods,
291
+ # make sure @paths loaded, then proxy to it.
292
+ if respond_to_missing?(method)
293
+ load_paths if @paths.nil?
294
+ return @paths.send(method, *args, &block)
295
+ end
296
+ super
297
+ end
298
+
299
+ def path_factory
300
+ Hyrax::DerivativePath
301
+ end
302
+
303
+ # make shared pairtree directory in which the fileset's derivatives live
304
+ def mkdir_pairtree
305
+ # Hyrax::DerivativePath has no public method to directly get the
306
+ # bare pairtree path for derivatives for a fileset, but we
307
+ # can infer it...
308
+ path = path_factory.derivative_path_for_reference(fileset, '')
309
+ dir = File.join(path.split('/')[0..-2])
310
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
311
+ end
312
+ end
313
+ end
314
+ end
@@ -0,0 +1,92 @@
1
+ # encoding=utf-8
2
+
3
+ require 'hyrax'
4
+
5
+ module NewspaperWorks
6
+ module Data
7
+ # WorkFile is a read-only convenience wrapper for just-in-time
8
+ # file operations, and is the type of values returned by
9
+ # NewspaperWorks::Data::WorkFiles (container) adapter.
10
+ class WorkFile
11
+ # accessors for adaptation relationships:
12
+ attr_accessor :work, :parent, :fileset
13
+ # delegate these metadata properties to @fileset.original_file:
14
+ delegate :size, :date_created, :date_modified, :mime_type, to: :unwrapped
15
+
16
+ # alternate constructor spelling:
17
+ def self.of(work, fileset = nil, parent = nil)
18
+ new(work, fileset, parent)
19
+ end
20
+
21
+ def initialize(work, fileset = nil, parent = nil)
22
+ @work = work
23
+ # If fileset is nil (e.g. deferred assignment, before a fileset exists),
24
+ # file accessors such as path and name return nil:
25
+ @fileset = fileset
26
+ # Parent is WorkFiles (container) object, if applicable:
27
+ @parent = parent
28
+ end
29
+
30
+ # Get original repository object representing file (not fileset).
31
+ # @return [ActiveFedora::File] repository file persistence object
32
+ def unwrapped
33
+ return nil if @fileset.nil?
34
+ @fileset.original_file
35
+ end
36
+
37
+ def ==(other)
38
+ return false if @fileset.nil?
39
+ unwrapped.id == other.unwrapped.id
40
+ end
41
+
42
+ # Get path to working copy of file on local filesystem;
43
+ # checkout file from repository/source as needed.
44
+ # @return [String] path to working copy of binary
45
+ def path
46
+ return nil if @fileset.nil?
47
+ checkout
48
+ end
49
+
50
+ # Read data from working copy of file on local filesystem;
51
+ # checkout file from repository/source as needed.
52
+ # @return [String] byte data of binary/file payload
53
+ def data
54
+ return '' if @fileset.nil?
55
+ File.read(path, mode: 'rb')
56
+ end
57
+
58
+ # Run block/proc upon data of file;
59
+ # checkout file from repository/source as needed.
60
+ # @yield [io] read-only IO or File object to block/proc.
61
+ def with_io(&block)
62
+ filepath = path
63
+ return if filepath.nil?
64
+ File.open(filepath, 'rb', &block)
65
+ end
66
+
67
+ # Get filename from stored metadata
68
+ # @return [String] file name stored in repository metadata for file
69
+ def name
70
+ return nil if @fileset.nil?
71
+ unwrapped.original_name
72
+ end
73
+
74
+ # Derivatives for fileset associated with this primary file object
75
+ # @return [NewspaperWorks::Data::WorkDerivatives] derivatives adapter
76
+ def derivatives
77
+ NewspaperWorks::Data::WorkDerivatives.of(work, fileset, self)
78
+ end
79
+
80
+ private
81
+
82
+ def checkout
83
+ file = @fileset.original_file
84
+ # find_or_retrieve returns path to working copy, but only
85
+ # fetches from Fedora if no working copy exists on filesystem.
86
+ # NOTE: there may be some benefit to memoizing to avoid
87
+ # call and File.exist? IO operation, but YAGNI for now.
88
+ Hyrax::WorkingDirectory.find_or_retrieve(file.id, @fileset.id)
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,181 @@
1
+ module NewspaperWorks
2
+ module Data
3
+ class WorkFiles
4
+ include NewspaperWorks::Data::PathHelper
5
+
6
+ attr_accessor :work, :assigned, :unassigned
7
+ delegate :include?, to: :keys
8
+
9
+ # alternate constructor spelling:
10
+ def self.of(work)
11
+ new(work)
12
+ end
13
+
14
+ def initialize(work)
15
+ @work = work
16
+ @assigned = []
17
+ @unassigned = []
18
+ @derivatives = nil
19
+ end
20
+
21
+ # Derivatives for specified fileset or first fileset found.
22
+ # The `WorkDerivatives` adapter has assign/commit! semantics just
23
+ # like `WorkFiles`, and also acts like a hash/mapping of
24
+ # destination names (usually file extension) to path of saved
25
+ # derivative. Returns the same instance (memoized after first use)
26
+ # of `WorkDerivatives`, except when no fileset is known yet.
27
+ # @return [NewspaperWorks::Data::WorkDerivatives] derivatives adapter
28
+ def derivatives(fileset: nil)
29
+ fileset ||= @fileset
30
+ return @derivatives unless @derivatives.nil?
31
+ if fileset.nil?
32
+ # for the deferred assignement case, we have no fileset yet...
33
+ work_file = NewspaperWorks::Data::WorkFile.of(work, nil, self)
34
+ return work_file.derivatives
35
+ end
36
+ # Otherwise, delegate actual construction to WorkFile.derivatives:
37
+ @derivatives = values[0].derivatives
38
+ end
39
+
40
+ # Assignment state
41
+ # @return [String] A label describing the state of assignment queues
42
+ def state
43
+ return 'dirty' unless @assigned.empty? && @unassigned.empty?
44
+ return 'empty' if keys.empty?
45
+ # TODO: implement 'pending' as intermediate state between 'dirty'
46
+ # and saved, where we look for saved state that matches what was
47
+ # previously assigned in THIS instance. We can only know that
48
+ # changes initiated by this instance in this thread are pending
49
+ # because there's no global storage for the assignment queue.
50
+ 'saved'
51
+ end
52
+
53
+ # List of fileset (not file) id keys, presumes system like Hyrax
54
+ # is only keeping a 1:1 between fileset and contained PCDM file,
55
+ # because derivatives are not stored in the FileSet.
56
+ # @return [Array<String>] fileset ids
57
+ def keys
58
+ filesets.map(&:id)
59
+ end
60
+
61
+ # List of WorkFile for each primary file
62
+ # @return [Array<NewspaperWorks::Data::WorkFile>] adapter for persisted
63
+ # primary file
64
+ def values
65
+ keys.map(&method(:get))
66
+ end
67
+
68
+ # Array of [id, WorkFile] for each primary file
69
+ # @return [Array<Array>] key/value pairs for primary files of work
70
+ def entries
71
+ filesets.map { |fs| [fs.id, self[fs.id]] }
72
+ end
73
+
74
+ # List of local file names for attachments, based on original ingested
75
+ # or uploaded file name.
76
+ # @return [Array<String>]
77
+ def names
78
+ filesets.map(&method(:original_name))
79
+ end
80
+
81
+ # Get a WorkFile adapter representing primary file, either by name or id
82
+ # @param name_or_id [String] Fileset id or work-local file name
83
+ # @return [NewspaperWorks::Data::WorkFile] adapter for persisted
84
+ # primary file
85
+ def get(name_or_id)
86
+ return get_by_fileset_id(name_or_id) if keys.include?(name_or_id)
87
+ get_by_filename(name_or_id)
88
+ end
89
+
90
+ # Assign a path to assigned queue for attachment
91
+ # @param path [String] Path to source file
92
+ def assign(path)
93
+ path = normalize_path(path)
94
+ validate_path(path)
95
+ @assigned.push(path)
96
+ end
97
+
98
+ # Assign a name or id to unassigned queue for deletion -- OR -- remove a
99
+ # path from queue of assigned items
100
+ # @param name_or_id [String] Fileset id, local file name, or source path
101
+ def unassign(name_or_id)
102
+ # if name_or_id is queued path, remove from @assigned queue:
103
+ @assigned.delete(name_or_id) if @assigned.include?(name_or_id)
104
+ # if name_or_id is known id or name, remove
105
+ @unassigned.push(name_or_id) if include?(name_or_id)
106
+ end
107
+
108
+ # commit pending changes to work files
109
+ # beginning with removals, then with new assignments
110
+ def commit!
111
+ commit_unassigned
112
+ commit_assigned
113
+ end
114
+
115
+ alias [] :get
116
+
117
+ private
118
+
119
+ def get_by_fileset_id(id)
120
+ nil unless keys.include?(id)
121
+ fileset = FileSet.find(id)
122
+ NewspaperWorks::Data::WorkFile.of(work, fileset, self)
123
+ end
124
+
125
+ # Get one WorkFile object based on filename in metadata
126
+ def get_by_filename(name)
127
+ r = filesets.select { |fs| original_name(fs) == name }
128
+ # checkout first match
129
+ r.empty? ? nil : NewspaperWorks::Data::WorkFile.of(work, r[0], self)
130
+ end
131
+
132
+ def original_name(fileset)
133
+ fileset.original_file.original_name
134
+ end
135
+
136
+ def filesets
137
+ # file sets with non-nil original file contained:
138
+ work.members.select { |m| m.class == FileSet && m.original_file }
139
+ end
140
+
141
+ def user
142
+ return User.find_by(email: work.depositor) unless work.depositor.nil?
143
+ defined?(current_user) ? current_user : User.batch_user
144
+ end
145
+
146
+ def ensure_depositor
147
+ return unless @work.depositor.nil?
148
+ @work.depositor = user.user_key
149
+ end
150
+
151
+ def commit_unassigned
152
+ # for each (name or) id to be removed from work, use actor to destroy
153
+ @unassigned.each do |id|
154
+ # "actor" here is simply a multi-adapter of Fileset, User
155
+ # Calling destroy will:
156
+ # 1. unlink fileset from work, and save work
157
+ # 2. Destroy fileset:
158
+ # - :before_destroy callback will delegate derivative cleanup
159
+ # to derivatives service component(s).
160
+ # - Remove fileset from storage/persistence layers
161
+ # - Invoke (logging or other) :after_destroy callback
162
+ Hyrax::Actors::FileSetActor.new(get(id).fileset, user).destroy
163
+ work.reload
164
+ end
165
+ end
166
+
167
+ def commit_assigned
168
+ return if @assigned.nil? || @assigned.empty?
169
+ ensure_depositor
170
+ remote_files = @assigned.map do |path|
171
+ { url: path_to_uri(path), file_name: File.basename(path) }
172
+ end
173
+ attrs = { remote_files: remote_files }
174
+ # Create an environment for actor stack:
175
+ env = Hyrax::Actors::Environment.new(@work, Ability.new(user), attrs)
176
+ # Invoke default Hyrax actor stack middleware:
177
+ Hyrax::CurationConcern.actor.create(env)
178
+ end
179
+ end
180
+ end
181
+ end
@@ -0,0 +1,35 @@
1
+ require 'newspaper_works/data/fileset_helper'
2
+ require 'newspaper_works/data/path_helper'
3
+ require 'newspaper_works/data/work_derivatives'
4
+ require 'newspaper_works/data/work_files'
5
+ require 'newspaper_works/data/work_file'
6
+
7
+ module NewspaperWorks
8
+ # Module for data access helper / adapter classes supporting, enhancing
9
+ # NewspaperWorks work models
10
+ module Data
11
+ # Handler for after_create_fileset, to be called by block subscribing to
12
+ # and overriding default Hyrax `:after_create_fileset` handler, via
13
+ # app integrating newspaper_works.
14
+ def self.handle_after_create_fileset(file_set, user)
15
+ handle_queued_derivative_attachments(file_set)
16
+ # Hyrax queues this job by default, and since newspaper_works
17
+ # overrides the single subscriber Hyrax uses to do so, we
18
+ # must call this here:
19
+ FileSetAttachedEventJob.perform_later(file_set, user)
20
+ work = file_set.member_of[0]
21
+ # Hyrax CreateWithRemoteFilesActor has glaring omission re: this job,
22
+ # so we call it here, once we have a fileset to copy permissions to.
23
+ InheritPermissionsJob.perform_later(work) unless work.nil?
24
+ end
25
+
26
+ def self.handle_queued_derivative_attachments(file_set)
27
+ return if file_set.import_url.nil?
28
+ work = file_set.member_of.select(&:work?)[0]
29
+ derivatives = NewspaperWorks::Data::WorkDerivatives.of(work)
30
+ # For now, because this is an IO-bound operation, it makes sense to have
31
+ # this not be a job, but run inline:
32
+ derivatives.commit_queued!(file_set)
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,42 @@
1
+ require 'active_fedora'
2
+ require 'hyrax'
3
+ require 'blacklight_iiif_search'
4
+ require 'blacklight_advanced_search'
5
+
6
+ module NewspaperWorks
7
+ # module constants:
8
+ GEM_PATH = Gem::Specification.find_by_name("newspaper_works").gem_dir
9
+
10
+ # Engine Class
11
+ class Engine < ::Rails::Engine
12
+ isolate_namespace NewspaperWorks
13
+
14
+ config.to_prepare do
15
+ # Inject PluggableDerivativeService ahead of Hyrax default.
16
+ # This wraps Hyrax default, but allows multiple valid services
17
+ # to be configured, instead of just the _first_ valid service.
18
+ #
19
+ # To configure specific services, inject each service, in desired order
20
+ # to NewspaperWorks::PluggableDerivativeService.plugins array.
21
+
22
+ Hyrax::DerivativeService.services.unshift(
23
+ NewspaperWorks::PluggableDerivativeService
24
+ )
25
+
26
+ # Register specific derivative services to be considered by
27
+ # PluggableDerivativeService:
28
+ [
29
+ NewspaperWorks::JP2DerivativeService,
30
+ NewspaperWorks::PDFDerivativeService,
31
+ NewspaperWorks::TextExtractionDerivativeService,
32
+ NewspaperWorks::TIFFDerivativeService
33
+ ].each do |plugin|
34
+ NewspaperWorks::PluggableDerivativeService.plugins.push plugin
35
+ end
36
+
37
+ # Register actor to handle any NewspaperWorks upload behaviors before
38
+ # CreateWithFilesActor gets to them:
39
+ Hyrax::CurationConcern.actor_factory.insert_before Hyrax::Actors::CreateWithFilesActor, NewspaperWorks::Actors::NewspaperWorksUploadActor
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,14 @@
1
+ module NewspaperWorks
2
+ # generic/base NewspaperWorks-specific exception:
3
+ class NewspaperWorksError < StandardError
4
+ end
5
+
6
+ # Data transformation or read-error:
7
+ class DataError < NewspaperWorksError
8
+ end
9
+
10
+ # Specific exception for temporary state where one or more PDF page source
11
+ # files are not ready, for which a retry at a later time is warranted.
12
+ class PagesNotReady < DataError
13
+ end
14
+ end