newspaper_works 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (461)
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,11 @@
# Generated via
#  `rails generate hyrax:work NewspaperIssue`
module Hyrax
  # Form class for NewspaperIssue works. Inherits its base term list from
  # NewspaperWorks::NewspaperCoreFormData, appends the issue-level fields and
  # removes four of the generic descriptive fields.
  class NewspaperIssueForm < ::NewspaperWorks::NewspaperCoreFormData
    self.model_class = ::NewspaperIssue

    # fields offered in addition to the inherited ones
    self.terms += %i[alternative_title volume edition_number edition_name
                     issue_number extent publication_date]

    # inherited fields that are not offered for an issue
    self.terms -= %i[creator contributor description subject]
  end
end
@@ -0,0 +1,15 @@
# Generated via
#  `rails generate hyrax:work NewspaperPage`
module Hyrax
  # Form class for NewspaperPage works. Built directly on the stock Hyrax work
  # form: page-level fields are appended and most generic descriptive fields
  # are removed.
  class NewspaperPageForm < Hyrax::Forms::WorkForm
    self.model_class = ::NewspaperPage

    # Page-level fields. :resource_type is deliberately not appended here: it
    # is removed from the term list just below, so appending it first had no
    # effect on the resulting list.
    self.terms += %i[height width text_direction page_number section]

    # Generic fields that are not offered for a page (each listed once).
    self.terms -= %i[creator keyword rights_statement contributor description
                     license subject date_created language based_near
                     related_url source resource_type publisher]

    # These fields were removed above, so they must not be required either.
    self.required_fields -= %i[creator keyword rights_statement]
  end
end
@@ -0,0 +1,12 @@
# Generated via
#  `rails generate hyrax:work NewspaperTitle`
module Hyrax
  # Form class for NewspaperTitle works. Inherits its base term list from
  # NewspaperWorks::NewspaperCoreFormData, appends the title-level fields and
  # removes the generic fields listed below.
  class NewspaperTitleForm < ::NewspaperWorks::NewspaperCoreFormData
    self.model_class = ::NewspaperTitle

    # fields offered in addition to the inherited ones
    self.terms += %i[alternative_title edition_name frequency preceded_by
                     succeeded_by publication_date_start publication_date_end]

    # inherited fields that are not offered for a title
    self.terms -= %i[creator contributor description source subject]
  end
end
@@ -0,0 +1,17 @@
module NewspaperWorks
  # Base form for newspaper work types. Adjusts the stock Hyrax work form's
  # term list and required fields, and permits the nested attributes used by
  # the place_of_publication field.
  class NewspaperCoreFormData < Hyrax::Forms::WorkForm
    self.terms += %i[resource_type place_of_publication issn lccn oclcnum held_by]
    self.terms -= %i[based_near date_created keyword related_url source]
    self.required_fields += %i[resource_type language held_by]
    self.required_fields -= %i[creator keyword rights_statement]

    # Extend the inherited permitted params with the nested
    # place_of_publication attributes (:id and :_destroy).
    #
    # @return [Array] the inherited list plus one nested-attributes hash
    def self.build_permitted_params
      nested_place_params = { place_of_publication_attributes: %i[id _destroy] }
      super + [nested_place_params]
    end
  end
end
@@ -0,0 +1,5 @@
module Hyrax
  # Helper module for NewspaperArticle views; it only mixes in the shared
  # breadcrumb helpers.
  module NewspaperArticlesHelper
    include NewspaperWorks::BreadcrumbHelper
  end
end
@@ -0,0 +1,5 @@
module Hyrax
  # Helper module for NewspaperContainer views; it only mixes in the shared
  # breadcrumb helpers.
  module NewspaperContainersHelper
    include NewspaperWorks::BreadcrumbHelper
  end
end
@@ -0,0 +1,5 @@
module Hyrax
  # Helper module for NewspaperIssue views; it only mixes in the shared
  # breadcrumb helpers.
  module NewspaperIssuesHelper
    include NewspaperWorks::BreadcrumbHelper
  end
end
@@ -0,0 +1,5 @@
module Hyrax
  # Helper module for NewspaperPage views; it only mixes in the shared
  # breadcrumb helpers.
  module NewspaperPagesHelper
    include NewspaperWorks::BreadcrumbHelper
  end
end
@@ -0,0 +1,5 @@
module NewspaperWorks
  # Application Helper module. Currently an empty placeholder: it defines no
  # helper methods of its own.
  module ApplicationHelper
  end
end
@@ -0,0 +1,92 @@
module NewspaperWorks
  # View helpers that build the breadcrumb trail and the previous/next page
  # links for newspaper objects. Relies on `link_to`, `main_app` and `t` being
  # available in the including context.
  module BreadcrumbHelper
    # create an array of links representing the ancestors of the current
    # object, followed by the (formatted) title of the object itself
    #
    # @param presenter [Newspaper*Presenter] the presenter for the current Newspaper object
    # @param link_class [String] the class for the breadcrumb links
    # @return [Array] flat list of ancestor links, then the current title
    def newspaper_breadcrumbs(presenter, link_class = nil)
      breadcrumbs = []
      # ancestor type => presenter method that signals the ancestor applies
      ancestors = { title: :publication_id, issue: :issue_id, page: :page_ids }
      ancestors.each do |object_type, id_method|
        next unless presenter.respond_to?(id_method)
        breadcrumbs << create_breadcrumb_link(object_type, presenter, link_class)
      end
      breadcrumbs << breadcrumb_object_title(presenter.title.first)
      # create_breadcrumb_link returns arrays (and [] placeholders); flatten
      # collapses them into a single list
      breadcrumbs.flatten
    end

    # create an array of links representing ancestors of the current object
    #
    # @param object_type [Symbol] the type of newspaper object, as a symbol (e.g. :issue)
    # @param presenter [Newspaper*Presenter] the presenter for the current Newspaper object
    # @param link_class [String] the class for the breadcrumb links
    # @return [Array] links for the given ancestor type (empty for unknown types)
    def create_breadcrumb_link(object_type, presenter, link_class = nil)
      links = []
      case object_type
      when :title
        links << breadcrumb_object_link(object_type, presenter.publication_id,
                                        presenter.publication_title, link_class)
      when :issue
        links << breadcrumb_object_link(object_type, presenter.issue_id,
                                        breadcrumb_object_title(presenter.issue_title), link_class)
      when :page
        unless presenter.page_ids.blank? || presenter.page_titles.blank?
          # ids and titles are parallel lists: the title at the same position
          # labels the link for each id
          presenter.page_ids.each_with_index do |id, index|
            links << breadcrumb_object_link(object_type, id, breadcrumb_object_title(presenter.page_titles[index]),
                                            link_class)
          end
        end
      end
      links
    end

    # create a link for an ancestor of the current object
    #
    # @param object_type [Symbol] the type of newspaper object, as a symbol (e.g. :issue)
    # @param id [String] the id of the ancestor Newspaper object
    # @param title [String] the title of the ancestor Newspaper object
    # @param link_class [String] the class for the breadcrumb links
    # @return [String, Array] the link, or [] when id or title is missing
    def breadcrumb_object_link(object_type, id, title, link_class = nil)
      return [] unless id && title
      # route helper name is derived from the object type,
      # e.g. :issue => hyrax_newspaper_issue_path
      link_path = "hyrax_newspaper_#{object_type}_path"
      link_to(title,
              main_app.send(link_path, id),
              class: link_class)
    end

    # Format link titles for ancestor link. Should return either the portion of
    # the title that describes the page number or a formatted date. If neither
    # is found, will return back the original title variable
    #
    # @param title [String] the title of the ancestor Newspaper object
    # @return [String, nil] formatted title; nil when title is not a String
    def breadcrumb_object_title(title)
      return nil unless title.is_a? String
      # match case-insensitively on the title itself so the index always
      # refers to the string that is sliced
      page_slice_start_index = title =~ /page/i
      return title[page_slice_start_index..-1] if page_slice_start_index
      begin
        parsed_date = title.to_date
      rescue ArgumentError
        parsed_date = nil
      end
      # String#to_date returns nil (rather than raising) for a blank string,
      # so fall back to the title in that case as well
      parsed_date ? parsed_date.strftime("%B %e, %Y") : title
    end

    # create link to the previous NewspaperPage
    #
    # @param presenter [NewspaperPagePresenter] presenter for current NewspaperPage object
    # @param options [Hash] hash of link options
    def previous_page_link(presenter, options = {})
      link_to("<< #{t('hyrax.newspaper_page.previous_page')}",
              main_app.hyrax_newspaper_page_path(presenter.previous_page_id),
              options)
    end

    # create link to the next NewspaperPage
    #
    # @param presenter [NewspaperPagePresenter] presenter for current NewspaperPage object
    # @param options [Hash] hash of link options
    def next_page_link(presenter, options = {})
      link_to("#{t('hyrax.newspaper_page.next_page')} >>",
              main_app.hyrax_newspaper_page_path(presenter.next_page_id),
              options)
    end
  end
end
@@ -0,0 +1,103 @@
module NewspaperWorks
  # View helpers for search results of newspaper objects: viewer search
  # anchors, thumbnail links, highlighted terms and OCR snippets. Relies on
  # Blacklight/Rails view helpers being available in the including context.
  module NewspaperWorksHelperBehavior
    ##
    # create link anchor to be read by UniversalViewer
    # in order to show keyword search
    # @param query_params_hash [Hash] current_search_session.query_params
    # @return [String] or [nil] anchor
    def iiif_search_anchor(query_params_hash)
      query = search_query(query_params_hash)
      return nil if query.blank?
      "?h=#{query}"
    end

    ##
    # get the query, which may be in a different object,
    # depending if regular search or newspapers_search was run
    # @param query_params_hash [Hash] current_search_session.query_params
    # @return [String] or [nil] query
    def search_query(query_params_hash)
      query_params_hash[:q] || query_params_hash[:all_fields]
    end

    ##
    # based on Blacklight::CatalogHelperBehavior#render_thumbnail_tag
    # setup the thumbnail link for a NewspaperPage or Article
    #
    # @param document [SolrDocument]
    # @param query_params_hash [Hash] current_search_session.query_params
    # @return [String] or [nil] when the document has no thumbnail
    def render_newspaper_thumbnail_tag(document, query_params_hash)
      thumbnail = newspaper_thumbnail_tag(document)
      return unless thumbnail
      anchor = iiif_search_anchor(query_params_hash)
      type_field = blacklight_config.view_config(document_index_view_type).display_type_field
      # Array() tolerates a missing or single-valued type field; such
      # documents fall through to the generic document link
      case Array(document[type_field]).first
      when 'NewspaperPage'
        link_to(thumbnail, hyrax_newspaper_page_path(document.id, anchor: anchor))
      when 'NewspaperArticle'
        link_to(thumbnail, hyrax_newspaper_article_path(document.id, anchor: anchor))
      else
        link_to_document document, thumbnail
      end
    end

    ##
    # based on Blacklight::CatalogHelperBehavior#render_thumbnail_tag
    # return the thumbnail image_tag
    #
    # @param document [SolrDocument]
    # @return [String] or [nil] when no thumbnail can be produced
    def newspaper_thumbnail_tag(document)
      # look the view configuration up once instead of once per check
      view_config = blacklight_config.view_config(document_index_view_type)
      if view_config.thumbnail_method
        send(view_config.thumbnail_method, document)
      elsif view_config.thumbnail_field
        url = thumbnail_url(document)
        image_tag url if url.present?
      end
    end

    ##
    # return the matching highlighted terms from Solr highlight field
    #
    # @param document [SolrDocument]
    # @param hl_fl [String] the name of the Solr field with highlights
    # @param hl_tag [String] the HTML element name used for marking highlights
    #   configured in Solr as hl.tag.pre value
    # @return [String] or [nil] the unique, downcased, sorted terms joined by
    #   spaces; nil when the document has no highlights for the field
    def highlight_matches(document, hl_fl, hl_tag)
      hls = document.highlight_field(hl_fl)
      return nil unless hls.present?
      # escape the tag name so regex metacharacters in it are matched literally
      tag = Regexp.escape(hl_tag.to_s)
      # regex: find all chars between hl_tag, but NOT other <element>
      highlighted = %r{<#{tag}>[^<>]+</#{tag}>}
      tag_markup = %r{</*#{tag}>}
      hl_matches = hls.flat_map do |hl|
        hl.scan(highlighted).map { |match| match.gsub(tag_markup, '').downcase }
      end
      hl_matches.uniq.sort.join(' ')
    end

    ##
    # print the ocr snippets. if more than one, separate with <br/>
    #
    # @param options [Hash] options hash provided by Blacklight
    # @return [String] snippets HTML to be rendered
    # NOTE(review): the snippet text is marked html_safe without escaping, so
    # this trusts the markup in options[:value] -- confirm that the source of
    # these snippets cannot carry unwanted HTML.
    # rubocop:disable Rails/OutputSafety
    def render_ocr_snippets(options = {})
      snippets = options[:value]
      snippets_content = [content_tag('div',
                                      "... #{snippets.first} ...".html_safe,
                                      class: 'ocr_snippet first_snippet')]
      if snippets.length > 1
        # remaining snippets are rendered by a separate partial
        snippets_content << render(partial: 'catalog/snippets_more',
                                   locals: { snippets: snippets.drop(1),
                                             options: options })
      end
      snippets_content.join("\n").html_safe
    end
    # rubocop:enable Rails/OutputSafety
  end
end
@@ -0,0 +1,5 @@
module NewspaperWorks
  # Helper module that only mixes in the shared breadcrumb helpers.
  module NewspapersHelper
    include NewspaperWorks::BreadcrumbHelper
  end
end
@@ -0,0 +1,17 @@
# indexes the full text of a Newspaper object
module NewspaperWorks
  module IndexesFullText
    # Index the full text of a work.
    # The text is read from the work's 'txt' derivative, newlines are turned
    # into spaces and runs of spaces are collapsed. The result is written to
    # *both* a stored field (for highlighting) and a non-stored one (Hyrax
    # default).
    #
    # @param work [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_full_text(work, solr_doc)
      derivatives = NewspaperWorks::Data::WorkDerivatives.new(work)
      single_line_text = derivatives.data('txt').tr("\n", ' ').squeeze(' ')
      %w[all_text_timv all_text_tsimv].each do |field|
        solr_doc[field] = single_line_text
      end
    end
  end
end
@@ -0,0 +1,67 @@
# indexes the place_of_publication field
module NewspaperWorks
  module IndexesPlaceOfPublication
    # wrapper for methods for indexing place_of_publication values
    #
    # Each place that is an ActiveTriples::Resource is looked up in GeoNames
    # by the numeric id found in its URI. A place whose lookup yields no data
    # is skipped; the remaining places are still indexed.
    #
    # @param object [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pop(object, solr_doc)
      return unless object.respond_to?(:place_of_publication)
      object.place_of_publication.each do |pop|
        next unless pop.is_a?(ActiveTriples::Resource)
        # the GeoNames id is the first run of four or more digits in the URI
        geonames_id = pop.id.match(/[\d]{4,}/).to_s
        geodata = get_geodata(geonames_id)
        # one failed lookup must not prevent the other places being indexed
        next if geodata.blank?
        add_geodata_fields(solr_doc)
        index_pop_geodata(geodata, solr_doc)
      end
    end

    # adds empty placeholder fields to solr_doc for incoming geodata
    # (existing values are kept)
    #
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def add_geodata_fields(solr_doc)
      %w[city county state country].each do |place|
        solr_doc["place_of_publication_#{place}_sim"] ||= []
      end
      solr_doc['place_of_publication_label_tesim'] ||= []
      solr_doc['place_of_publication_label_sim'] ||= []
      solr_doc['place_of_publication_llsim'] ||= []
    end

    # adds geographic data to solr_doc Hash, with fields for
    # city, county, state, country, coordinates
    #
    # Components that are nil or empty are not indexed, and are left out of
    # the display label, so no empty facet values or labels such as
    # "City, , Country" are produced. Expects the placeholder arrays created
    # by #add_geodata_fields to be present in solr_doc.
    #
    # @param geodata [Hash] hash of GeoNames data returned by #get_geodata
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pop_geodata(geodata, solr_doc)
      places = {
        'city' => geodata['name'],
        'county' => geodata['adminName2'],
        'state' => geodata['adminName1'],
        'country' => geodata['countryName']
      }
      places.each do |place, value|
        next if value.to_s.empty?
        solr_doc["place_of_publication_#{place}_sim"] << value
      end
      display_name = places.values_at('city', 'state', 'country')
                           .reject { |value| value.to_s.empty? }
                           .join(', ')
      unless display_name.empty?
        solr_doc['place_of_publication_label_tesim'] << display_name
        solr_doc['place_of_publication_label_sim'] << display_name
      end
      return unless geodata['lat'] && geodata['lng']
      # TODO: this should use a Solr location_rpt field type
      solr_doc['place_of_publication_llsim'] << "#{geodata['lat']},#{geodata['lng']}"
    end

    # fetch data from GeoNames API
    #
    # Performs an HTTP request; errors raised by the request or by parsing
    # the response body are not rescued here.
    #
    # @param geoname_id [String] GeoNames id of geographic entity
    # @return [Hash, false] GeoNames API response as Hash; false when the id
    #   is not a non-zero number or no GeoNames username is configured
    def get_geodata(geoname_id)
      return false if geoname_id.to_i.zero?
      geonames_un = Qa::Authorities::Geonames.username
      return false unless geonames_un
      geonames_url = "http://api.geonames.org/getJSON?geonameId=#{geoname_id}&username=#{geonames_un}"
      resp = Faraday.new(geonames_url).get
      JSON.parse(resp.body)
    end
  end
end
@@ -0,0 +1,35 @@
# indexes the publication_date_start and _end fields
module NewspaperWorks
  module IndexesPublicationDateRange
    # adds publication date start to solr_doc Hash in Solr datetime format
    #
    # A 'YYYY' value is indexed as January 1st of that year and a 'YYYY-MM'
    # value as the 1st of that month. Any other value, including a month
    # outside 01-12, sets no date field. The string field
    # 'publication_date_start_ssi' is always set to nil.
    #
    # @param pubdate [String] publication start date
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pubdate_start(pubdate, solr_doc)
      year, month = pubdate_year_and_month(pubdate)
      month ||= 1 if year
      if year && (1..12).cover?(month)
        solr_doc['publication_date_start_dtsi'] = DateTime.new(year, month, 1)
      end
      solr_doc['publication_date_start_ssi'] = nil
    end

    # adds publication date end to solr_doc Hash in Solr datetime format
    #
    # A 'YYYY' value is indexed as the last second of December 31st of that
    # year and a 'YYYY-MM' value as the last second of that month's last day.
    # Any other value, including a month outside 01-12, sets no date field.
    # The string field 'publication_date_end_ssi' is always set to nil.
    #
    # @param pubdate [String] publication end date
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pubdate_end(pubdate, solr_doc)
      end_time = 'T23:59:59Z'
      year, month = pubdate_year_and_month(pubdate)
      if year && month.nil?
        solr_doc['publication_date_end_dtsi'] = "#{pubdate}-12-31#{end_time}"
      elsif year && (1..12).cover?(month)
        # day -1 is the last day of the month, whatever its length
        end_day = Date.new(year, month, -1).strftime('%d')
        solr_doc['publication_date_end_dtsi'] = "#{pubdate}-#{end_day}#{end_time}"
      end
      solr_doc['publication_date_end_ssi'] = nil
    end

    private

    # split a 'YYYY' or 'YYYY-MM' string into integers
    #
    # @param pubdate [String] publication date
    # @return [Array] [year, month] (month is nil for 'YYYY'); [] when
    #   pubdate is not a String in one of the two formats
    def pubdate_year_and_month(pubdate)
      return [] unless pubdate.is_a?(String)
      match = /\A(\d{4})(?:-(\d{2}))?\z/.match(pubdate)
      return [] unless match
      [match[1].to_i, match[2] ? match[2].to_i : nil]
    end
  end
end
@@ -0,0 +1,125 @@
# indexes parent relationships e.g. issue->title, page->issue, etc
module NewspaperWorks
  module IndexesRelationships
    # Entry point: index the relationships that apply to the given object.
    # Everything except a NewspaperTitle gets publication info; pages and
    # articles additionally get the relationships specific to them.
    #
    # @param object [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_relationships(object, solr_doc)
      index_publication(object, solr_doc) unless object.is_a?(NewspaperTitle)
      if object.is_a?(NewspaperPage)
        index_issue(object, solr_doc)
        index_container(object, solr_doc)
        index_articles(object, solr_doc)
        index_siblings(object, solr_doc)
      elsif object.is_a?(NewspaperArticle)
        index_issue(object, solr_doc)
        index_pages(object, solr_doc)
      end
    end

    # Index id, title and configured unique id of the object's publication,
    # then copy the publication's facet fields. Does nothing unless
    # object.publication is a NewspaperTitle.
    #
    # @param object [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_publication(object, solr_doc)
      publication = object.publication
      return unless publication.is_a?(NewspaperTitle)
      solr_doc['publication_id_ssi'] = publication.id
      solr_doc['publication_title_ssi'] = publication.title.first
      # which property holds the unique id is a configuration setting
      unique_id_property = NewspaperWorks.config.publication_unique_id_property
      solr_doc['publication_unique_id_ssi'] = publication.send(unique_id_property)
      index_parent_facets(publication, solr_doc)
    end

    # Index id and title of the page's container. Does nothing unless
    # page.container is a NewspaperContainer.
    #
    # @param page [NewspaperPage]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_container(page, solr_doc)
      container = page.container
      return unless container.is_a?(NewspaperContainer)
      solr_doc['container_id_ssi'] = container.id
      solr_doc['container_title_ssi'] = container.title.first
    end

    # Index id, title, date, volume, edition and number of the object's
    # issue, then copy the issue's facet fields. Does nothing unless
    # object.issue is a NewspaperIssue.
    #
    # @param object [NewspaperPage||NewspaperArticle]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_issue(object, solr_doc)
      issue = object.issue
      return unless issue.is_a?(NewspaperIssue)
      solr_doc['issue_id_ssi'] = issue.id
      solr_doc['issue_title_ssi'] = issue.title.first
      if issue.publication_date.present?
        # a date already present in solr_doc takes precedence
        solr_doc['publication_date_dtsi'] ||= issue.publication_date.to_datetime
      end
      solr_doc['issue_volume_ssi'] = issue.volume
      # edition number falls back to '1' when the issue has none
      solr_doc['issue_edition_number_ssi'] = issue.edition_number || '1'
      solr_doc['issue_number_ssi'] = issue.issue_number
      index_parent_facets(issue, solr_doc)
    end

    # Index ids and titles of the article's pages. Does nothing when there
    # are no pages or the first entry is not a NewspaperPage.
    #
    # @param article [NewspaperArticle]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pages(article, solr_doc)
      pages = article.pages
      return unless pages.present? && pages.first.is_a?(NewspaperPage)
      solr_doc['page_ids_ssim'] = pages.map(&:id)
      solr_doc['page_titles_ssim'] = pages.map { |page| page.title.first }
    end

    # Index the ids of the pages before and after this one in the issue's
    # page order, and flag the first page. Does nothing unless page.issue is
    # a NewspaperIssue whose ordered page ids include this page.
    #
    # @param page [NewspaperPage]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_siblings(page, solr_doc)
      issue = page.issue
      return unless issue.is_a?(NewspaperIssue)
      ordered_ids = issue.ordered_page_ids
      position = ordered_ids.index(page.id)
      return unless position
      if position.zero?
        # first page: there is no previous page to record
        solr_doc['is_preceding_page_of_ssi'] = ordered_ids[1].presence
        solr_doc['first_page_bsi'] = true
      else
        solr_doc['is_following_page_of_ssi'] = ordered_ids[position - 1].presence
        solr_doc['is_preceding_page_of_ssi'] = ordered_ids[position + 1].presence
      end
    end

    # Index ids and titles of the page's articles. Does nothing when there
    # are no articles or the first entry is not a NewspaperArticle.
    #
    # @param page [NewspaperPage]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_articles(page, solr_doc)
      articles = page.articles
      return unless articles.present? && articles.first.is_a?(NewspaperArticle)
      solr_doc['article_ids_ssim'] = articles.map(&:id)
      solr_doc['article_titles_ssim'] = articles.map { |article| article.title.first }
    end

    # Copy the common facet fields from the parent's Solr document, keeping
    # any value solr_doc already has for a field.
    # TODO: this could probably be DRY'd out a bit,
    # overlaps with IndexesPlaceOfPublication#index_pop
    #
    # @param parent [NewspaperTitle||NewspaperIssue]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_parent_facets(parent, solr_doc)
      parent_doc = parent.to_solr
      place_fields = %w[label_sim city_sim county_sim state_sim country_sim llsim label_tesim]
                     .map { |suffix| "place_of_publication_#{suffix}" }
      (['language_sim'] + place_fields).each do |field|
        solr_doc[field] ||= parent_doc[field]
      end
    end
  end
end
@@ -0,0 +1,16 @@
# Generated via
#  `rails generate hyrax:work NewspaperArticle`
#
# Solr indexer for NewspaperArticle works: on top of the core newspaper
# indexing it writes the genre labels to a searchable and a facetable field.
class NewspaperArticleIndexer < NewspaperWorks::NewspaperCoreIndexer
  # @return [Hash] the inherited Solr document with genre label fields set
  def generate_solr_document
    solr_doc = super
    genre_service = Hyrax::ArticleGenreService.new
    # look up a label for each genre value, passing the value itself in the
    # block -- presumably the fallback when the service has no label for it
    labels = object.genre.map { |genre| genre_service.label(genre) { genre } }
    # both fields are nil when there are no labels
    indexed_labels = labels.presence
    solr_doc['genre_tesim'] = indexed_labels
    solr_doc['genre_sim'] = indexed_labels
    solr_doc
  end
end
@@ -0,0 +1,18 @@
# Generated via
#  `rails generate hyrax:work NewspaperContainer`
#
# Solr indexer for NewspaperContainer works. It defines nothing of its own;
# all behavior is inherited from NewspaperWorks::NewspaperCoreIndexer.
class NewspaperContainerIndexer < NewspaperWorks::NewspaperCoreIndexer
  # To add custom indexing behavior, override #generate_solr_document:
  #   def generate_solr_document
  #     super.tap do |solr_doc|
  #       solr_doc['my_custom_field_ssim'] = object.my_custom_property
  #     end
  #   end
end
@@ -0,0 +1,26 @@
# Generated via
#  `rails generate hyrax:work NewspaperIssue`
#
# Solr indexer for NewspaperIssue works: on top of the core newspaper indexing
# it writes the publication date to a datetime field and defaults the edition
# number.
class NewspaperIssueIndexer < NewspaperWorks::NewspaperCoreIndexer
  # @return [Hash] the inherited Solr document with the issue fields adjusted
  def generate_solr_document
    solr_doc = super
    issue_date = object.publication_date
    # set manually to ensure correct field type (_dtsi); only done for a
    # value in YYYY-MM-DD form
    if issue_date =~ /\A\d{4}-\d{2}-\d{2}\z/
      solr_doc['publication_date_ssi'] = nil
      solr_doc['publication_date_dtsi'] = issue_date.to_datetime
    end

    # if edition number is not set, add a default
    # to support ChronAm-style URL pattern linking
    solr_doc['edition_number_tesim'] ||= '1'
    solr_doc
  end
end
@@ -0,0 +1,9 @@
# Solr indexer for NewspaperPage works: on top of the core newspaper indexing
# it indexes the page's full text.
class NewspaperPageIndexer < NewspaperWorks::NewspaperCoreIndexer
  include NewspaperWorks::IndexesFullText

  # @return [Hash] the inherited Solr document with full-text fields added
  def generate_solr_document
    solr_doc = super
    index_full_text(object, solr_doc)
    solr_doc
  end
end
@@ -0,0 +1,19 @@
# Generated via
#  `rails generate hyrax:work NewspaperTitle`
#
# Solr indexer for NewspaperTitle works: on top of the core newspaper indexing
# it indexes the start and end of the publication date range.
class NewspaperTitleIndexer < NewspaperWorks::NewspaperCoreIndexer
  include NewspaperWorks::IndexesPublicationDateRange

  # @return [Hash] the inherited Solr document with date range fields added
  def generate_solr_document
    solr_doc = super
    # each end of the range is indexed only when the work has a value for it
    range_start = object.publication_date_start
    index_pubdate_start(range_start, solr_doc) if range_start.present?
    range_end = object.publication_date_end
    index_pubdate_end(range_end, solr_doc) if range_end.present?
    solr_doc
  end
end
@@ -0,0 +1,21 @@
# Core indexer for newspaper work types
module NewspaperWorks
  # Base Solr indexer for newspaper work types. Adds place-of-publication and
  # relationship indexing to the standard Hyrax work indexing.
  class NewspaperCoreIndexer < Hyrax::WorkIndexer
    # This indexes the default metadata. You can remove it if you want to
    # provide your own metadata and indexing.
    include Hyrax::IndexesBasicMetadata
    include NewspaperWorks::IndexesPlaceOfPublication
    include NewspaperWorks::IndexesRelationships

    # Remote label fetching for based_near (Hyrax::IndexesLinkedMetadata) is
    # intentionally not included here.

    # @return [Hash] the inherited Solr document with place-of-publication
    #   and relationship fields added
    def generate_solr_document
      solr_doc = super
      index_pop(object, solr_doc)
      index_relationships(object, solr_doc)
      solr_doc
    end
  end
end
@@ -0,0 +1,4 @@
module NewspaperWorks
  # Base ActiveJob class in the NewspaperWorks namespace; it adds no behavior
  # of its own.
  class ApplicationJob < ActiveJob::Base
  end
end