newspaper_works 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (461) hide show
  1. checksums.yaml +7 -0
  2. data/.fcrepo_wrapper +4 -0
  3. data/.gitignore +43 -0
  4. data/.rubocop.yml +143 -0
  5. data/.solr_wrapper +8 -0
  6. data/.travis.yml +50 -0
  7. data/Gemfile +47 -0
  8. data/LICENSE +203 -0
  9. data/README.md +159 -0
  10. data/Rakefile +38 -0
  11. data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
  12. data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
  13. data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
  14. data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
  15. data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
  16. data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
  17. data/app/assets/config/newspaper_works_manifest.js +2 -0
  18. data/app/assets/images/newspaper_works/.keep +0 -0
  19. data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
  20. data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
  21. data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
  22. data/app/assets/javascripts/newspaper_works.js +4 -0
  23. data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
  24. data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
  25. data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
  26. data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
  27. data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
  28. data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
  29. data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
  30. data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
  31. data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
  32. data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
  33. data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
  34. data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
  35. data/app/forms/hyrax/newspaper_article_form.rb +11 -0
  36. data/app/forms/hyrax/newspaper_container_form.rb +11 -0
  37. data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
  38. data/app/forms/hyrax/newspaper_page_form.rb +15 -0
  39. data/app/forms/hyrax/newspaper_title_form.rb +12 -0
  40. data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
  41. data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
  42. data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
  43. data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
  44. data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
  45. data/app/helpers/newspaper_works/application_helper.rb +5 -0
  46. data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
  47. data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
  48. data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
  49. data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
  50. data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
  51. data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
  52. data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
  53. data/app/indexers/newspaper_article_indexer.rb +16 -0
  54. data/app/indexers/newspaper_container_indexer.rb +18 -0
  55. data/app/indexers/newspaper_issue_indexer.rb +26 -0
  56. data/app/indexers/newspaper_page_indexer.rb +9 -0
  57. data/app/indexers/newspaper_title_indexer.rb +19 -0
  58. data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
  59. data/app/jobs/newspaper_works/application_job.rb +4 -0
  60. data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
  61. data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
  62. data/app/mailers/newspaper_works/application_mailer.rb +8 -0
  63. data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
  64. data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
  65. data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
  66. data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
  67. data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
  68. data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
  69. data/app/models/file_set.rb +10 -0
  70. data/app/models/newspaper_article.rb +158 -0
  71. data/app/models/newspaper_container.rb +86 -0
  72. data/app/models/newspaper_issue.rb +115 -0
  73. data/app/models/newspaper_page.rb +70 -0
  74. data/app/models/newspaper_title.rb +111 -0
  75. data/app/models/newspaper_works/application_record.rb +6 -0
  76. data/app/models/newspaper_works/derivative_attachment.rb +8 -0
  77. data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
  78. data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
  79. data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
  80. data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
  81. data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
  82. data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
  83. data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
  84. data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
  85. data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
  86. data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
  87. data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
  88. data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
  89. data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
  90. data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
  91. data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
  92. data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
  93. data/app/services/hyrax/article_genre_service.rb +9 -0
  94. data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
  95. data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
  96. data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
  97. data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
  98. data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
  99. data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
  100. data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
  101. data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
  102. data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
  103. data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
  104. data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
  105. data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
  106. data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
  107. data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
  108. data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
  109. data/app/views/catalog/_snippets_more.html.erb +16 -0
  110. data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
  111. data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
  112. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  113. data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
  114. data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
  115. data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
  116. data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
  117. data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
  118. data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
  119. data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
  120. data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
  121. data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
  122. data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
  123. data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
  124. data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
  125. data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
  126. data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
  127. data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
  128. data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
  129. data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
  130. data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
  131. data/app/views/newspaper_works/base/_show.html.erb +45 -0
  132. data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
  133. data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
  134. data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
  135. data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
  136. data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
  137. data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
  138. data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
  139. data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
  140. data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
  141. data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
  142. data/app/views/records/edit_fields/_genre.html.erb +4 -0
  143. data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
  144. data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
  145. data/bin/rails +13 -0
  146. data/config/fcrepo_wrapper_test.yml +5 -0
  147. data/config/initializers/assets.rb +2 -0
  148. data/config/locales/newspaper_article.de.yml +12 -0
  149. data/config/locales/newspaper_article.en.yml +12 -0
  150. data/config/locales/newspaper_article.es.yml +12 -0
  151. data/config/locales/newspaper_article.fr.yml +12 -0
  152. data/config/locales/newspaper_article.it.yml +12 -0
  153. data/config/locales/newspaper_article.pt-BR.yml +12 -0
  154. data/config/locales/newspaper_article.zh.yml +12 -0
  155. data/config/locales/newspaper_container.de.yml +8 -0
  156. data/config/locales/newspaper_container.en.yml +8 -0
  157. data/config/locales/newspaper_container.es.yml +8 -0
  158. data/config/locales/newspaper_container.fr.yml +8 -0
  159. data/config/locales/newspaper_container.it.yml +8 -0
  160. data/config/locales/newspaper_container.pt-BR.yml +8 -0
  161. data/config/locales/newspaper_container.zh.yml +8 -0
  162. data/config/locales/newspaper_issue.de.yml +8 -0
  163. data/config/locales/newspaper_issue.en.yml +8 -0
  164. data/config/locales/newspaper_issue.es.yml +8 -0
  165. data/config/locales/newspaper_issue.fr.yml +8 -0
  166. data/config/locales/newspaper_issue.it.yml +8 -0
  167. data/config/locales/newspaper_issue.pt-BR.yml +8 -0
  168. data/config/locales/newspaper_issue.zh.yml +8 -0
  169. data/config/locales/newspaper_page.de.yml +15 -0
  170. data/config/locales/newspaper_page.en.yml +15 -0
  171. data/config/locales/newspaper_page.es.yml +15 -0
  172. data/config/locales/newspaper_page.fr.yml +15 -0
  173. data/config/locales/newspaper_page.it.yml +15 -0
  174. data/config/locales/newspaper_page.pt-BR.yml +15 -0
  175. data/config/locales/newspaper_page.zh.yml +15 -0
  176. data/config/locales/newspaper_title.de.yml +8 -0
  177. data/config/locales/newspaper_title.en.yml +8 -0
  178. data/config/locales/newspaper_title.es.yml +8 -0
  179. data/config/locales/newspaper_title.fr.yml +8 -0
  180. data/config/locales/newspaper_title.it.yml +8 -0
  181. data/config/locales/newspaper_title.pt-BR.yml +8 -0
  182. data/config/locales/newspaper_title.zh.yml +8 -0
  183. data/config/locales/newspaper_works.de.yml +50 -0
  184. data/config/locales/newspaper_works.en.yml +52 -0
  185. data/config/locales/newspaper_works.es.yml +52 -0
  186. data/config/locales/newspaper_works.fr.yml +52 -0
  187. data/config/locales/newspaper_works.it.yml +52 -0
  188. data/config/locales/newspaper_works.pt-BR.yml +52 -0
  189. data/config/locales/newspaper_works.zh.yml +52 -0
  190. data/config/routes.rb +9 -0
  191. data/config/solr_wrapper_test.yml +9 -0
  192. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  193. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  194. data/config/test-fixture/solr-config/elevate.xml +36 -0
  195. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  196. data/config/test-fixture/solr-config/protwords.txt +21 -0
  197. data/config/test-fixture/solr-config/schema.xml +366 -0
  198. data/config/test-fixture/solr-config/scripts.conf +24 -0
  199. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  200. data/config/test-fixture/solr-config/spellings.txt +2 -0
  201. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  202. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  203. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  204. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  205. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  206. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  207. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  208. data/config/vendor/imagemagick-6-policy.xml +76 -0
  209. data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
  210. data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
  211. data/lib/generators/newspaper_works/assets_generator.rb +29 -0
  212. data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
  213. data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
  214. data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
  215. data/lib/generators/newspaper_works/install_generator.rb +97 -0
  216. data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
  217. data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
  218. data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
  219. data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
  220. data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
  221. data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
  222. data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
  223. data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
  224. data/lib/newspaper_works/configuration.rb +14 -0
  225. data/lib/newspaper_works/data/fileset_helper.rb +25 -0
  226. data/lib/newspaper_works/data/path_helper.rb +40 -0
  227. data/lib/newspaper_works/data/work_derivatives.rb +314 -0
  228. data/lib/newspaper_works/data/work_file.rb +92 -0
  229. data/lib/newspaper_works/data/work_files.rb +181 -0
  230. data/lib/newspaper_works/data.rb +35 -0
  231. data/lib/newspaper_works/engine.rb +42 -0
  232. data/lib/newspaper_works/errors.rb +14 -0
  233. data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
  234. data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
  235. data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
  236. data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
  237. data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
  238. data/lib/newspaper_works/ingest/from_command.rb +52 -0
  239. data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
  240. data/lib/newspaper_works/ingest/issue_images.rb +51 -0
  241. data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
  242. data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
  243. data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
  244. data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
  245. data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
  246. data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
  247. data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
  248. data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
  249. data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
  250. data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
  251. data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
  252. data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
  253. data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
  254. data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
  255. data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
  256. data/lib/newspaper_works/ingest/ndnp.rb +21 -0
  257. data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
  258. data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
  259. data/lib/newspaper_works/ingest/page_image.rb +52 -0
  260. data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
  261. data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
  262. data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
  263. data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
  264. data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
  265. data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
  266. data/lib/newspaper_works/ingest/publication_info.rb +44 -0
  267. data/lib/newspaper_works/ingest.rb +90 -0
  268. data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
  269. data/lib/newspaper_works/logging.rb +54 -0
  270. data/lib/newspaper_works/page_finder.rb +62 -0
  271. data/lib/newspaper_works/resource_fetcher.rb +78 -0
  272. data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
  273. data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
  274. data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
  275. data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
  276. data/lib/newspaper_works/text_extraction.rb +10 -0
  277. data/lib/newspaper_works/version.rb +3 -0
  278. data/lib/newspaper_works.rb +19 -0
  279. data/lib/tasks/newspaper_works_tasks.rake +39 -0
  280. data/newspaper_works.gemspec +49 -0
  281. data/spec/.keep.txt +1 -0
  282. data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
  283. data/spec/controllers/catalog_controller_spec.rb +63 -0
  284. data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
  285. data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
  286. data/spec/factories/ability.rb +6 -0
  287. data/spec/factories/newspaper_issue.rb +7 -0
  288. data/spec/factories/newspaper_issue_ingest.rb +6 -0
  289. data/spec/factories/newspaper_page.rb +7 -0
  290. data/spec/factories/newspaper_page_ingest.rb +6 -0
  291. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  292. data/spec/factories/newspaper_title.rb +8 -0
  293. data/spec/factories/uploaded_pdf_file.rb +9 -0
  294. data/spec/factories/user.rb +13 -0
  295. data/spec/features/front_pages_for_title_spec.rb +19 -0
  296. data/spec/features/newspaper_title_search_spec.rb +30 -0
  297. data/spec/features/newspapers_search_spec.rb +49 -0
  298. data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
  299. data/spec/features_shared.rb +71 -0
  300. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  301. data/spec/fixtures/files/4.1.07.tiff +0 -0
  302. data/spec/fixtures/files/README.md +7 -0
  303. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  304. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  305. data/spec/fixtures/files/credits.md +16 -0
  306. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  307. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  308. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  309. data/spec/fixtures/files/minimal-alto.xml +31 -0
  310. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  311. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  312. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  313. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  314. data/spec/fixtures/files/ocr_alto.xml +202 -0
  315. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  316. data/spec/fixtures/files/ocr_color.tiff +0 -0
  317. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  318. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  319. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  320. data/spec/fixtures/files/page1.tiff +0 -0
  321. data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
  322. data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
  323. data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
  324. data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
  325. data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
  326. data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
  327. data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
  328. data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
  329. data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
  330. data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
  331. data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
  332. data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
  333. data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
  334. data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
  335. data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
  336. data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
  337. data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
  338. data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
  339. data/spec/fixtures/files/resource_mocks/urls.json +82 -0
  340. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  341. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  342. data/spec/fixtures/files/thumbnail.jpg +0 -0
  343. data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
  344. data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
  345. data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
  346. data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
  347. data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
  348. data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
  349. data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
  350. data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
  351. data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
  352. data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
  353. data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
  354. data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
  355. data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
  356. data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
  357. data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
  358. data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
  359. data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
  360. data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
  361. data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
  362. data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
  363. data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
  364. data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
  365. data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
  366. data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
  367. data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
  368. data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
  369. data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
  370. data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
  371. data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
  372. data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
  373. data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
  374. data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
  375. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
  376. data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
  377. data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
  378. data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
  379. data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
  380. data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
  381. data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
  382. data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
  383. data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
  384. data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
  385. data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
  386. data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
  387. data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
  388. data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
  389. data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
  390. data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
  391. data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
  392. data/spec/lib/newspaper_works/logging_spec.rb +53 -0
  393. data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
  394. data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
  395. data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
  396. data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
  397. data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
  398. data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
  399. data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
  400. data/spec/misc_shared.rb +109 -0
  401. data/spec/model_shared.rb +134 -0
  402. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
  403. data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
  404. data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
  405. data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
  406. data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
  407. data/spec/models/newspaper_article_spec.rb +73 -0
  408. data/spec/models/newspaper_container_spec.rb +111 -0
  409. data/spec/models/newspaper_issue_spec.rb +91 -0
  410. data/spec/models/newspaper_page_spec.rb +44 -0
  411. data/spec/models/newspaper_title_spec.rb +116 -0
  412. data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
  413. data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
  414. data/spec/models/solr_document_spec.rb +14 -0
  415. data/spec/ndnp_shared.rb +48 -0
  416. data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
  417. data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
  418. data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
  419. data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
  420. data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
  421. data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
  422. data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
  423. data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
  424. data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
  425. data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
  426. data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
  427. data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
  428. data/spec/routing/route_spec.rb +52 -0
  429. data/spec/search_builders/custom_search_builder_spec.rb +34 -0
  430. data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
  431. data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
  432. data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
  433. data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
  434. data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
  435. data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
  436. data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
  437. data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
  438. data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
  439. data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
  440. data/spec/spec_helper.rb +261 -0
  441. data/spec/support/controller_level_helpers.rb +28 -0
  442. data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
  443. data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
  444. data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
  445. data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
  446. data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
  447. data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
  448. data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
  449. data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
  450. data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
  451. data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
  452. data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
  453. data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
  454. data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
  455. data/tasks/newspaperworks_dev.rake +26 -0
  456. data/test/integration/navigation_test.rb +7 -0
  457. data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
  458. data/test/newspaper_works_test.rb +7 -0
  459. data/test/test_helper.rb +17 -0
  460. data/tmp/.keep +0 -0
  461. metadata +1037 -0
@@ -0,0 +1,120 @@
1
+ require 'open3'
2
+
3
require 'fileutils'
require 'securerandom'
require 'shellwords'
require 'tmpdir'

module NewspaperWorks
  # Derivative service plugin that writes a JPEG 2000 (.jp2) derivative by
  # shelling out to the OpenJPEG command-line encoder.
  #
  # The encoder is always pointed at a temporary input: either a ".tif"
  # symlink to the source, or a NetPBM bitmap made with ImageMagick
  # `convert`.  Temporary inputs are removed once the encoder has run, even
  # when a step raises.
  class JP2DerivativeService < NewspaperPageDerivativeService
    # OpenJPEG 2000 Command to make NDNP-compliant grayscale JP2:
    CMD_GRAY = 'opj_compress -i %<source_file>s -o %<out_file>s ' \
               '-d 0,0 -b 64,64 -n 6 -p RLCP -t 1024,1024 -I -M 1 ' \
               '-r 64,53.821,45.249,40,32,26.911,22.630,20,16,14.286,' \
               '11.364,10,8,6.667,5.556,4.762,4,3.333,2.857,2.500,2,' \
               '1.667,1.429,1.190,1'.freeze

    # OpenJPEG 2000 Command to make RGB JP2:
    CMD_COLOR = 'opj_compress -i %<source_file>s -o %<out_file>s ' \
                '-d 0,0 -b 64,64 -n 6 -p RPCL -t 1024,1024 -I -M 1 ' \
                '-r 2.4,1.48331273,.91673033,.56657224,.35016049,.21641118,' \
                '.13374944,.0944,.08266171'.freeze

    # OpenJPEG 1.x command replacement for 2.x opj_compress, takes same options;
    # this is necessary on Ubuntu Trusty (e.g. Travis CI)
    CMD_1X = 'image_to_j2k'.freeze

    # Target file extension of this service plugin:
    TARGET_EXT = 'jp2'.freeze

    attr_accessor :source_meta
    attr_reader :file_set
    delegate :uri, :mime_type, to: :file_set

    # @param file_set [Object] file set whose derivative is to be made
    def initialize(file_set)
      # cached result string for imagemagick `identify` command
      @source_meta = nil
      @command = nil
      # temporary files/symlinks to delete once the encoder has run
      @unlink_after_creation = []
      super(file_set)
    end

    # Make the JP2 derivative for the source file, unless the file set's
    # mime type says the master already is a JP2.
    #
    # @param filename [String] full path to the source file
    def create_derivatives(filename)
      # Base class takes care of loading @source_path, @dest_path
      super(filename)

      # no creation if jp2 master => deemed unnecessary/duplicative
      return if mime_type == 'image/jp2'

      # if we have a non-TIFF source, or a 1-bit monochrome source, we need
      # to make a NetPBM-based intermediate (temporary) file for OpenJPEG
      # to consume.
      needs_intermediate = !tiff_source? || one_bit?

      begin
        # We use either intermediate temp file, or temp symlink (to work
        # around OpenJPEG 2000 file naming quirk).
        needs_intermediate ? make_intermediate_source : make_symlink

        # Run the rendered OpenJPEG command to make derivative at @dest_path
        `#{opj_command}`
      ensure
        # Always remove intermediates, even if a command above raised
        cleanup_intermediate
      end
    end

    private

      # source introspection: does `identify` output mention TIFF?
      def tiff_source?
        identify.include?('TIFF')
      end

      # OpenJPEG binaries have annoying quirk of only using TIFF input
      # files whose name ends in .TIF or .tif (three letter); for all
      # non-monochrome TIFF files, we just assume we need to symlink
      # to such a filename.
      def make_symlink
        tmpname = File.join(Dir.tmpdir, "#{SecureRandom.uuid}.tif")
        FileUtils.ln_s(@source_path, tmpname)
        @unlink_after_creation.push(tmpname)
        # finally, point @source_path for command at intermediate link:
        @source_path = tmpname
      end

      # Convert the source into a randomly named temporary bitmap (.ppm for
      # color, .pgm otherwise) for OpenJPEG to consume.
      def make_intermediate_source
        ext = use_color? ? 'ppm' : 'pgm'
        tmpname = File.join(Dir.tmpdir, "#{SecureRandom.uuid}.#{ext}")
        # if pdf source, get only first page
        source_path = @source_path
        source_path += '[0]' if @source_path.end_with?('pdf')
        # registered before conversion so a partial file is cleaned up too
        @unlink_after_creation.push(tmpname)
        # Use ImageMagick `convert` to create intermediate bitmap; paths are
        # shell-escaped because the source file name is not under our control
        `convert #{Shellwords.escape(source_path)} #{Shellwords.escape(tmpname)}`
        # finally, point @source_path for command at intermediate file:
        @source_path = tmpname
      end

      # Render the OpenJPEG command line for the current source/destination.
      #
      # @return [String] command, using the OpenJPEG 1.x binary name when
      #   `opj_compress` is not found on the PATH
      def opj_command
        template = use_color? ? CMD_COLOR : CMD_GRAY
        template = template.sub('opj_compress', CMD_1X) if `which opj_compress`.empty?
        # return command with (shell-escaped) file names injected
        format(
          template,
          source_file: Shellwords.escape(@source_path),
          out_file: Shellwords.escape(@dest_path)
        )
      end

      # remove symlink or intermediate file once we no longer need it;
      # rm_f so that a file which was never created does not raise here
      def cleanup_intermediate
        @unlink_after_creation.each { |path| FileUtils.rm_f(path) }
        @unlink_after_creation.clear
      end
  end
end
@@ -0,0 +1,91 @@
1
require 'fileutils'
require 'open3'

module NewspaperWorks
  # Base type for derivative services specific to NewspaperPage only
  class NewspaperPageDerivativeService
    attr_reader :file_set, :master_format
    delegate :uri, :mime_type, to: :file_set

    # Subclasses producing a single derivative override this with the file
    # extension of that derivative.
    TARGET_EXT = nil

    # @return [String, nil] TARGET_EXT of the class it is called on
    def self.target_ext
      self::TARGET_EXT
    end

    # @param file_set [Object] file set derivatives are made for
    def initialize(file_set)
      @file_set = file_set
      @dest_path = nil
      @source_path = nil
      @source_meta = nil
    end

    # @return [Boolean] true only when the work containing the file set is
    #   a NewspaperPage
    def valid?
      parent = file_set.in_works[0]
      # fallback to Fedora-stored relationships if work's aggregation of
      # file set is not indexed in Solr
      parent = file_set.member_of.select(&:work?)[0] if parent.nil?
      parent.class == NewspaperPage
    end

    def derivative_path_factory
      Hyrax::DerivativePath
    end

    # prepare full path for passed extension/destination name, return path
    #
    # @param extension [String] destination name, e.g. 'txt'
    # @return [String] derivative path, whose directory now exists
    def prepare_path(extension)
      dest_path = derivative_path_factory.derivative_path_for_reference(
        @file_set,
        extension
      )
      # mkdir_p is a no-op when the directory already exists
      FileUtils.mkdir_p(File.dirname(dest_path))
      dest_path
    end

    # calculate and ensure directory components for singular @dest_path
    # should only be used by subclasses producing a single derivative
    def load_destpath
      @dest_path = prepare_path(self.class.target_ext)
    end

    # Output of `identify` (or `gm identify` for a source path ending in
    # jp2) for @source_path, cached in @source_meta after the first call.
    #
    # @return [String]
    def identify
      return @source_meta unless @source_meta.nil?
      cmd = ['identify', @source_path]
      # fallback to graphicsmagick if source is jp2, as Ubuntu 16.10
      # ImageMagick has no jp2 support.
      cmd.unshift('gm') if @source_path.end_with?('jp2')
      # argument-list form: the path is never interpreted by a shell
      Open3.popen3(*cmd) do |_stdin, stdout, _stderr, _wait_thr|
        @source_meta = stdout.read
      end
      @source_meta
    end

    # @return [Boolean] false when `identify` output mentions 'Gray' or the
    #   source is one-bit
    def use_color?
      # imagemagick `identify` output describes color space:
      !(identify.include?('Gray') || one_bit?)
    end

    # is source one-bit monochrome?
    def one_bit?
      identify.include?('1-bit')
    end

    # Record the source path and compute the destination path; subclasses
    # call this via super and then do the actual conversion.
    #
    # @param filename [String] presumed to be full path to source file
    def create_derivatives(filename)
      @source_path = filename

      # Get destination path from Hyrax for file extension defined in
      # TARGET_EXT constant on respective derivative service subclass.
      load_destpath
    end

    # Delete this file set's derivative files whose path ends with the
    # given extension (first argument), or with the class target extension
    # when no argument is passed.  Does nothing when neither is set.
    def cleanup_derivatives(*args)
      target_ext = args.first || self.class.target_ext
      return if target_ext.nil?
      derivative_path_factory.derivatives_for_reference(file_set).each do |path|
        FileUtils.rm_f(path) if path.end_with?(target_ext)
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'open3'
2
+
3
require 'shellwords'

module NewspaperWorks
  # Derivative service plugin that makes a single PDF derivative of a page
  # image with ImageMagick `convert` (GraphicsMagick for a JP2 source).
  class PDFDerivativeService < NewspaperPageDerivativeService
    TARGET_EXT = 'pdf'.freeze

    # PDF (JPEG, 8 bit grayscale), 150ppi
    GRAY_PDF_CMD = 'convert %<source_file>s ' \
                   '-resize 1800 -density 150 ' \
                   '-depth 8 -colorspace Gray ' \
                   '-compress jpeg %<out_file>s'.freeze

    # sRGB color PDF (JPEG, 8 bits per channel), 150ppi
    COLOR_PDF_CMD = 'convert %<source_file>s ' \
                    '-resize 1800 -density 150 ' \
                    '-depth 8 ' \
                    '-compress jpeg %<out_file>s'.freeze

    # graphicsmagick prefix, may be needed for jp2 source on Ubuntu
    GM_PREFIX = 'gm '.freeze
    # original, misspelled constant name; kept so existing references work
    GM_PREFX = GM_PREFIX

    # Get conversion command; command varies on whether or not we have
    # JP2 source, and whether we have color or grayscale material.
    #
    # @return [String] shell command with escaped source/destination paths
    def convert_cmd
      template = use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD
      cmd = format(
        template,
        source_file: Shellwords.escape(@source_path),
        out_file: Shellwords.escape(@dest_path)
      )
      @source_path.end_with?('jp2') ? GM_PREFIX + cmd : cmd
    end

    # @param filename [String] full path to the source file
    def create_derivatives(filename)
      # Base class takes care of loading @source_path, @dest_path
      super(filename)

      # no creation if pdf master
      return if mime_type == 'application/pdf'

      # Get and run imagemagick or graphicsmagick command
      `#{convert_cmd}`
    end
  end
end
@@ -0,0 +1,114 @@
1
+ # General derivative service for NewspaperWorks, which is meant to wrap
2
+ # and replace the stock Hyrax::FileSetDerivativeService with a proxy
3
+ # that runs one or more derivative service "plugin" components.
4
+ #
5
+ # Note: Hyrax::DerivativeService consumes this, instead of (directly)
6
+ # consuming Hyrax::FileSetDerivativeService.
7
+ #
8
+ # Unlike the "run the first valid plugin" arrangement that the
9
+ # Hyrax::DerivativeService uses to run an actual derivative creation
10
+ # service component, this component is:
11
+ #
12
+ # (a) Consumed by Hyrax::DerivativeService as that first valid plugin;
13
+ #
14
+ # (b) Wraps and runs 0..* plugins, not just the first.
15
+ #
16
+ # This should be registered to take precedence over default by:
17
+ # Hyrax::DerivativeService.services.unshift(
18
+ # NewspaperWorks::PluggableDerivativeService
19
+ # )
20
+ #
21
+ # Modify NewspaperWorks::PluggableDerivativeService.plugins
22
+ # to add, remove, or reorder plugin (derivative service) classes.
23
+ #
24
class NewspaperWorks::PluggableDerivativeService
  attr_reader :file_set
  delegate :uri, :mime_type, to: :file_set

  # make and expose an array of plugins; the default plugin is what Hyrax
  # uses OOTB (makes thumbnails and sometimes extracts text)
  @plugins = [Hyrax::FileSetDerivativesService]
  # the only method names proxied to plugins by method_missing
  @allowed_methods = [:cleanup_derivatives, :create_derivatives]
  class << self
    attr_accessor :plugins, :allowed_methods
  end

  # @return [Array<Class>] plugin classes registered on this class
  def plugins
    self.class.plugins
  end

  def initialize(file_set)
    @file_set = file_set
  end

  def valid?
    # this wrapper/proxy/composite is always valid, but it may compose
    # multiple plugins, some of which may or may not be valid, so
    # validity checks happen within as well.
    true
  end

  # Ruby invokes this hook with two arguments (name, include_private), so
  # both must be accepted or `respond_to?` raises ArgumentError.
  def respond_to_missing?(method_name, include_private = false)
    self.class.allowed_methods.include?(method_name) || super
  end

  # get derivative services relevant to method name and file_set context
  # -- omits plugins if particular destination exists or will soon.
  #
  # @param method_name [Symbol] proxied method about to be run
  # @return [Array] instantiated, valid plugins that should run
  def services(method_name)
    candidates = plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?)
    candidates.reject do |plugin|
      # plugins without a target_ext class method are never skipped
      dest = plugin.class.target_ext if plugin.class.respond_to?(:target_ext)
      skip_destination?(method_name, dest)
    end
  end

  # Proxy an allowed method to every relevant plugin, in registration order,
  # passing along positional arguments only.
  def method_missing(name, *args, **opts, &block)
    return super unless self.class.allowed_methods.include?(name)
    services(name).each do |plugin|
      plugin.send(name, *args)
    end
  end

  private

    # Should the plugin writing destination_name be left out of this run?
    # Only ever true for :create_derivatives.
    def skip_destination?(method_name, destination_name)
      return false if file_set.id.nil? || destination_name.nil?
      return false unless method_name == :create_derivatives
      # skip :create_derivatives if existing --> do not re-create
      existing_derivative?(destination_name) ||
        impending_derivative?(destination_name)
    end

    def existing_derivative?(name)
      path = derivative_path_factory.derivative_path_for_reference(
        file_set,
        name
      )
      File.exist?(path)
    end

    # is there an impending attachment from ingest logged to db?
    #   -- avoids stomping over pre-made derivative
    #      for which an attachment is still in-progress.
    def impending_derivative?(name)
      result = NewspaperWorks::DerivativeAttachment.find_by(
        fileset_id: file_set.id,
        destination_name: name
      )
      !result.nil?
    end

    def derivative_path_factory
      Hyrax::DerivativePath
    end
end
@@ -0,0 +1,56 @@
1
module NewspaperWorks
  # Derivative service plugin writing the text derivatives of a page:
  # plain text (.txt), ALTO (.xml) and flat JSON (.json).  When ALTO is
  # already available they are derived from it; otherwise OCR is run.
  class TextExtractionDerivativeService < NewspaperPageDerivativeService
    def initialize(file_set)
      super(file_set)
      @alto_path = nil
      @txt_path = nil
    end

    # @param src [String] path to the source file
    def create_derivatives(src)
      from_alto = NewspaperWorks::TextFormatsFromALTOService.new(file_set)
      if from_alto.alto_path.nil?
        # no usable ALTO found by that service: fall back to OCR
        create_derivatives_from_ocr(src)
      else
        from_alto.create_derivatives(src)
      end
    end

    # Run OCR over the source file and write all three text derivatives.
    #
    # @param filename [String] path to the source file
    def create_derivatives_from_ocr(filename)
      @source_path = filename
      # prepare destination directories, in order, for ALTO (as .xml files),
      # plain text (as .txt files) and flat JSON (as .json files):
      @alto_path, @txt_path, @json_path = %w[xml txt json].map do |extension|
        prepare_path(extension)
      end
      page_ocr = NewspaperWorks::TextExtraction::PageOCR.new(filename)
      # OCR will run once, on first method call to either .alto or .plain:
      write_plain_text(page_ocr.plain)
      write_alto(page_ocr.alto)
      write_json(page_ocr.word_json)
    end

    # Write ALTO XML to the prepared .xml destination
    def write_alto(xml)
      File.write(@alto_path, xml)
    end

    # Write plain text to the prepared .txt destination
    def write_plain_text(text)
      File.write(@txt_path, text)
    end

    # Write JSON text to the prepared .json destination
    def write_json(text)
      File.write(@json_path, text)
    end

    # Remove each kind of derivative this plugin is responsible for, via the
    # base class implementation.
    def cleanup_derivatives
      super('txt')
      super('xml')
      super('json')
    end
  end
end
@@ -0,0 +1,77 @@
1
module NewspaperWorks
  # Plugin to make text format derivatives (JSON, plain-text) from ALTO,
  #   either existing derivative, or an impending attachment.
  #   NOTE: to keep this from conflicting with TextExtractionDerivativeService,
  #         this class should be invoked by it, not PluggableDerivativeService.
  class TextFormatsFromALTOService < NewspaperPageDerivativeService
    # NOTE(review): 'tiff' looks copied from the TIFF plugin; this class
    # writes 'json' and 'txt' -- confirm whether this value is relied upon.
    TARGET_EXT = 'tiff'.freeze

    # Write data, as UTF-8 encoded text, to the derivative path for the
    # given destination name.
    #
    # @param destination [String] destination name / extension
    # @param data [String] content to write
    def save_derivative(destination, data)
      # prepare_path loads/prepares the base of the "pairtree" dir structure
      # for this extension and file set, and returns the derivative path
      target = prepare_path(destination)
      File.open(target, "w:UTF-8") { |out| out.write(data) }
    end

    # @return [Boolean] true when path is given, exists, and has content
    def nonempty_file?(path)
      return false if path.nil?
      File.exist?(path) && !File.zero?(path)
    end

    # if there was no derivative yet, there might be one in-transit from
    #   an ingest, so check for that, and use its source if applicable:
    #
    # @return [String, nil] path logged for an 'xml' attachment, when that
    #   file is non-empty
    def incoming_alto_path
      logged_paths = NewspaperWorks::DerivativeAttachment.where(
        fileset_id: @file_set.id,
        destination_name: 'xml'
      ).pluck(:path)
      candidate = logged_paths.uniq.first
      nonempty_file?(candidate) ? candidate : nil
    end

    # @return [String, nil] path of ALTO XML to read from, if any
    def alto_path
      # check first for existing, non-empty derivative data:
      existing = derivative_path_factory.derivative_path_for_reference(
        @file_set,
        'xml'
      )
      nonempty_file?(existing) ? existing : incoming_alto_path
    end

    # @return [String, nil] ALTO XML content read as UTF-8, if any
    def alto
      source = alto_path
      return if source.nil?
      File.read(source, encoding: 'UTF-8')
    end

    # Make the JSON and plain text derivatives from the available ALTO.
    # As this plugin makes derivatives of a derivative, _filename is ignored.
    def create_derivatives(_filename)
      xml = alto
      return if xml.nil?
      # Image width from characterized primary file helps ensure proper scaling:
      primary = @file_set.original_file
      width = height = nil
      unless primary.nil?
        width = primary.width[0].to_i
        height = primary.height[0].to_i
      end
      # ALTOReader is responsible for transcoding, this class just saves result
      reader = NewspaperWorks::TextExtraction::AltoReader.new(xml, width, height)
      save_derivative('json', reader.json)
      save_derivative('txt', reader.text)
    end

    def cleanup_derivatives(*args)
      # do nothing here; NewspaperWorks::TextExtractionDerivativeService
      #   has this job instead for cleaning ALTO, JSON, TXT.
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'open3'
2
+
3
require 'shellwords'

module NewspaperWorks
  # Derivative service plugin that makes a single TIFF derivative of a page
  # source with ImageMagick `convert` (GraphicsMagick for a JP2 source).
  class TIFFDerivativeService < NewspaperPageDerivativeService
    TARGET_EXT = 'tiff'.freeze

    # For imagemagick commands, the output type is determined by the
    #   output file's extension.
    # TIFF (LZW, 8 bit grayscale)
    GRAY_CMD = 'convert %<source_file>s ' \
               '-depth 8 -colorspace Gray ' \
               '-compress lzw %<out_file>s'.freeze

    # Monochrome one-bit black/white TIFF, Group 4 compressed:
    MONO_CMD = 'convert %<source_file>s ' \
               '-depth 1 -monochrome -compress Group4 -type bilevel ' \
               '%<out_file>s'.freeze

    # sRGB color TIFF (8 bits per channel, lzw)
    COLOR_CMD = 'convert %<source_file>s ' \
                '-depth 24 ' \
                '-compress lzw %<out_file>s'.freeze

    # graphicsmagick prefix, may be needed for jp2 source on Ubuntu
    GM_PREFIX = 'gm '.freeze
    # original, misspelled constant name; kept so existing references work
    GM_PREFX = GM_PREFIX

    # Get conversion command; command varies on whether or not we have
    # JP2 source, and whether we have color, grayscale or one-bit material.
    #
    # @return [String] shell command with escaped source/destination paths
    def convert_cmd
      source_path = @source_path
      # for a pdf source, convert only the first page
      source_path += '[0]' if @source_path.end_with?('pdf')
      template = use_color? ? COLOR_CMD : GRAY_CMD
      template = MONO_CMD if one_bit?
      cmd = format(
        template,
        source_file: Shellwords.escape(source_path),
        out_file: Shellwords.escape(@dest_path)
      )
      # normalization of command based on source
      @source_path.end_with?('jp2') ? GM_PREFIX + cmd : cmd
    end

    # @param filename [String] full path to the source file
    def create_derivatives(filename)
      # Base class takes care of loading @source_path, @dest_path
      super(filename)

      # no creation if tiff master
      return if mime_type == 'image/tiff'

      # Get and run imagemagick or graphicsmagick command
      `#{convert_cmd}`
    end
  end
end
@@ -0,0 +1,48 @@
1
module NewspaperWorks
  # validates start and end date are properly formatted and end date comes after
  # or on the same date as the start date.
  class PublicationDateStartEndValidator < ActiveModel::Validator
    # yyyy, yyyy-mm or yyyy-mm-dd; a day is only accepted after a month, and
    # every day from 01 through 31 is accepted (real-calendar validity of a
    # full date is checked separately).
    DATE_RANGE_REGEX = /\A\d{4}(-(0[1-9]|1[0-2])(-(0[1-9]|[12]\d|3[01]))?)?\z/

    # @param record [Object] responds to publication_date_start,
    #   publication_date_end and errors
    # @return [Boolean] true when both checks pass; the range check only
    #   runs when both dates are well formed
    def validate(record)
      start_date = record.publication_date_start
      end_date = record.publication_date_end
      valid_dates?(start_date, end_date, record) && start_before_end?(start_date, end_date, record)
    end

    private

      # Is pub_date well formed, and (for a full yyyy-mm-dd) a real date?
      def publication_date_valid?(pub_date)
        return false unless DATE_RANGE_REGEX.match(pub_date)
        parts = pub_date.split("-").map(&:to_i)
        parts.length < 3 || Date.valid_date?(*parts)
      end

      # Adds an error to publication_date_start when the end date precedes
      # the start date.  Dates are compared only to the precision they
      # share, most significant component first, so that e.g. 2018-12 to
      # 2019-01 is a valid range.
      def start_before_end?(start_date, end_date, record)
        return true unless start_date && end_date
        date_error = "Publication start date must be earlier or the same as end date."
        pub_start = start_date.split("-")
        pub_end = end_date.split("-")
        shared = [pub_start.length, pub_end.length].min
        # components are zero-padded, so string order equals numeric order
        if (pub_end.first(shared) <=> pub_start.first(shared)) == -1
          record.errors.add(:publication_date_start, date_error)
        end
        record.errors[:publication_date_start].blank?
      end

      # Adds a format error for each date that is given but not valid.
      #
      # @return [Boolean] true when neither date attribute has errors
      def valid_dates?(start_date, end_date, record)
        date_error = "Incorrect Date. Date input should be formatted yyyy[-mm][-dd] and be a valid date."
        if start_date && !publication_date_valid?(start_date)
          record.errors.add(:publication_date_start, date_error)
        end
        if end_date && !publication_date_valid?(end_date)
          record.errors.add(:publication_date_end, date_error)
        end
        record.errors[:publication_date_start].blank? && record.errors[:publication_date_end].blank?
      end
  end
end
@@ -0,0 +1,16 @@
1
module NewspaperWorks
  # validates that a properly formatted date has been entered
  class PublicationDateValidator < ActiveModel::Validator
    DATE_REGEX = /\A\d{4}-((0[1-9])|(1[0-2]))-((0[1-9])|([1-2][0-9])|(3[0-1]))\z/

    # Adds an error to publication_date when a value is present but is not
    # a yyyy-mm-dd string naming a real calendar date.
    #
    # @param record [Object] responds to publication_date and errors
    def validate(record)
      error_msg = "Incorrect Date. Date input should be formatted yyyy-mm-dd and be a valid date."
      pub_date = record.publication_date
      return unless pub_date.present?
      # the format is checked first, so the split below always has 3 parts
      well_formed = DATE_REGEX.match(pub_date) &&
                    Date.valid_date?(*pub_date.split("-").map(&:to_i))
      # errors.add registers the message; appending to errors[...] does not
      # on newer ActiveModel versions
      record.errors.add(:publication_date, error_msg) unless well_formed
    end
  end
end
@@ -0,0 +1,9 @@
1
<%# Gallery view wrapper for one search result: thumbnail plus caption.
    data-query carries the highlighted matches, or else the search query. -%>
<% session_query = current_search_session.query_params -%>
<% fileset_id = document.file_set_ids&.first -%>
<% thumbnail_query = highlight_matches(document, 'all_text_tsimv', 'em') || search_query(session_query) -%>
<div class="document col-xs-6 col-md-3">
  <div class="thumbnail" data-fileset="<%= fileset_id %>" data-query="<%= thumbnail_query %>">
    <%= render_newspaper_thumbnail_tag(document, session_query) %>
    <div class="caption">
      <%= render_document_partials document, blacklight_config.view_config(:gallery).partials, document_counter: document_counter %>
    </div>
  </div>
</div>
@@ -0,0 +1,9 @@
1
<%# Gallery view wrapper for one search result: thumbnail plus caption.
    data-query carries the highlighted matches, or else the search query. -%>
<% session_query = current_search_session.query_params -%>
<% fileset_id = document.file_set_ids&.first -%>
<% thumbnail_query = highlight_matches(document, 'all_text_tsimv', 'em') || search_query(session_query) -%>
<div class="document col-xs-6 col-md-3">
  <div class="thumbnail" data-fileset="<%= fileset_id %>" data-query="<%= thumbnail_query %>">
    <%= render_newspaper_thumbnail_tag(document, session_query) %>
    <div class="caption">
      <%= render_document_partials document, blacklight_config.view_config(:gallery).partials, document_counter: document_counter %>
    </div>
  </div>
</div>
@@ -0,0 +1,23 @@
1
<%# based on blacklight/app/views/catalog/_index_header_default.html.erb %>
<%# header bar for doc items in index view -%>
<div class="documentHeader row">
  <%# The title heading is narrower when the bookmark/action controls
      rendered something, so capture those controls first. -%>
  <% document_actions = capture do %>
    <%= render_index_doc_actions document, wrapping_class: "index-document-functions col-sm-3 col-lg-2" %>
  <% end %>
  <% heading_columns = document_actions.present? ? "col-sm-9 col-lg-10" : "col-md-12" -%>
  <h3 class="index_title document-title-heading <%= heading_columns %>">
    <% counter = document_counter_with_offset(document_counter) -%>
    <% if counter %>
      <span class="document-counter">
        <%= t('blacklight.search.documents.counter', counter: counter) %>
      </span>
    <% end %>
    <%# the anchor is built from the current search's query params -%>
    <%= link_to document.title_or_label,
                hyrax_newspaper_article_path(
                  document.id,
                  anchor: iiif_search_anchor(current_search_session.query_params)
                ) %>
  </h3>
  <%= document_actions %>
</div>