newspaper_works 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.fcrepo_wrapper +4 -0
- data/.gitignore +43 -0
- data/.rubocop.yml +143 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +50 -0
- data/Gemfile +47 -0
- data/LICENSE +203 -0
- data/README.md +159 -0
- data/Rakefile +38 -0
- data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
- data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
- data/app/assets/config/newspaper_works_manifest.js +2 -0
- data/app/assets/images/newspaper_works/.keep +0 -0
- data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
- data/app/assets/javascripts/newspaper_works.js +4 -0
- data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
- data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
- data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
- data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
- data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
- data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
- data/app/forms/hyrax/newspaper_article_form.rb +11 -0
- data/app/forms/hyrax/newspaper_container_form.rb +11 -0
- data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
- data/app/forms/hyrax/newspaper_page_form.rb +15 -0
- data/app/forms/hyrax/newspaper_title_form.rb +12 -0
- data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
- data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
- data/app/helpers/newspaper_works/application_helper.rb +5 -0
- data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
- data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
- data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
- data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
- data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
- data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
- data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
- data/app/indexers/newspaper_article_indexer.rb +16 -0
- data/app/indexers/newspaper_container_indexer.rb +18 -0
- data/app/indexers/newspaper_issue_indexer.rb +26 -0
- data/app/indexers/newspaper_page_indexer.rb +9 -0
- data/app/indexers/newspaper_title_indexer.rb +19 -0
- data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
- data/app/jobs/newspaper_works/application_job.rb +4 -0
- data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
- data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
- data/app/mailers/newspaper_works/application_mailer.rb +8 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
- data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
- data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
- data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
- data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
- data/app/models/file_set.rb +10 -0
- data/app/models/newspaper_article.rb +158 -0
- data/app/models/newspaper_container.rb +86 -0
- data/app/models/newspaper_issue.rb +115 -0
- data/app/models/newspaper_page.rb +70 -0
- data/app/models/newspaper_title.rb +111 -0
- data/app/models/newspaper_works/application_record.rb +6 -0
- data/app/models/newspaper_works/derivative_attachment.rb +8 -0
- data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
- data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
- data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
- data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
- data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
- data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
- data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
- data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
- data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
- data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
- data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
- data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
- data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
- data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
- data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
- data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
- data/app/services/hyrax/article_genre_service.rb +9 -0
- data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
- data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
- data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
- data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
- data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
- data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
- data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
- data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
- data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
- data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
- data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
- data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
- data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
- data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
- data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
- data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
- data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
- data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
- data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
- data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
- data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
- data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
- data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
- data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
- data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
- data/app/views/newspaper_works/base/_show.html.erb +45 -0
- data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
- data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
- data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
- data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
- data/app/views/records/edit_fields/_genre.html.erb +4 -0
- data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
- data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/newspaper_article.de.yml +12 -0
- data/config/locales/newspaper_article.en.yml +12 -0
- data/config/locales/newspaper_article.es.yml +12 -0
- data/config/locales/newspaper_article.fr.yml +12 -0
- data/config/locales/newspaper_article.it.yml +12 -0
- data/config/locales/newspaper_article.pt-BR.yml +12 -0
- data/config/locales/newspaper_article.zh.yml +12 -0
- data/config/locales/newspaper_container.de.yml +8 -0
- data/config/locales/newspaper_container.en.yml +8 -0
- data/config/locales/newspaper_container.es.yml +8 -0
- data/config/locales/newspaper_container.fr.yml +8 -0
- data/config/locales/newspaper_container.it.yml +8 -0
- data/config/locales/newspaper_container.pt-BR.yml +8 -0
- data/config/locales/newspaper_container.zh.yml +8 -0
- data/config/locales/newspaper_issue.de.yml +8 -0
- data/config/locales/newspaper_issue.en.yml +8 -0
- data/config/locales/newspaper_issue.es.yml +8 -0
- data/config/locales/newspaper_issue.fr.yml +8 -0
- data/config/locales/newspaper_issue.it.yml +8 -0
- data/config/locales/newspaper_issue.pt-BR.yml +8 -0
- data/config/locales/newspaper_issue.zh.yml +8 -0
- data/config/locales/newspaper_page.de.yml +15 -0
- data/config/locales/newspaper_page.en.yml +15 -0
- data/config/locales/newspaper_page.es.yml +15 -0
- data/config/locales/newspaper_page.fr.yml +15 -0
- data/config/locales/newspaper_page.it.yml +15 -0
- data/config/locales/newspaper_page.pt-BR.yml +15 -0
- data/config/locales/newspaper_page.zh.yml +15 -0
- data/config/locales/newspaper_title.de.yml +8 -0
- data/config/locales/newspaper_title.en.yml +8 -0
- data/config/locales/newspaper_title.es.yml +8 -0
- data/config/locales/newspaper_title.fr.yml +8 -0
- data/config/locales/newspaper_title.it.yml +8 -0
- data/config/locales/newspaper_title.pt-BR.yml +8 -0
- data/config/locales/newspaper_title.zh.yml +8 -0
- data/config/locales/newspaper_works.de.yml +50 -0
- data/config/locales/newspaper_works.en.yml +52 -0
- data/config/locales/newspaper_works.es.yml +52 -0
- data/config/locales/newspaper_works.fr.yml +52 -0
- data/config/locales/newspaper_works.it.yml +52 -0
- data/config/locales/newspaper_works.pt-BR.yml +52 -0
- data/config/locales/newspaper_works.zh.yml +52 -0
- data/config/routes.rb +9 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
- data/lib/generators/newspaper_works/assets_generator.rb +29 -0
- data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
- data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
- data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
- data/lib/generators/newspaper_works/install_generator.rb +97 -0
- data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
- data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
- data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
- data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
- data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
- data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
- data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
- data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
- data/lib/newspaper_works/configuration.rb +14 -0
- data/lib/newspaper_works/data/fileset_helper.rb +25 -0
- data/lib/newspaper_works/data/path_helper.rb +40 -0
- data/lib/newspaper_works/data/work_derivatives.rb +314 -0
- data/lib/newspaper_works/data/work_file.rb +92 -0
- data/lib/newspaper_works/data/work_files.rb +181 -0
- data/lib/newspaper_works/data.rb +35 -0
- data/lib/newspaper_works/engine.rb +42 -0
- data/lib/newspaper_works/errors.rb +14 -0
- data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
- data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
- data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
- data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
- data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
- data/lib/newspaper_works/ingest/from_command.rb +52 -0
- data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
- data/lib/newspaper_works/ingest/issue_images.rb +51 -0
- data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
- data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
- data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
- data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
- data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
- data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
- data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
- data/lib/newspaper_works/ingest/ndnp.rb +21 -0
- data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
- data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
- data/lib/newspaper_works/ingest/page_image.rb +52 -0
- data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
- data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
- data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
- data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
- data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
- data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
- data/lib/newspaper_works/ingest/publication_info.rb +44 -0
- data/lib/newspaper_works/ingest.rb +90 -0
- data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
- data/lib/newspaper_works/logging.rb +54 -0
- data/lib/newspaper_works/page_finder.rb +62 -0
- data/lib/newspaper_works/resource_fetcher.rb +78 -0
- data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
- data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
- data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
- data/lib/newspaper_works/text_extraction.rb +10 -0
- data/lib/newspaper_works/version.rb +3 -0
- data/lib/newspaper_works.rb +19 -0
- data/lib/tasks/newspaper_works_tasks.rake +39 -0
- data/newspaper_works.gemspec +49 -0
- data/spec/.keep.txt +1 -0
- data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
- data/spec/controllers/catalog_controller_spec.rb +63 -0
- data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
- data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_issue_ingest.rb +6 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_ingest.rb +6 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/features/front_pages_for_title_spec.rb +19 -0
- data/spec/features/newspaper_title_search_spec.rb +30 -0
- data/spec/features/newspapers_search_spec.rb +49 -0
- data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
- data/spec/features_shared.rb +71 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
- data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
- data/spec/fixtures/files/resource_mocks/urls.json +82 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
- data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
- data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
- data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
- data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
- data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
- data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
- data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
- data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
- data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
- data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
- data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
- data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
- data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
- data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
- data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
- data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
- data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
- data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
- data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
- data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
- data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
- data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
- data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
- data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
- data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
- data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
- data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
- data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
- data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
- data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
- data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
- data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
- data/spec/lib/newspaper_works/logging_spec.rb +53 -0
- data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
- data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
- data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
- data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
- data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
- data/spec/misc_shared.rb +109 -0
- data/spec/model_shared.rb +134 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
- data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
- data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
- data/spec/models/newspaper_article_spec.rb +73 -0
- data/spec/models/newspaper_container_spec.rb +111 -0
- data/spec/models/newspaper_issue_spec.rb +91 -0
- data/spec/models/newspaper_page_spec.rb +44 -0
- data/spec/models/newspaper_title_spec.rb +116 -0
- data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
- data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/ndnp_shared.rb +48 -0
- data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
- data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
- data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
- data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
- data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
- data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
- data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
- data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
- data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
- data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
- data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
- data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
- data/spec/routing/route_spec.rb +52 -0
- data/spec/search_builders/custom_search_builder_spec.rb +34 -0
- data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
- data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
- data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
- data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
- data/spec/spec_helper.rb +261 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
- data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
- data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
- data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
- data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
- data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
- data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
- data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
- data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
- data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
- data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
- data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
- data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
- data/tasks/newspaperworks_dev.rake +26 -0
- data/test/integration/navigation_test.rb +7 -0
- data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
- data/test/newspaper_works_test.rb +7 -0
- data/test/test_helper.rb +17 -0
- data/tmp/.keep +0 -0
- metadata +1037 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
# Generated via
|
2
|
+
# `rails generate hyrax:work NewspaperIssue`
|
3
|
+
module Hyrax
  # Form object for NewspaperIssue works. Starts from the term list of
  # NewspaperWorks::NewspaperCoreFormData, appends the issue-level terms and
  # drops several inherited ones.
  class NewspaperIssueForm < ::NewspaperWorks::NewspaperCoreFormData
    self.model_class = ::NewspaperIssue

    # Terms appended for issues.
    self.terms += %i[
      alternative_title volume edition_number edition_name
      issue_number extent publication_date
    ]

    # Inherited terms removed from the issue form.
    self.terms -= %i[creator contributor description subject]
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Generated via
|
2
|
+
# `rails generate hyrax:work NewspaperPage`
|
3
|
+
module Hyrax
  # Form object for NewspaperPage works, built directly on
  # Hyrax::Forms::WorkForm.
  class NewspaperPageForm < Hyrax::Forms::WorkForm
    self.model_class = ::NewspaperPage

    # Page-specific terms appended to the inherited list.
    # NOTE(review): an earlier revision also appended :resource_type here and
    # then removed it again in the `-=` below, so it never survived. The
    # resulting term list is unchanged; confirm that leaving :resource_type
    # off the page form is intended.
    self.terms += [:height, :width, :text_direction, :page_number, :section]

    # Inherited terms removed from the page form (each listed once; Array#-
    # drops every occurrence of a listed element).
    self.terms -= [:creator, :keyword, :rights_statement, :contributor,
                   :description, :license, :subject, :date_created,
                   :language, :based_near, :related_url, :source,
                   :resource_type, :publisher]

    # These are no longer required because they are removed from the form.
    self.required_fields -= [:creator, :keyword, :rights_statement]
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Generated via
|
2
|
+
# `rails generate hyrax:work NewspaperTitle`
|
3
|
+
module Hyrax
  # Form object for NewspaperTitle works. Starts from the term list of
  # NewspaperWorks::NewspaperCoreFormData, appends the title-level terms and
  # drops several inherited ones.
  class NewspaperTitleForm < ::NewspaperWorks::NewspaperCoreFormData
    self.model_class = ::NewspaperTitle

    # Terms appended for titles.
    self.terms += %i[
      alternative_title edition_name frequency preceded_by
      succeeded_by publication_date_start publication_date_end
    ]

    # Inherited terms removed from the title form.
    self.terms -= %i[creator contributor description source subject]
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module NewspaperWorks
  # Base work form that adjusts the term and required-field lists inherited
  # from Hyrax::Forms::WorkForm and permits nested place-of-publication
  # attributes.
  class NewspaperCoreFormData < Hyrax::Forms::WorkForm
    # Terms appended to, then removed from, the inherited list.
    self.terms += %i[resource_type place_of_publication issn lccn oclcnum held_by]
    self.terms -= %i[based_near date_created keyword related_url source]

    # Required-field adjustments relative to the inherited list.
    self.required_fields += %i[resource_type language held_by]
    self.required_fields -= %i[creator keyword rights_statement]

    class << self
      # Extends the inherited permitted params with a nested
      # place_of_publication_attributes entry allowing :id and :_destroy.
      #
      # @return [Array] the inherited permitted params plus the nested entry
      def build_permitted_params
        nested_place_params = { place_of_publication_attributes: %i[id _destroy] }
        super + [nested_place_params]
      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module NewspaperWorks
|
2
|
+
module BreadcrumbHelper
|
3
|
+
# Build the breadcrumb trail for a newspaper object: one entry per kind of
# ancestor the presenter responds to (title, issue, pages), followed by the
# formatted title of the current object.
#
# @param presenter [Newspaper*Presenter] the presenter for the current Newspaper object
# @param link_class [String] the class for the breadcrumb links
# @return [Array] flattened list of ancestor links plus the current title
def newspaper_breadcrumbs(presenter, link_class = nil)
  ancestor_methods = { title: :publication_id, issue: :issue_id, page: :page_ids }
  trail = ancestor_methods.each_with_object([]) do |(object_type, id_method), crumbs|
    next unless presenter.respond_to?(id_method)
    crumbs << create_breadcrumb_link(object_type, presenter, link_class)
  end
  trail << breadcrumb_object_title(presenter.title.first)
  trail.flatten
end
|
16
|
+
|
17
|
+
# Build the breadcrumb link(s) for one kind of ancestor.
#
# @param object_type [Symbol] the type of newspaper object, as a symbol (e.g. :issue)
# @param presenter [Newspaper*Presenter] the presenter for the current Newspaper object
# @param link_class [String] the class for the breadcrumb links
# @return [Array] the links for that ancestor type; empty for an unknown
#   type or when page ids/titles are blank
def create_breadcrumb_link(object_type, presenter, link_class = nil)
  case object_type
  when :title
    [breadcrumb_object_link(object_type, presenter.publication_id,
                            presenter.publication_title, link_class)]
  when :issue
    issue_label = breadcrumb_object_title(presenter.issue_title)
    [breadcrumb_object_link(object_type, presenter.issue_id, issue_label, link_class)]
  when :page
    return [] if presenter.page_ids.blank? || presenter.page_titles.blank?
    # page ids and page titles are paired by position
    presenter.page_ids.each_with_index.map do |page_id, position|
      page_label = breadcrumb_object_title(presenter.page_titles[position])
      breadcrumb_object_link(object_type, page_id, page_label, link_class)
    end
  else
    []
  end
end
|
41
|
+
|
42
|
+
# Build a single breadcrumb link to an ancestor object.
#
# @param object_type [Symbol] the type of newspaper object, as a symbol (e.g. :issue)
# @param id [String] the id of the ancestor Newspaper object
# @param title [String] the title of the ancestor Newspaper object
# @param link_class [String] the class for the breadcrumb links
# @return [String, Array] the link, or an empty Array when id or title is missing
def breadcrumb_object_link(object_type, id, title, link_class = nil)
  return [] if !id || !title
  # the route helper name is derived from the object type
  route_helper = "hyrax_newspaper_#{object_type}_path"
  target = main_app.send(route_helper, id)
  link_to(title, target, class: link_class)
end
|
55
|
+
|
56
|
+
# Shorten a title for use as breadcrumb text. If the title mentions "page"
# (case-insensitive) the text from that word onward is returned; otherwise
# the title is parsed as a date and formatted; if it is not a date the
# title is returned unchanged.
#
# @param title [String] the title of the ancestor Newspaper object
# @return [String, nil] the formatted title, or nil when title is not a String
def breadcrumb_object_title(title)
  return unless title.is_a?(String)
  page_offset = title.downcase.index(/page/)
  if page_offset
    title[page_offset..-1]
  else
    begin
      title.to_date.strftime("%B %e, %Y")
    rescue ArgumentError
      # not a parseable date: fall back to the raw title
      title
    end
  end
end
|
71
|
+
|
72
|
+
# Link to the NewspaperPage that precedes the current one.
#
# @param presenter [NewspaperPagePresenter] presenter for current NewspaperPage object
# @param options [Hash] hash of link options
# @return [String] the link markup produced by link_to
def previous_page_link(presenter, options = {})
  label = "<< #{t('hyrax.newspaper_page.previous_page')}"
  target = main_app.hyrax_newspaper_page_path(presenter.previous_page_id)
  link_to(label, target, options)
end
|
81
|
+
|
82
|
+
# Link to the NewspaperPage that follows the current one.
#
# @param presenter [NewspaperPagePresenter] presenter for current NewspaperPage object
# @param options [Hash] hash of link options
# @return [String] the link markup produced by link_to
def next_page_link(presenter, options = {})
  label = "#{t('hyrax.newspaper_page.next_page')} >>"
  target = main_app.hyrax_newspaper_page_path(presenter.next_page_id)
  link_to(label, target, options)
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
module NewspaperWorks
|
2
|
+
module NewspaperWorksHelperBehavior
|
3
|
+
##
# Build the link anchor that carries the current search terms, so the
# viewer can show the keyword search.
#
# @param query_params_hash [Hash] current_search_session.query_params
# @return [String, nil] "?h=<query>", or nil when there is no query
def iiif_search_anchor(query_params_hash)
  terms = search_query(query_params_hash)
  terms.blank? ? nil : "?h=#{terms}"
end
|
13
|
+
|
14
|
+
##
# Read the query string from the search params. It is stored under :q, or
# under :all_fields when :q is not set.
#
# @param query_params_hash [Hash] current_search_session.query_params
# @return [String, nil] the query
def search_query(query_params_hash)
  primary = query_params_hash[:q]
  return primary if primary
  query_params_hash[:all_fields]
end
|
22
|
+
|
23
|
+
##
# based on Blacklight::CatalogHelperBehavior#render_thumbnail_tag
# Wrap the document thumbnail in a link. NewspaperPage and NewspaperArticle
# documents link to their Hyrax show path with the search anchor attached;
# any other document type falls back to link_to_document.
#
# @param document [SolrDocument]
# @param query_params_hash [Hash] current_search_session.query_params
# @return [String, nil] the linked thumbnail, or nil when there is no thumbnail
def render_newspaper_thumbnail_tag(document, query_params_hash)
  thumbnail = newspaper_thumbnail_tag(document)
  return if !thumbnail
  anchor = iiif_search_anchor(query_params_hash)
  type_field = blacklight_config.view_config(document_index_view_type).display_type_field
  work_type = document[type_field].first
  route_helper = { 'NewspaperPage' => :hyrax_newspaper_page_path,
                   'NewspaperArticle' => :hyrax_newspaper_article_path }[work_type]
  return link_to_document(document, thumbnail) unless route_helper
  link_to(thumbnail, send(route_helper, document.id, anchor: anchor))
end
|
43
|
+
|
44
|
+
##
# based on Blacklight::CatalogHelperBehavior#render_thumbnail_tag
# Produce the thumbnail markup for a document. A configured
# thumbnail_method is called when present; otherwise, when a
# thumbnail_field is configured, an image tag is built from thumbnail_url.
#
# @param document [SolrDocument]
# @return [String, nil] thumbnail markup, or nil when none can be produced
def newspaper_thumbnail_tag(document)
  view_config = blacklight_config.view_config(document_index_view_type)
  custom_method = view_config.thumbnail_method
  return send(custom_method, document) if custom_method
  return unless view_config.thumbnail_field
  url = thumbnail_url(document)
  image_tag url if url.present?
end
|
59
|
+
|
60
|
+
##
# Collect the highlighted terms from a Solr highlight field and return them
# lower-cased, de-duplicated, sorted and joined with spaces.
#
# @param document [SolrDocument]
# @param hl_fl [String] the name of the Solr field with highlights
# @param hl_tag [String] the HTML element name used for marking highlights
#   configured in Solr as hl.tag.pre value
# @return [String, nil] the matched terms, or nil when there are no highlights
def highlight_matches(document, hl_fl, hl_tag)
  # matches text wrapped in hl_tag that contains no other markup
  tagged_term = /<#{hl_tag}>[^<>]+<\/#{hl_tag}>/
  highlights = document.highlight_field(hl_fl)
  return nil unless highlights.present?
  terms = highlights.flat_map do |snippet|
    snippet.scan(tagged_term).map { |hit| hit.gsub(/<[\/]*#{hl_tag}>/, '').downcase }
  end
  terms.uniq.sort.join(' ')
end
|
82
|
+
|
83
|
+
##
# Render the OCR snippets: the first snippet in its own div, and any
# further snippets through the 'catalog/snippets_more' partial. The parts
# are joined with newlines.
#
# @param options [Hash] options hash provided by Blacklight
# @return [String] snippets HTML to be rendered
# rubocop:disable Rails/OutputSafety
def render_ocr_snippets(options = {})
  snippets = options[:value]
  # NOTE(review): the snippet text is marked html_safe without escaping;
  # presumably it only carries Solr highlight markup -- confirm the OCR
  # text cannot contain other HTML.
  lead_snippet = content_tag('div',
                             "... #{snippets.first} ...".html_safe,
                             class: 'ocr_snippet first_snippet')
  parts = [lead_snippet]
  unless snippets.length <= 1
    parts << render(partial: 'catalog/snippets_more',
                    locals: { snippets: snippets.drop(1),
                              options: options })
  end
  parts.join("\n").html_safe
end
# rubocop:enable Rails/OutputSafety
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# indexes the full text of a Newspaper object
module NewspaperWorks
  module IndexesFullText
    # Load the plain text ('txt') derivative of the work, replace newlines
    # with spaces and collapse runs of spaces, then store the result in both
    # the stored (for highlighting) and non-stored (Hyrax default) text fields.
    #
    # @param work [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_full_text(work, solr_doc)
      derivatives = NewspaperWorks::Data::WorkDerivatives.new(work)
      flattened = derivatives.data('txt').tr("\n", ' ').squeeze(' ')
      solr_doc['all_text_timv'] = flattened
      solr_doc['all_text_tsimv'] = flattened
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# indexes the place_of_publication field
|
2
|
+
module NewspaperWorks
|
3
|
+
module IndexesPlaceOfPublication
|
4
|
+
# Index every place_of_publication value of the object. Values that are not
# ActiveTriples::Resource instances are ignored. For each resource the
# GeoNames id is taken from the first run of four or more digits in its id
# and looked up with #get_geodata.
#
# A place whose lookup yields no data is skipped on its own; the remaining
# places are still indexed (previously the first failed lookup returned
# from the method and dropped every place after it).
#
# @param object [Newspaper*] an instance of a NewspaperWorks model
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_pop(object, solr_doc)
  return unless object.respond_to?(:place_of_publication)
  object.place_of_publication.each do |pop|
    next unless pop.is_a?(ActiveTriples::Resource)
    geonames_id = pop.id.match(/[\d]{4,}/).to_s
    geodata = get_geodata(geonames_id)
    # skip only this place; do not abort the whole loop
    next if geodata.blank?
    add_geodata_fields(solr_doc)
    index_pop_geodata(geodata, solr_doc)
  end
end
|
19
|
+
|
20
|
+
# Make sure every place_of_publication field exists in solr_doc as an
# Array, leaving fields that already hold a value untouched.
#
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def add_geodata_fields(solr_doc)
  placeholder_fields = %w[city county state country].map do |place|
    "place_of_publication_#{place}_sim"
  end
  placeholder_fields += %w[place_of_publication_label_tesim
                           place_of_publication_label_sim
                           place_of_publication_llsim]
  placeholder_fields.each { |field| solr_doc[field] ||= [] }
  solr_doc['place_of_publication_llsim']
end
|
31
|
+
|
32
|
+
# Add geographic data to the solr_doc Hash, with fields for city, county,
# state, country, a display label and coordinates.
#
# Missing (nil or empty) place names are not appended, so the facet arrays
# never receive nil entries, and no label is indexed when the payload
# carries no usable names. Target arrays are created on demand, so the
# method also works when #add_geodata_fields has not been called first.
#
# @param geodata [Hash] hash of GeoNames data returned by #get_geodata
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_pop_geodata(geodata, solr_doc)
  places = { 'city' => geodata['name'],
             'county' => geodata['adminName2'],
             'state' => geodata['adminName1'],
             'country' => geodata['countryName'] }
  places.each do |level, place_name|
    field = "place_of_publication_#{level}_sim"
    solr_doc[field] ||= []
    solr_doc[field] << place_name unless place_name.nil? || place_name.to_s.empty?
  end

  # the county is deliberately left out of the display label
  label_parts = places.values_at('city', 'state', 'country')
  display_name = label_parts.reject { |part| part.nil? || part.to_s.empty? }.join(', ')
  %w[place_of_publication_label_tesim place_of_publication_label_sim].each do |field|
    solr_doc[field] ||= []
    solr_doc[field] << display_name unless display_name.empty?
  end

  solr_doc['place_of_publication_llsim'] ||= []
  return unless geodata['lat'] && geodata['lng']
  # TODO: this should use a Solr location_rpt field type
  solr_doc['place_of_publication_llsim'] << "#{geodata['lat']},#{geodata['lng']}"
end
|
53
|
+
|
54
|
+
# Fetch the record for one geographic entity from the GeoNames API.
#
# The lookup is best-effort. false is returned when the id is not a
# positive number, when no GeoNames username is configured, when the
# request fails, when the response body is not a JSON object, or when
# GeoNames answers with an error payload (a top-level "status" key).
# Request and parse failures are logged.
#
# @param geoname_id [String] GeoNames id of geographic entity
# @return [Hash, false] GeoNames API response as Hash, or false if unavailable
def get_geodata(geoname_id)
  return false if geoname_id.to_i.zero?
  geonames_un = Qa::Authorities::Geonames.username
  return false unless geonames_un
  # encode the parameters so a username with reserved characters cannot break the URL
  query = URI.encode_www_form(geonameId: geoname_id, username: geonames_un)
  resp = Faraday.new("http://api.geonames.org/getJSON?#{query}").get
  geodata = JSON.parse(resp.body)
  return false unless geodata.is_a?(Hash) && !geodata.key?('status')
  geodata
rescue JSON::ParserError, Faraday::Error => e
  Rails.logger.warn("GeoNames lookup failed for #{geoname_id}: #{e.message}")
  false
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# indexes the publication_date_start and _end fields
|
2
|
+
module NewspaperWorks
|
3
|
+
module IndexesPublicationDateRange
|
4
|
+
# Add the publication start date to the solr_doc Hash as a datetime.
# A year (YYYY) is expanded to January 1st, a year-month (YYYY-MM) to the
# first day of that month, and a full date (YYYY-MM-DD) is used as given.
# Full dates previously matched no branch and were dropped from the index.
# Values in any other format are not indexed. The string-typed _ssi field
# is always set to nil.
#
# @param pubdate [String] publication start date
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_pubdate_start(pubdate, solr_doc)
  start_date =
    case pubdate
    when /\A\d{4}\z/ then "#{pubdate}-01-01"
    when /\A\d{4}-\d{2}\z/ then "#{pubdate}-01"
    when /\A\d{4}-\d{2}-\d{2}\z/ then pubdate
    end
  solr_doc['publication_date_start_dtsi'] = start_date.to_datetime if start_date
  solr_doc['publication_date_start_ssi'] = nil
end
|
17
|
+
|
18
|
+
# Add the publication end date to the solr_doc Hash in Solr datetime
# format, at the last second of the period. A year (YYYY) is expanded to
# December 31st, a year-month (YYYY-MM) to the last day of that month, and
# a full date (YYYY-MM-DD) is used as given. Full dates previously matched
# no branch and were dropped from the index. Values in any other format
# are not indexed. The string-typed _ssi field is always set to nil.
#
# @param pubdate [String] publication end date
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_pubdate_end(pubdate, solr_doc)
  end_time = 'T23:59:59Z'
  end_date =
    case pubdate
    when /\A\d{4}\z/
      "#{pubdate}-12-31"
    when /\A\d{4}-\d{2}\z/
      year, month = pubdate.split('-').map(&:to_i)
      # day -1 asks Date for the last day of the month
      last_day = Date.new(year, month, -1).strftime('%d')
      "#{pubdate}-#{last_day}"
    when /\A\d{4}-\d{2}-\d{2}\z/
      pubdate
    end
  solr_doc['publication_date_end_dtsi'] = "#{end_date}#{end_time}" if end_date
  solr_doc['publication_date_end_ssi'] = nil
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# indexes parent relationships e.g. issue->title, page->issue, etc
|
2
|
+
module NewspaperWorks
|
3
|
+
module IndexesRelationships
|
4
|
+
# Index the parent/child relationships of a newspaper object. Everything
# except a NewspaperTitle gets its publication indexed; pages additionally
# get issue, container, article and sibling data; articles get issue and
# page data.
#
# @param object [Newspaper*] an instance of a NewspaperWorks model
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_relationships(object, solr_doc)
  index_publication(object, solr_doc) unless object.is_a?(NewspaperTitle)
  if object.is_a?(NewspaperPage)
    index_issue(object, solr_doc)
    index_container(object, solr_doc)
    index_articles(object, solr_doc)
    index_siblings(object, solr_doc)
  elsif object.is_a?(NewspaperArticle)
    index_issue(object, solr_doc)
    index_pages(object, solr_doc)
  end
end
|
21
|
+
|
22
|
+
# Index id, title and unique identifier of the NewspaperTitle the object
# belongs to, then copy the title's facet values. Nothing is indexed when
# the object's publication is not a NewspaperTitle.
#
# @param object [Newspaper*] an instance of a NewspaperWorks model
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_publication(object, solr_doc)
  publication = object.publication
  return unless publication.is_a?(NewspaperTitle)
  solr_doc['publication_id_ssi'] = publication.id
  solr_doc['publication_title_ssi'] = publication.title.first
  # which property holds the unique id is configurable
  unique_id_property = NewspaperWorks.config.publication_unique_id_property
  solr_doc['publication_unique_id_ssi'] = publication.send(unique_id_property)
  index_parent_facets(publication, solr_doc)
end
|
35
|
+
|
36
|
+
# Index id and title of the NewspaperContainer holding the page. Nothing is
# indexed when the page's container is not a NewspaperContainer.
#
# @param page [NewspaperPage]
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_container(page, solr_doc)
  container = page.container
  return unless container.is_a?(NewspaperContainer)
  solr_doc['container_id_ssi'] = container.id
  solr_doc['container_title_ssi'] = container.title.first
end
|
46
|
+
|
47
|
+
# Index the fields of the NewspaperIssue the object belongs to (id, title,
# publication date, volume, edition number, issue number), then copy the
# issue's facet values. An existing publication_date_dtsi is kept, and the
# edition number defaults to '1'. Nothing is indexed when the object's
# issue is not a NewspaperIssue.
#
# @param object [NewspaperPage||NewspaperArticle]
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_issue(object, solr_doc)
  issue = object.issue
  return unless issue.is_a?(NewspaperIssue)
  solr_doc['issue_id_ssi'] = issue.id
  solr_doc['issue_title_ssi'] = issue.title.first
  issue_date = issue.publication_date
  if issue_date.present?
    solr_doc['publication_date_dtsi'] ||= issue_date.to_datetime
  end
  solr_doc['issue_volume_ssi'] = issue.volume
  solr_doc['issue_edition_number_ssi'] = issue.edition_number || '1'
  solr_doc['issue_number_ssi'] = issue.issue_number
  index_parent_facets(issue, solr_doc)
end
|
62
|
+
|
63
|
+
# Index ids and first titles of the pages an article appears on. Nothing
# is indexed when the article has no pages or its first page is not a
# NewspaperPage.
#
# @param article [NewspaperArticle]
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_pages(article, solr_doc)
  pages = article.pages
  return if pages.blank?
  return unless pages.first.is_a?(NewspaperPage)
  solr_doc['page_ids_ssim'] = pages.map(&:id)
  solr_doc['page_titles_ssim'] = pages.map { |page| page.title.first }
  pages
end
|
77
|
+
|
78
|
+
# Index the neighbouring pages of a page within its issue, using the
# position of the page id in the issue's ordered page ids. The first page
# gets first_page_bsi instead of a preceding neighbour. Nothing is indexed
# when the issue is missing or does not list the page.
#
# @param page [NewspaperPage]
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_siblings(page, solr_doc)
  issue = page.issue
  return unless issue.is_a?(NewspaperIssue)
  ordered_ids = issue.ordered_page_ids
  position = ordered_ids.index(page.id)
  return if position.nil?
  first_page = position.zero?
  solr_doc['is_following_page_of_ssi'] = ordered_ids[position - 1].presence unless first_page
  solr_doc['is_preceding_page_of_ssi'] = ordered_ids[position + 1].presence
  solr_doc['first_page_bsi'] = true if first_page
end
|
92
|
+
|
93
|
+
# Index ids and first titles of the articles that appear on a page.
# Nothing is indexed when the page has no articles or its first article is
# not a NewspaperArticle.
#
# @param page [NewspaperPage]
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
def index_articles(page, solr_doc)
  articles = page.articles
  return if articles.blank?
  return unless articles.first.is_a?(NewspaperArticle)
  solr_doc['article_ids_ssim'] = articles.map(&:id)
  solr_doc['article_titles_ssim'] = articles.map { |article| article.title.first }
  articles
end
|
107
|
+
|
108
|
+
# index common facet properties
|
109
|
+
# TODO: this could probably be DRY'd out a bit,
|
110
|
+
# overlaps with IndexesPlaceOfPublication#index_pop
|
111
|
+
#
|
112
|
+
# @param parent [NewspaperTitle||NewspaperIssue]
|
113
|
+
# @param solr_doc [Hash] the hash of field data to be pushed to Solr
|
114
|
+
def index_parent_facets(parent, solr_doc)
|
115
|
+
parent_doc = parent.to_solr
|
116
|
+
fields = %w[language_sim place_of_publication_label_sim
|
117
|
+
place_of_publication_city_sim place_of_publication_county_sim
|
118
|
+
place_of_publication_state_sim place_of_publication_country_sim
|
119
|
+
place_of_publication_llsim place_of_publication_label_tesim]
|
120
|
+
fields.each do |field|
|
121
|
+
solr_doc[field] ||= parent_doc[field]
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Generated via
#  `rails generate hyrax:work NewspaperArticle`
class NewspaperArticleIndexer < NewspaperWorks::NewspaperCoreIndexer
  # Extends the core Solr document with the labels of the article's genre
  # values, as both a searchable and a facetable field.
  def generate_solr_document
    super.tap do |solr_doc|
      genre_service = Hyrax::ArticleGenreService.new
      # the block passed to #label supplies the raw value for that genre
      genre_labels = object.genre.map { |genre| genre_service.label(genre) { genre } }
      solr_doc['genre_tesim'] = genre_labels.presence
      solr_doc['genre_sim'] = genre_labels.presence
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Generated via
#  `rails generate hyrax:work NewspaperContainer`
#
# Indexer for NewspaperContainer works. It adds nothing of its own; all
# indexing comes from NewspaperWorks::NewspaperCoreIndexer.
class NewspaperContainerIndexer < NewspaperWorks::NewspaperCoreIndexer
  # Generator template, kept for reference:
  #
  # This indexes the default metadata. You can remove it if you want to
  # provide your own metadata and indexing.
  # include Hyrax::IndexesBasicMetadata

  # Fetch remote labels for based_near. You can remove this if you don't want
  # this behavior
  # include Hyrax::IndexesLinkedMetadata

  # Uncomment this block if you want to add custom indexing behavior:
  # def generate_solr_document
  #  super.tap do |solr_doc|
  #    solr_doc['my_custom_field_ssim'] = object.my_custom_property
  #  end
  # end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Generated via
#  `rails generate hyrax:work NewspaperIssue`
class NewspaperIssueIndexer < NewspaperWorks::NewspaperCoreIndexer
  # This indexes the default metadata. You can remove it if you want to
  # provide your own metadata and indexing.
  # include Hyrax::IndexesBasicMetadata

  # Fetch remote labels for based_near. You can remove this if you don't want
  # this behavior
  # include Hyrax::IndexesLinkedMetadata

  # Extends the core Solr document with a datetime-typed publication date
  # and a default edition number.
  def generate_solr_document
    super.tap do |solr_doc|
      # set manually to ensure correct field type (_dtsi)
      issue_date = object.publication_date
      if issue_date =~ /\A\d{4}-\d{2}-\d{2}\z/
        solr_doc['publication_date_ssi'] = nil
        solr_doc['publication_date_dtsi'] = issue_date.to_datetime
      end

      # if edition number is not set, add a default
      # to support ChronAm-style URL pattern linking
      solr_doc['edition_number_tesim'] ||= '1'
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Generated via
#  `rails generate hyrax:work NewspaperTitle`
class NewspaperTitleIndexer < NewspaperWorks::NewspaperCoreIndexer
  # This indexes the default metadata. You can remove it if you want to
  # provide your own metadata and indexing.
  # include Hyrax::IndexesBasicMetadata
  include NewspaperWorks::IndexesPublicationDateRange

  # Fetch remote labels for based_near. You can remove this if you don't want
  # this behavior
  # include Hyrax::IndexesLinkedMetadata

  # Extends the core Solr document with the publication date range of the
  # title; each end of the range is indexed only when it is present.
  def generate_solr_document
    super.tap do |solr_doc|
      range_start = object.publication_date_start
      index_pubdate_start(range_start, solr_doc) if range_start.present?
      range_end = object.publication_date_end
      index_pubdate_end(range_end, solr_doc) if range_end.present?
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Core indexer for newspaper work types
module NewspaperWorks
  class NewspaperCoreIndexer < Hyrax::WorkIndexer
    # This indexes the default metadata. You can remove it if you want to
    # provide your own metadata and indexing.
    include Hyrax::IndexesBasicMetadata
    include NewspaperWorks::IndexesPlaceOfPublication
    include NewspaperWorks::IndexesRelationships

    # Fetch remote labels for based_near. You can remove this if you don't want
    # this behavior
    # include Hyrax::IndexesLinkedMetadata

    # Builds the parent Solr document, then adds place-of-publication and
    # relationship data for the object being indexed.
    #
    # @return [Hash] the Solr document
    def generate_solr_document
      solr_doc = super
      index_pop(object, solr_doc)
      index_relationships(object, solr_doc)
      solr_doc
    end
  end
end
|