newspaper_works 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.fcrepo_wrapper +4 -0
- data/.gitignore +43 -0
- data/.rubocop.yml +143 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +50 -0
- data/Gemfile +47 -0
- data/LICENSE +203 -0
- data/README.md +159 -0
- data/Rakefile +38 -0
- data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
- data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
- data/app/assets/config/newspaper_works_manifest.js +2 -0
- data/app/assets/images/newspaper_works/.keep +0 -0
- data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
- data/app/assets/javascripts/newspaper_works.js +4 -0
- data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
- data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
- data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
- data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
- data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
- data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
- data/app/forms/hyrax/newspaper_article_form.rb +11 -0
- data/app/forms/hyrax/newspaper_container_form.rb +11 -0
- data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
- data/app/forms/hyrax/newspaper_page_form.rb +15 -0
- data/app/forms/hyrax/newspaper_title_form.rb +12 -0
- data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
- data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
- data/app/helpers/newspaper_works/application_helper.rb +5 -0
- data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
- data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
- data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
- data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
- data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
- data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
- data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
- data/app/indexers/newspaper_article_indexer.rb +16 -0
- data/app/indexers/newspaper_container_indexer.rb +18 -0
- data/app/indexers/newspaper_issue_indexer.rb +26 -0
- data/app/indexers/newspaper_page_indexer.rb +9 -0
- data/app/indexers/newspaper_title_indexer.rb +19 -0
- data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
- data/app/jobs/newspaper_works/application_job.rb +4 -0
- data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
- data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
- data/app/mailers/newspaper_works/application_mailer.rb +8 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
- data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
- data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
- data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
- data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
- data/app/models/file_set.rb +10 -0
- data/app/models/newspaper_article.rb +158 -0
- data/app/models/newspaper_container.rb +86 -0
- data/app/models/newspaper_issue.rb +115 -0
- data/app/models/newspaper_page.rb +70 -0
- data/app/models/newspaper_title.rb +111 -0
- data/app/models/newspaper_works/application_record.rb +6 -0
- data/app/models/newspaper_works/derivative_attachment.rb +8 -0
- data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
- data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
- data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
- data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
- data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
- data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
- data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
- data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
- data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
- data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
- data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
- data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
- data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
- data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
- data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
- data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
- data/app/services/hyrax/article_genre_service.rb +9 -0
- data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
- data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
- data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
- data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
- data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
- data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
- data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
- data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
- data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
- data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
- data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
- data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
- data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
- data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
- data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
- data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
- data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
- data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
- data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
- data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
- data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
- data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
- data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
- data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
- data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
- data/app/views/newspaper_works/base/_show.html.erb +45 -0
- data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
- data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
- data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
- data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
- data/app/views/records/edit_fields/_genre.html.erb +4 -0
- data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
- data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/newspaper_article.de.yml +12 -0
- data/config/locales/newspaper_article.en.yml +12 -0
- data/config/locales/newspaper_article.es.yml +12 -0
- data/config/locales/newspaper_article.fr.yml +12 -0
- data/config/locales/newspaper_article.it.yml +12 -0
- data/config/locales/newspaper_article.pt-BR.yml +12 -0
- data/config/locales/newspaper_article.zh.yml +12 -0
- data/config/locales/newspaper_container.de.yml +8 -0
- data/config/locales/newspaper_container.en.yml +8 -0
- data/config/locales/newspaper_container.es.yml +8 -0
- data/config/locales/newspaper_container.fr.yml +8 -0
- data/config/locales/newspaper_container.it.yml +8 -0
- data/config/locales/newspaper_container.pt-BR.yml +8 -0
- data/config/locales/newspaper_container.zh.yml +8 -0
- data/config/locales/newspaper_issue.de.yml +8 -0
- data/config/locales/newspaper_issue.en.yml +8 -0
- data/config/locales/newspaper_issue.es.yml +8 -0
- data/config/locales/newspaper_issue.fr.yml +8 -0
- data/config/locales/newspaper_issue.it.yml +8 -0
- data/config/locales/newspaper_issue.pt-BR.yml +8 -0
- data/config/locales/newspaper_issue.zh.yml +8 -0
- data/config/locales/newspaper_page.de.yml +15 -0
- data/config/locales/newspaper_page.en.yml +15 -0
- data/config/locales/newspaper_page.es.yml +15 -0
- data/config/locales/newspaper_page.fr.yml +15 -0
- data/config/locales/newspaper_page.it.yml +15 -0
- data/config/locales/newspaper_page.pt-BR.yml +15 -0
- data/config/locales/newspaper_page.zh.yml +15 -0
- data/config/locales/newspaper_title.de.yml +8 -0
- data/config/locales/newspaper_title.en.yml +8 -0
- data/config/locales/newspaper_title.es.yml +8 -0
- data/config/locales/newspaper_title.fr.yml +8 -0
- data/config/locales/newspaper_title.it.yml +8 -0
- data/config/locales/newspaper_title.pt-BR.yml +8 -0
- data/config/locales/newspaper_title.zh.yml +8 -0
- data/config/locales/newspaper_works.de.yml +50 -0
- data/config/locales/newspaper_works.en.yml +52 -0
- data/config/locales/newspaper_works.es.yml +52 -0
- data/config/locales/newspaper_works.fr.yml +52 -0
- data/config/locales/newspaper_works.it.yml +52 -0
- data/config/locales/newspaper_works.pt-BR.yml +52 -0
- data/config/locales/newspaper_works.zh.yml +52 -0
- data/config/routes.rb +9 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
- data/lib/generators/newspaper_works/assets_generator.rb +29 -0
- data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
- data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
- data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
- data/lib/generators/newspaper_works/install_generator.rb +97 -0
- data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
- data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
- data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
- data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
- data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
- data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
- data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
- data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
- data/lib/newspaper_works/configuration.rb +14 -0
- data/lib/newspaper_works/data/fileset_helper.rb +25 -0
- data/lib/newspaper_works/data/path_helper.rb +40 -0
- data/lib/newspaper_works/data/work_derivatives.rb +314 -0
- data/lib/newspaper_works/data/work_file.rb +92 -0
- data/lib/newspaper_works/data/work_files.rb +181 -0
- data/lib/newspaper_works/data.rb +35 -0
- data/lib/newspaper_works/engine.rb +42 -0
- data/lib/newspaper_works/errors.rb +14 -0
- data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
- data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
- data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
- data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
- data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
- data/lib/newspaper_works/ingest/from_command.rb +52 -0
- data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
- data/lib/newspaper_works/ingest/issue_images.rb +51 -0
- data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
- data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
- data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
- data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
- data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
- data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
- data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
- data/lib/newspaper_works/ingest/ndnp.rb +21 -0
- data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
- data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
- data/lib/newspaper_works/ingest/page_image.rb +52 -0
- data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
- data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
- data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
- data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
- data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
- data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
- data/lib/newspaper_works/ingest/publication_info.rb +44 -0
- data/lib/newspaper_works/ingest.rb +90 -0
- data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
- data/lib/newspaper_works/logging.rb +54 -0
- data/lib/newspaper_works/page_finder.rb +62 -0
- data/lib/newspaper_works/resource_fetcher.rb +78 -0
- data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
- data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
- data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
- data/lib/newspaper_works/text_extraction.rb +10 -0
- data/lib/newspaper_works/version.rb +3 -0
- data/lib/newspaper_works.rb +19 -0
- data/lib/tasks/newspaper_works_tasks.rake +39 -0
- data/newspaper_works.gemspec +49 -0
- data/spec/.keep.txt +1 -0
- data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
- data/spec/controllers/catalog_controller_spec.rb +63 -0
- data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
- data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_issue_ingest.rb +6 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_ingest.rb +6 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/features/front_pages_for_title_spec.rb +19 -0
- data/spec/features/newspaper_title_search_spec.rb +30 -0
- data/spec/features/newspapers_search_spec.rb +49 -0
- data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
- data/spec/features_shared.rb +71 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
- data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
- data/spec/fixtures/files/resource_mocks/urls.json +82 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
- data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
- data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
- data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
- data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
- data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
- data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
- data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
- data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
- data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
- data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
- data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
- data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
- data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
- data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
- data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
- data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
- data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
- data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
- data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
- data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
- data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
- data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
- data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
- data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
- data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
- data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
- data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
- data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
- data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
- data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
- data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
- data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
- data/spec/lib/newspaper_works/logging_spec.rb +53 -0
- data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
- data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
- data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
- data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
- data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
- data/spec/misc_shared.rb +109 -0
- data/spec/model_shared.rb +134 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
- data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
- data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
- data/spec/models/newspaper_article_spec.rb +73 -0
- data/spec/models/newspaper_container_spec.rb +111 -0
- data/spec/models/newspaper_issue_spec.rb +91 -0
- data/spec/models/newspaper_page_spec.rb +44 -0
- data/spec/models/newspaper_title_spec.rb +116 -0
- data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
- data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/ndnp_shared.rb +48 -0
- data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
- data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
- data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
- data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
- data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
- data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
- data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
- data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
- data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
- data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
- data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
- data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
- data/spec/routing/route_spec.rb +52 -0
- data/spec/search_builders/custom_search_builder_spec.rb +34 -0
- data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
- data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
- data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
- data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
- data/spec/spec_helper.rb +261 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
- data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
- data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
- data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
- data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
- data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
- data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
- data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
- data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
- data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
- data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
- data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
- data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
- data/tasks/newspaperworks_dev.rake +26 -0
- data/test/integration/navigation_test.rb +7 -0
- data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
- data/test/newspaper_works_test.rb +7 -0
- data/test/test_helper.rb +17 -0
- data/tmp/.keep +0 -0
- metadata +1037 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Generated via
#  `rails generate hyrax:work NewspaperIssue`
module Hyrax
  # Form class for NewspaperIssue works. It starts from the term list
  # inherited from NewspaperWorks::NewspaperCoreFormData and adjusts it.
  class NewspaperIssueForm < ::NewspaperWorks::NewspaperCoreFormData
    self.model_class = ::NewspaperIssue

    # Issue-specific terms appended to the inherited list, in this order.
    self.terms += %i[
      alternative_title volume edition_number edition_name
      issue_number extent publication_date
    ]

    # Inherited terms that are dropped from this form.
    self.terms -= %i[creator contributor description subject]
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Generated via
#  `rails generate hyrax:work NewspaperPage`
module Hyrax
  # Form class for NewspaperPage works, built directly on
  # Hyrax::Forms::WorkForm.
  class NewspaperPageForm < Hyrax::Forms::WorkForm
    self.model_class = ::NewspaperPage

    # Page-specific terms appended to the inherited list, in this order.
    # :resource_type is not appended here because the removal list below
    # strips every occurrence of it, so appending it had no effect.
    # NOTE(review): confirm that hiding :resource_type on this form is the
    # intended outcome rather than a leftover.
    self.terms += %i[height width text_direction page_number section]

    # Inherited terms that are dropped from this form (each listed once).
    self.terms -= %i[
      creator keyword rights_statement contributor description license
      subject date_created language based_near related_url source
      resource_type publisher
    ]

    # These fields are removed from the inherited required-field list.
    self.required_fields -= %i[creator keyword rights_statement]
  end
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Generated via
#  `rails generate hyrax:work NewspaperTitle`
module Hyrax
  # Form class for NewspaperTitle works. It starts from the term list
  # inherited from NewspaperWorks::NewspaperCoreFormData and adjusts it.
  class NewspaperTitleForm < ::NewspaperWorks::NewspaperCoreFormData
    self.model_class = ::NewspaperTitle

    # Title-specific terms appended to the inherited list, in this order.
    self.terms += %i[
      alternative_title edition_name frequency preceded_by
      succeeded_by publication_date_start publication_date_end
    ]

    # Inherited terms that are dropped from this form.
    self.terms -= %i[creator contributor description source subject]
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module NewspaperWorks
  # Base form class that adjusts the term and required-field lists
  # inherited from Hyrax::Forms::WorkForm, and permits nested
  # place_of_publication attributes.
  class NewspaperCoreFormData < Hyrax::Forms::WorkForm
    # Terms appended to, then removed from, the inherited term list.
    self.terms += %i[resource_type place_of_publication issn lccn oclcnum held_by]
    self.terms -= %i[based_near date_created keyword related_url source]

    # Fields appended to, then removed from, the inherited required list.
    self.required_fields += %i[resource_type language held_by]
    self.required_fields -= %i[creator keyword rights_statement]

    # Extends the parent's permitted params with a hash entry for
    # place_of_publication_attributes (keys :id and :_destroy).
    #
    # @return the parent's result with the extra hash appended
    def self.build_permitted_params
      place_of_publication_params = {
        place_of_publication_attributes: %i[id _destroy]
      }
      super + [place_of_publication_params]
    end
  end
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
module NewspaperWorks
  # View helpers that build breadcrumb and previous/next links for
  # newspaper objects from their presenters.
  module BreadcrumbHelper
    # create an array of links representing the ancestors of the current object,
    # followed by the (formatted) title of the current object itself
    #
    # @param presenter [Newspaper*Presenter] the presenter for the current Newspaper object
    # @param link_class [String] the class for the breadcrumb links
    # @return [Array] ancestor links plus the current object's title
    def newspaper_breadcrumbs(presenter, link_class = nil)
      breadcrumbs = []
      # ancestor type => presenter method whose presence means the ancestor applies
      ancestors = { title: :publication_id, issue: :issue_id, page: :page_ids }
      ancestors.each do |k, v|
        breadcrumbs << create_breadcrumb_link(k, presenter, link_class) if presenter.respond_to?(v)
      end
      breadcrumbs << breadcrumb_object_title(presenter.title.first)
      # create_breadcrumb_link returns arrays (possibly containing empty arrays)
      breadcrumbs.flatten
    end

    # create an array of links for one ancestor type of the current object
    #
    # @param object_type [Symbol] the type of newspaper object, as a symbol (e.g. :issue)
    # @param presenter [Newspaper*Presenter] the presenter for the current Newspaper object
    # @param link_class [String] the class for the breadcrumb links
    # @return [Array] links for the requested ancestor type (may be empty)
    def create_breadcrumb_link(object_type, presenter, link_class = nil)
      links = []
      case object_type
      when :title
        links << breadcrumb_object_link(object_type, presenter.publication_id,
                                        presenter.publication_title, link_class)
      when :issue
        links << breadcrumb_object_link(object_type, presenter.issue_id,
                                        breadcrumb_object_title(presenter.issue_title), link_class)
      when :page
        unless presenter.page_ids.blank? || presenter.page_titles.blank?
          # page ids and page titles are parallel arrays
          presenter.page_ids.each_with_index do |id, index|
            links << breadcrumb_object_link(object_type, id,
                                            breadcrumb_object_title(presenter.page_titles[index]),
                                            link_class)
          end
        end
      end
      links
    end

    # create a link for an ancestor of the current object
    #
    # @param object_type [Symbol] the type of newspaper object, as a symbol (e.g. :issue)
    # @param id [String] the id of the ancestor Newspaper object
    # @param title [String] the title of the ancestor Newspaper object
    # @param link_class [String] the class for the breadcrumb links
    # @return [String, Array] the link, or an empty array when id or title is missing
    def breadcrumb_object_link(object_type, id, title, link_class = nil)
      return [] unless id && title
      link_path = "hyrax_newspaper_#{object_type}_path"
      # public_send: route helpers are public, no need to bypass visibility
      link_to(title,
              main_app.public_send(link_path, id),
              class: link_class)
    end

    # Format link titles for ancestor link. Should return either the portion of
    # the title that describes the page number or a formatted date. If neither
    # is found, will return back the original title variable
    #
    # @param title [String] the title of the ancestor Newspaper object
    # @return [String, nil] formatted title, or nil when title is not a String
    def breadcrumb_object_title(title)
      return nil unless title.is_a? String
      # match case-insensitively on the original string so the index is
      # valid for slicing the original (downcasing can change string length)
      page_slice_start_index = title.index(/page/i)
      return title[page_slice_start_index..-1] if page_slice_start_index
      begin
        # String#to_date returns nil (rather than raising) for a blank
        # string, so fall back to the title in that case too
        date = title.to_date
        date ? date.strftime("%B %e, %Y") : title
      rescue ArgumentError
        title
      end
    end

    # create link to the previous NewspaperPage
    #
    # @param presenter [NewspaperPagePresenter] presenter for current NewspaperPage object
    # @param options [Hash] hash of link options
    def previous_page_link(presenter, options = {})
      link_to("<< #{t('hyrax.newspaper_page.previous_page')}",
              main_app.hyrax_newspaper_page_path(presenter.previous_page_id),
              options)
    end

    # create link to the next NewspaperPage
    #
    # @param presenter [NewspaperPagePresenter] presenter for current NewspaperPage object
    # @param options [Hash] hash of link options
    def next_page_link(presenter, options = {})
      link_to("#{t('hyrax.newspaper_page.next_page')} >>",
              main_app.hyrax_newspaper_page_path(presenter.next_page_id),
              options)
    end
  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
module NewspaperWorks
  # View helpers for search-result thumbnails, keyword highlighting and
  # OCR snippets.
  module NewspaperWorksHelperBehavior
    ##
    # create link anchor to be read by UniversalViewer
    # in order to show keyword search
    # @param query_params_hash [Hash] current_search_session.query_params
    # @return [String] or [nil] anchor
    def iiif_search_anchor(query_params_hash)
      query = search_query(query_params_hash)
      return nil if query.blank?
      "?h=#{query}"
    end

    ##
    # get the query, which may be in a different object,
    # depending if regular search or newspapers_search was run
    # @param query_params_hash [Hash] current_search_session.query_params
    # @return [String] or [nil] query
    def search_query(query_params_hash)
      # an empty :q must not shadow a query supplied via :all_fields
      query_params_hash[:q].presence || query_params_hash[:all_fields]
    end

    ##
    # based on Blacklight::CatalogHelperBehavior#render_thumbnail_tag
    # setup the thumbnail link for a NewspaperPage or Article
    #
    # @param document [SolrDocument]
    # @param query_params_hash [Hash] current_search_session.query_params
    # @return [String] or [nil] when the document has no thumbnail
    def render_newspaper_thumbnail_tag(document, query_params_hash)
      thumbnail = newspaper_thumbnail_tag(document)
      return unless thumbnail
      anchor = iiif_search_anchor(query_params_hash)
      display_type_field = blacklight_config.view_config(document_index_view_type).display_type_field
      # Array() tolerates a document without the display type field
      case Array(document[display_type_field]).first
      when 'NewspaperPage'
        link_to(thumbnail, hyrax_newspaper_page_path(document.id, anchor: anchor))
      when 'NewspaperArticle'
        link_to(thumbnail, hyrax_newspaper_article_path(document.id, anchor: anchor))
      else
        link_to_document document, thumbnail
      end
    end

    ##
    # based on Blacklight::CatalogHelperBehavior#render_thumbnail_tag
    # return the thumbnail image_tag
    #
    # @param document [SolrDocument]
    # @return [String] or [nil] when no thumbnail is configured or available
    def newspaper_thumbnail_tag(document)
      view_config = blacklight_config.view_config(document_index_view_type)
      if view_config.thumbnail_method
        send(view_config.thumbnail_method, document)
      elsif view_config.thumbnail_field
        url = thumbnail_url(document)
        image_tag url if url.present?
      end
    end

    ##
    # return the matching highlighted terms from Solr highlight field
    #
    # @param document [SolrDocument]
    # @param hl_fl [String] the name of the Solr field with highlights
    # @param hl_tag [String] the HTML element name used for marking highlights
    #   configured in Solr as hl.tag.pre value
    # @return [String] or [nil] unique, downcased, sorted terms joined by spaces
    def highlight_matches(document, hl_fl, hl_tag)
      hls = document.highlight_field(hl_fl)
      return nil unless hls.present?
      # escape the tag name so it is always matched literally
      tag = Regexp.escape(hl_tag.to_s)
      # capture all chars between hl_tag, but NOT other <element>
      regex = %r{<#{tag}>([^<>]+)</#{tag}>}
      terms = hls.flat_map { |hl| hl.scan(regex).flatten }
      terms.map(&:downcase).uniq.sort.join(' ')
    end

    ##
    # print the ocr snippets. if more than one, the remaining snippets are
    # rendered through the 'catalog/snippets_more' partial
    #
    # @param options [Hash] options hash provided by Blacklight
    # @return [String] or [nil] snippets HTML to be rendered; nil when there
    #   are no snippets
    # rubocop:disable Rails/OutputSafety
    def render_ocr_snippets(options = {})
      snippets = Array(options[:value])
      return if snippets.empty?
      # NOTE(review): snippets are marked html_safe so the highlight markup
      # renders; this trusts the stored text not to contain other HTML --
      # confirm the source text is sanitized before indexing.
      snippets_content = [content_tag('div',
                                      "... #{snippets.first} ...".html_safe,
                                      class: 'ocr_snippet first_snippet')]
      if snippets.length > 1
        snippets_content << render(partial: 'catalog/snippets_more',
                                   locals: { snippets: snippets.drop(1),
                                             options: options })
      end
      snippets_content.join("\n").html_safe
    end
    # rubocop:enable Rails/OutputSafety
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# indexes the full text of a Newspaper object
|
|
2
|
+
module NewspaperWorks
  # Mixin that adds a work's plain-text derivative to its Solr document.
  module IndexesFullText
    # Loads the 'txt' derivative data for the work, flattens it to a single
    # line (newlines become spaces, runs of spaces are collapsed) and writes
    # it to *both* the stored field (for highlighting) and the non-stored
    # field (Hyrax default).
    #
    # @param work [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_full_text(work, solr_doc)
      derivatives = NewspaperWorks::Data::WorkDerivatives.new(work)
      flattened = derivatives.data('txt').tr("\n", ' ').squeeze(' ')
      solr_doc['all_text_timv'] = flattened
      solr_doc['all_text_tsimv'] = flattened
    end
  end
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# indexes the place_of_publication field
|
|
2
|
+
module NewspaperWorks
  # Mixin that indexes place_of_publication values using data fetched from
  # the GeoNames API.
  module IndexesPlaceOfPublication
    # wrapper for methods for indexing place_of_publication values
    #
    # Places that are not ActiveTriples resources, or for which no GeoNames
    # data could be fetched, are skipped; the remaining places are still
    # indexed.
    #
    # @param object [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pop(object, solr_doc)
      return unless object.respond_to?(:place_of_publication)
      object.place_of_publication.each do |pop|
        next unless pop.is_a?(ActiveTriples::Resource)
        # the GeoNames id is the run of 4+ digits in the resource id
        geonames_id = pop.id.to_s[/\d{4,}/].to_s
        geodata = get_geodata(geonames_id)
        # skip only this place; one failed lookup must not drop the others
        next if geodata.blank?
        add_geodata_fields(solr_doc)
        index_pop_geodata(geodata, solr_doc)
      end
    end

    # adds empty placeholder fields to solr_doc for incoming geodata
    # (existing values are left untouched)
    #
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def add_geodata_fields(solr_doc)
      %w[city county state country].each do |place|
        solr_doc["place_of_publication_#{place}_sim"] ||= []
      end
      solr_doc['place_of_publication_label_tesim'] ||= []
      solr_doc['place_of_publication_label_sim'] ||= []
      solr_doc['place_of_publication_llsim'] ||= []
    end

    # adds geographic data to solr_doc Hash, with fields for
    # city, county, state, country, coordinates
    #
    # Missing (nil or empty) values are not appended, so the Solr arrays
    # never receive nil entries.
    #
    # @param geodata [Hash] hash of GeoNames data returned by #get_geodata
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pop_geodata(geodata, solr_doc)
      # make sure the target arrays exist even when called on its own
      add_geodata_fields(solr_doc)
      places = {
        'city' => geodata['name'],
        'county' => geodata['adminName2'],
        'state' => geodata['adminName1'],
        'country' => geodata['countryName']
      }
      places.each do |place, value|
        next if value.nil? || value.to_s.empty?
        solr_doc["place_of_publication_#{place}_sim"] << value
      end
      label_parts = places.values_at('city', 'state', 'country')
      display_name = label_parts.reject { |v| v.nil? || v.to_s.empty? }.join(', ')
      unless display_name.empty?
        solr_doc['place_of_publication_label_tesim'] << display_name
        solr_doc['place_of_publication_label_sim'] << display_name
      end
      return unless geodata['lat'] && geodata['lng']
      # TODO: this should use a Solr location_rpt field type
      solr_doc['place_of_publication_llsim'] << "#{geodata['lat']},#{geodata['lng']}"
    end

    # fetch data from GeoNames API
    #
    # @param geoname_id [String] GeoNames id of geographic entity
    # @return [Hash, false] GeoNames API response as Hash; false when the id
    #   is not numeric, no GeoNames username is configured, the request or
    #   JSON parsing fails, or GeoNames answers with an error payload
    def get_geodata(geoname_id)
      return false if geoname_id.to_i.zero?
      geonames_un = Qa::Authorities::Geonames.username
      return false unless geonames_un
      geonames_url = "http://api.geonames.org/getJSON?geonameId=#{geoname_id}&username=#{geonames_un}"
      resp = Faraday.new(geonames_url).get
      geodata = JSON.parse(resp.body)
      # GeoNames reports failures as a JSON object with a "status" key
      return false unless geodata.is_a?(Hash) && !geodata.key?('status')
      geodata
    rescue Faraday::Error, JSON::ParserError => e
      # a failed lookup must not abort indexing of the whole object
      Rails.logger.warn("GeoNames lookup failed for #{geoname_id}: #{e.message}") if defined?(Rails) && Rails.logger
      false
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# indexes the publication_date_start and _end fields
|
|
2
|
+
module NewspaperWorks
  # Mixin that indexes publication start/end dates as Solr date fields.
  # Accepts dates given as YYYY, YYYY-MM or YYYY-MM-DD.
  module IndexesPublicationDateRange
    # adds publication date start to solr_doc Hash in Solr datetime format
    #
    # Partial dates are expanded to the first day of the year or month.
    # The string field publication_date_start_ssi is always reset to nil.
    #
    # @param pubdate [String] publication start date
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pubdate_start(pubdate, solr_doc)
      case pubdate
      when /\A\d{4}\z/
        solr_doc['publication_date_start_dtsi'] = "#{pubdate}-01-01".to_datetime
      when /\A\d{4}-\d{2}\z/
        solr_doc['publication_date_start_dtsi'] = "#{pubdate}-01".to_datetime
      when /\A\d{4}-\d{2}-\d{2}\z/
        # full dates need no padding; without this branch they were dropped
        solr_doc['publication_date_start_dtsi'] = pubdate.to_datetime
      end
      solr_doc['publication_date_start_ssi'] = nil
    end

    # adds publication date end to solr_doc Hash in Solr datetime format
    #
    # Partial dates are expanded to the last second of the year or month.
    # The string field publication_date_end_ssi is always reset to nil.
    #
    # @param pubdate [String] publication end date
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pubdate_end(pubdate, solr_doc)
      end_time = 'T23:59:59Z'
      case pubdate
      when /\A\d{4}\z/
        solr_doc['publication_date_end_dtsi'] = "#{pubdate}-12-31#{end_time}"
      when /\A\d{4}-\d{2}\z/
        date_split = pubdate.split('-')
        # day -1 resolves to the last day of the given month
        end_day = Date.new(date_split[0].to_i, date_split[1].to_i, -1).strftime('%d')
        solr_doc['publication_date_end_dtsi'] = "#{pubdate}-#{end_day}#{end_time}"
      when /\A\d{4}-\d{2}-\d{2}\z/
        # full dates need no padding; without this branch they were dropped
        solr_doc['publication_date_end_dtsi'] = "#{pubdate}#{end_time}"
      end
      solr_doc['publication_date_end_ssi'] = nil
    end
  end
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# indexes parent relationships e.g. issue->title, page->issue, etc
|
|
2
|
+
module NewspaperWorks
  # Mixin that copies information about related newspaper objects
  # (publication, issue, container, pages, articles, sibling pages)
  # into an object's Solr document.
  module IndexesRelationships
    # Entry point: indexes the publication for everything except a
    # NewspaperTitle, then the relationships specific to pages and articles.
    #
    # @param object [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_relationships(object, solr_doc)
      index_publication(object, solr_doc) unless object.is_a?(NewspaperTitle)
      if object.is_a?(NewspaperPage)
        index_issue(object, solr_doc)
        index_container(object, solr_doc)
        index_articles(object, solr_doc)
        index_siblings(object, solr_doc)
      elsif object.is_a?(NewspaperArticle)
        index_issue(object, solr_doc)
        index_pages(object, solr_doc)
      end
    end

    # Writes the id, first title and configured unique id of the object's
    # publication, then inherits the publication's facet values.
    # Does nothing unless the publication is a NewspaperTitle.
    #
    # @param object [Newspaper*] an instance of a NewspaperWorks model
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_publication(object, solr_doc)
      publication = object.publication
      return unless publication.is_a?(NewspaperTitle)
      unique_id_property = NewspaperWorks.config.publication_unique_id_property
      solr_doc['publication_id_ssi'] = publication.id
      solr_doc['publication_title_ssi'] = publication.title.first
      solr_doc['publication_unique_id_ssi'] = publication.send(unique_id_property)
      index_parent_facets(publication, solr_doc)
    end

    # Writes the id and first title of the page's container.
    # Does nothing unless the container is a NewspaperContainer.
    #
    # @param page [NewspaperPage]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_container(page, solr_doc)
      container = page.container
      return unless container.is_a?(NewspaperContainer)
      solr_doc['container_id_ssi'] = container.id
      solr_doc['container_title_ssi'] = container.title.first
    end

    # Writes id, first title, volume, edition number (default '1') and issue
    # number of the object's issue, fills publication_date_dtsi when it is
    # not already set, then inherits the issue's facet values.
    # Does nothing unless the issue is a NewspaperIssue.
    #
    # @param object [NewspaperPage||NewspaperArticle]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_issue(object, solr_doc)
      issue = object.issue
      return unless issue.is_a?(NewspaperIssue)
      solr_doc['issue_id_ssi'] = issue.id
      solr_doc['issue_title_ssi'] = issue.title.first
      if issue.publication_date.present?
        solr_doc['publication_date_dtsi'] ||= issue.publication_date.to_datetime
      end
      solr_doc['issue_volume_ssi'] = issue.volume
      solr_doc['issue_edition_number_ssi'] = issue.edition_number || '1'
      solr_doc['issue_number_ssi'] = issue.issue_number
      index_parent_facets(issue, solr_doc)
    end

    # Writes the ids and first titles of the article's pages as two
    # parallel arrays. Does nothing when there are no pages or the first
    # one is not a NewspaperPage.
    #
    # @param article [NewspaperArticle]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_pages(article, solr_doc)
      pages = article.pages
      return if pages.blank? || !pages.first.is_a?(NewspaperPage)
      ids = solr_doc['page_ids_ssim'] = []
      titles = solr_doc['page_titles_ssim'] = []
      pages.each do |related_page|
        ids << related_page.id
        titles << related_page.title.first
      end
    end

    # Writes the ids of the previous and next page within the issue's
    # ordered pages, and flags the first page. Does nothing unless the
    # issue is a NewspaperIssue that lists this page.
    #
    # @param page [NewspaperPage]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_siblings(page, solr_doc)
      issue = page.issue
      return unless issue.is_a?(NewspaperIssue)
      ordered_ids = issue.ordered_page_ids
      position = ordered_ids.index(page.id)
      return unless position
      is_first = position.zero?
      # the first page has no predecessor (and index -1 would wrap around)
      solr_doc['is_following_page_of_ssi'] = ordered_ids[position - 1].presence unless is_first
      solr_doc['is_preceding_page_of_ssi'] = ordered_ids[position + 1].presence
      solr_doc['first_page_bsi'] = true if is_first
    end

    # Writes the ids and first titles of the page's articles as two
    # parallel arrays. Does nothing when there are no articles or the first
    # one is not a NewspaperArticle.
    #
    # @param page [NewspaperPage]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_articles(page, solr_doc)
      articles = page.articles
      return if articles.blank? || !articles.first.is_a?(NewspaperArticle)
      ids = solr_doc['article_ids_ssim'] = []
      titles = solr_doc['article_titles_ssim'] = []
      articles.each do |related_article|
        ids << related_article.id
        titles << related_article.title.first
      end
    end

    # Copies language and place-of-publication facet values from the
    # parent's Solr document into fields the child has not set itself.
    # TODO: this could probably be DRY'd out a bit,
    # overlaps with IndexesPlaceOfPublication#index_pop
    #
    # @param parent [NewspaperTitle||NewspaperIssue]
    # @param solr_doc [Hash] the hash of field data to be pushed to Solr
    def index_parent_facets(parent, solr_doc)
      parent_doc = parent.to_solr
      inherited = %w[language_sim
                     place_of_publication_label_sim
                     place_of_publication_city_sim
                     place_of_publication_county_sim
                     place_of_publication_state_sim
                     place_of_publication_country_sim
                     place_of_publication_llsim
                     place_of_publication_label_tesim]
      inherited.each { |name| solr_doc[name] ||= parent_doc[name] }
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Generated via
|
|
2
|
+
# `rails generate hyrax:work NewspaperArticle`
|
|
3
|
+
# Indexer for NewspaperArticle: adds genre labels on top of the core
# newspaper indexing.
class NewspaperArticleIndexer < NewspaperWorks::NewspaperCoreIndexer
  # Looks up the label for each genre value (falling back to the value
  # itself) and indexes the labels as both searchable and facetable fields;
  # both fields are nil when there are no labels.
  #
  # @return [Hash] the Solr document
  def generate_solr_document
    super.tap do |solr_doc|
      genre_service = Hyrax::ArticleGenreService.new
      labels = object.genre.map do |genre_value|
        genre_service.label(genre_value) { genre_value }
      end
      solr_doc['genre_tesim'] = labels.presence
      solr_doc['genre_sim'] = labels.presence
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Generated via
|
|
2
|
+
# `rails generate hyrax:work NewspaperContainer`
|
|
3
|
+
# Indexer for NewspaperContainer. Adds nothing of its own; all indexing
# comes from NewspaperWorks::NewspaperCoreIndexer.
class NewspaperContainerIndexer < NewspaperWorks::NewspaperCoreIndexer
  # Generator options left disabled:
  #   include Hyrax::IndexesBasicMetadata   # default metadata indexing
  #   include Hyrax::IndexesLinkedMetadata  # remote labels for based_near
  #
  # To add custom indexing behavior, define:
  #   def generate_solr_document
  #     super.tap do |solr_doc|
  #       solr_doc['my_custom_field_ssim'] = object.my_custom_property
  #     end
  #   end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Generated via
|
|
2
|
+
# `rails generate hyrax:work NewspaperIssue`
|
|
3
|
+
# Indexer for NewspaperIssue: fixes up the publication date field type and
# supplies a default edition number on top of the core newspaper indexing.
class NewspaperIssueIndexer < NewspaperWorks::NewspaperCoreIndexer
  # Generator options left disabled:
  #   include Hyrax::IndexesBasicMetadata   # default metadata indexing
  #   include Hyrax::IndexesLinkedMetadata  # remote labels for based_near

  # @return [Hash] the Solr document
  def generate_solr_document
    super.tap do |solr_doc|
      issue_date = object.publication_date
      # a full YYYY-MM-DD date is set manually as a date field (_dtsi),
      # and the string field (_ssi) is cleared
      if issue_date =~ /\A\d{4}-\d{2}-\d{2}\z/
        solr_doc['publication_date_ssi'] = nil
        solr_doc['publication_date_dtsi'] = issue_date.to_datetime
      end

      # if edition number is not set, add a default
      # to support ChronAm-style URL pattern linking
      solr_doc['edition_number_tesim'] ||= '1'
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Generated via
|
|
2
|
+
# `rails generate hyrax:work NewspaperTitle`
|
|
3
|
+
# Indexer for NewspaperTitle: indexes the publication start/end dates on
# top of the core newspaper indexing.
class NewspaperTitleIndexer < NewspaperWorks::NewspaperCoreIndexer
  include NewspaperWorks::IndexesPublicationDateRange

  # Generator options left disabled:
  #   include Hyrax::IndexesBasicMetadata   # default metadata indexing
  #   include Hyrax::IndexesLinkedMetadata  # remote labels for based_near

  # Each date is indexed only when the title has a value for it.
  #
  # @return [Hash] the Solr document
  def generate_solr_document
    super.tap do |solr_doc|
      if object.publication_date_start.present?
        index_pubdate_start(object.publication_date_start, solr_doc)
      end
      if object.publication_date_end.present?
        index_pubdate_end(object.publication_date_end, solr_doc)
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Core indexer for newspaper work types
|
|
2
|
+
module NewspaperWorks
  # Base indexer for the newspaper work types: on top of the Hyrax work
  # indexing it adds place-of-publication and relationship fields.
  class NewspaperCoreIndexer < Hyrax::WorkIndexer
    # This indexes the default metadata.
    include Hyrax::IndexesBasicMetadata
    include NewspaperWorks::IndexesPlaceOfPublication
    include NewspaperWorks::IndexesRelationships

    # Generator option left disabled:
    #   include Hyrax::IndexesLinkedMetadata  # remote labels for based_near

    # @return [Hash] the Solr document
    def generate_solr_document
      solr_doc = super
      index_pop(object, solr_doc)
      index_relationships(object, solr_doc)
      solr_doc
    end
  end
end
|