newspaper_works 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.fcrepo_wrapper +4 -0
- data/.gitignore +43 -0
- data/.rubocop.yml +143 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +50 -0
- data/Gemfile +47 -0
- data/LICENSE +203 -0
- data/README.md +159 -0
- data/Rakefile +38 -0
- data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
- data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
- data/app/assets/config/newspaper_works_manifest.js +2 -0
- data/app/assets/images/newspaper_works/.keep +0 -0
- data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
- data/app/assets/javascripts/newspaper_works.js +4 -0
- data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
- data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
- data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
- data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
- data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
- data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
- data/app/forms/hyrax/newspaper_article_form.rb +11 -0
- data/app/forms/hyrax/newspaper_container_form.rb +11 -0
- data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
- data/app/forms/hyrax/newspaper_page_form.rb +15 -0
- data/app/forms/hyrax/newspaper_title_form.rb +12 -0
- data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
- data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
- data/app/helpers/newspaper_works/application_helper.rb +5 -0
- data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
- data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
- data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
- data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
- data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
- data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
- data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
- data/app/indexers/newspaper_article_indexer.rb +16 -0
- data/app/indexers/newspaper_container_indexer.rb +18 -0
- data/app/indexers/newspaper_issue_indexer.rb +26 -0
- data/app/indexers/newspaper_page_indexer.rb +9 -0
- data/app/indexers/newspaper_title_indexer.rb +19 -0
- data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
- data/app/jobs/newspaper_works/application_job.rb +4 -0
- data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
- data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
- data/app/mailers/newspaper_works/application_mailer.rb +8 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
- data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
- data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
- data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
- data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
- data/app/models/file_set.rb +10 -0
- data/app/models/newspaper_article.rb +158 -0
- data/app/models/newspaper_container.rb +86 -0
- data/app/models/newspaper_issue.rb +115 -0
- data/app/models/newspaper_page.rb +70 -0
- data/app/models/newspaper_title.rb +111 -0
- data/app/models/newspaper_works/application_record.rb +6 -0
- data/app/models/newspaper_works/derivative_attachment.rb +8 -0
- data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
- data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
- data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
- data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
- data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
- data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
- data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
- data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
- data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
- data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
- data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
- data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
- data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
- data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
- data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
- data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
- data/app/services/hyrax/article_genre_service.rb +9 -0
- data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
- data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
- data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
- data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
- data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
- data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
- data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
- data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
- data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
- data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
- data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
- data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
- data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
- data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
- data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
- data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
- data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
- data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
- data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
- data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
- data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
- data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
- data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
- data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
- data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
- data/app/views/newspaper_works/base/_show.html.erb +45 -0
- data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
- data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
- data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
- data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
- data/app/views/records/edit_fields/_genre.html.erb +4 -0
- data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
- data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/newspaper_article.de.yml +12 -0
- data/config/locales/newspaper_article.en.yml +12 -0
- data/config/locales/newspaper_article.es.yml +12 -0
- data/config/locales/newspaper_article.fr.yml +12 -0
- data/config/locales/newspaper_article.it.yml +12 -0
- data/config/locales/newspaper_article.pt-BR.yml +12 -0
- data/config/locales/newspaper_article.zh.yml +12 -0
- data/config/locales/newspaper_container.de.yml +8 -0
- data/config/locales/newspaper_container.en.yml +8 -0
- data/config/locales/newspaper_container.es.yml +8 -0
- data/config/locales/newspaper_container.fr.yml +8 -0
- data/config/locales/newspaper_container.it.yml +8 -0
- data/config/locales/newspaper_container.pt-BR.yml +8 -0
- data/config/locales/newspaper_container.zh.yml +8 -0
- data/config/locales/newspaper_issue.de.yml +8 -0
- data/config/locales/newspaper_issue.en.yml +8 -0
- data/config/locales/newspaper_issue.es.yml +8 -0
- data/config/locales/newspaper_issue.fr.yml +8 -0
- data/config/locales/newspaper_issue.it.yml +8 -0
- data/config/locales/newspaper_issue.pt-BR.yml +8 -0
- data/config/locales/newspaper_issue.zh.yml +8 -0
- data/config/locales/newspaper_page.de.yml +15 -0
- data/config/locales/newspaper_page.en.yml +15 -0
- data/config/locales/newspaper_page.es.yml +15 -0
- data/config/locales/newspaper_page.fr.yml +15 -0
- data/config/locales/newspaper_page.it.yml +15 -0
- data/config/locales/newspaper_page.pt-BR.yml +15 -0
- data/config/locales/newspaper_page.zh.yml +15 -0
- data/config/locales/newspaper_title.de.yml +8 -0
- data/config/locales/newspaper_title.en.yml +8 -0
- data/config/locales/newspaper_title.es.yml +8 -0
- data/config/locales/newspaper_title.fr.yml +8 -0
- data/config/locales/newspaper_title.it.yml +8 -0
- data/config/locales/newspaper_title.pt-BR.yml +8 -0
- data/config/locales/newspaper_title.zh.yml +8 -0
- data/config/locales/newspaper_works.de.yml +50 -0
- data/config/locales/newspaper_works.en.yml +52 -0
- data/config/locales/newspaper_works.es.yml +52 -0
- data/config/locales/newspaper_works.fr.yml +52 -0
- data/config/locales/newspaper_works.it.yml +52 -0
- data/config/locales/newspaper_works.pt-BR.yml +52 -0
- data/config/locales/newspaper_works.zh.yml +52 -0
- data/config/routes.rb +9 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
- data/lib/generators/newspaper_works/assets_generator.rb +29 -0
- data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
- data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
- data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
- data/lib/generators/newspaper_works/install_generator.rb +97 -0
- data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
- data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
- data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
- data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
- data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
- data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
- data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
- data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
- data/lib/newspaper_works/configuration.rb +14 -0
- data/lib/newspaper_works/data/fileset_helper.rb +25 -0
- data/lib/newspaper_works/data/path_helper.rb +40 -0
- data/lib/newspaper_works/data/work_derivatives.rb +314 -0
- data/lib/newspaper_works/data/work_file.rb +92 -0
- data/lib/newspaper_works/data/work_files.rb +181 -0
- data/lib/newspaper_works/data.rb +35 -0
- data/lib/newspaper_works/engine.rb +42 -0
- data/lib/newspaper_works/errors.rb +14 -0
- data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
- data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
- data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
- data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
- data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
- data/lib/newspaper_works/ingest/from_command.rb +52 -0
- data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
- data/lib/newspaper_works/ingest/issue_images.rb +51 -0
- data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
- data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
- data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
- data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
- data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
- data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
- data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
- data/lib/newspaper_works/ingest/ndnp.rb +21 -0
- data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
- data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
- data/lib/newspaper_works/ingest/page_image.rb +52 -0
- data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
- data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
- data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
- data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
- data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
- data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
- data/lib/newspaper_works/ingest/publication_info.rb +44 -0
- data/lib/newspaper_works/ingest.rb +90 -0
- data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
- data/lib/newspaper_works/logging.rb +54 -0
- data/lib/newspaper_works/page_finder.rb +62 -0
- data/lib/newspaper_works/resource_fetcher.rb +78 -0
- data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
- data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
- data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
- data/lib/newspaper_works/text_extraction.rb +10 -0
- data/lib/newspaper_works/version.rb +3 -0
- data/lib/newspaper_works.rb +19 -0
- data/lib/tasks/newspaper_works_tasks.rake +39 -0
- data/newspaper_works.gemspec +49 -0
- data/spec/.keep.txt +1 -0
- data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
- data/spec/controllers/catalog_controller_spec.rb +63 -0
- data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
- data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_issue_ingest.rb +6 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_ingest.rb +6 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/features/front_pages_for_title_spec.rb +19 -0
- data/spec/features/newspaper_title_search_spec.rb +30 -0
- data/spec/features/newspapers_search_spec.rb +49 -0
- data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
- data/spec/features_shared.rb +71 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
- data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
- data/spec/fixtures/files/resource_mocks/urls.json +82 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
- data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
- data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
- data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
- data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
- data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
- data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
- data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
- data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
- data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
- data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
- data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
- data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
- data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
- data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
- data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
- data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
- data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
- data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
- data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
- data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
- data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
- data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
- data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
- data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
- data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
- data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
- data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
- data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
- data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
- data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
- data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
- data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
- data/spec/lib/newspaper_works/logging_spec.rb +53 -0
- data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
- data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
- data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
- data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
- data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
- data/spec/misc_shared.rb +109 -0
- data/spec/model_shared.rb +134 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
- data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
- data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
- data/spec/models/newspaper_article_spec.rb +73 -0
- data/spec/models/newspaper_container_spec.rb +111 -0
- data/spec/models/newspaper_issue_spec.rb +91 -0
- data/spec/models/newspaper_page_spec.rb +44 -0
- data/spec/models/newspaper_title_spec.rb +116 -0
- data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
- data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/ndnp_shared.rb +48 -0
- data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
- data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
- data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
- data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
- data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
- data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
- data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
- data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
- data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
- data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
- data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
- data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
- data/spec/routing/route_spec.rb +52 -0
- data/spec/search_builders/custom_search_builder_spec.rb +34 -0
- data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
- data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
- data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
- data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
- data/spec/spec_helper.rb +261 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
- data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
- data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
- data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
- data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
- data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
- data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
- data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
- data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
- data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
- data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
- data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
- data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
- data/tasks/newspaperworks_dev.rake +26 -0
- data/test/integration/navigation_test.rb +7 -0
- data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
- data/test/newspaper_works_test.rb +7 -0
- data/test/test_helper.rb +17 -0
- data/tmp/.keep +0 -0
- metadata +1037 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'ndnp_shared'
|
|
3
|
+
|
|
4
|
+
RSpec.describe NewspaperWorks::Ingest::NDNP::IssueIngester do
|
|
5
|
+
include_context "ndnp fixture setup"
|
|
6
|
+
|
|
7
|
+
# Source data: the NDNP issue wrapper handed to the ingester.
let(:issue_data) { NewspaperWorks::Ingest::NDNP::IssueIngest.new(issue1) }

# Metadata object exposed by the source issue.
let(:metadata) { issue_data.metadata }

# The IssueIngester adapter is the object whose work is under test.
let(:adapter) { described_class.new(issue_data) }
|
|
16
|
+
|
|
17
|
+
describe "adapter and asset construction" do
|
|
18
|
+
# Sets a message expectation on the adapter: write_log must be called
# exactly once with a message containing the "saved metadata" notice.
def expect_issue_import_logging(adapter)
  saved_notice = satisfy do |message|
    message.include?('Saved metadata to new NewspaperIssue')
  end
  expect(adapter).to receive(:write_log).with(saved_notice).once
end
|
|
23
|
+
|
|
24
|
+
# Removes the publication asset(s) stored for the given LCCN, for examples
# in which re-creating the publication is the desired behavior.
def clear_publication(lccn)
  existing_titles = NewspaperTitle.where(lccn: lccn)
  existing_titles.delete_all
end
|
|
29
|
+
|
|
30
|
+
it "constructs adapter with issue source" do
  # the adapter holds the very same source object and mirrors its path:
  expect(adapter.issue).to equal(issue_data)
  expect(adapter.path).to eq(issue_data.path)
  # no target asset exists before construction:
  expect(adapter.target).to be_nil
end
|
|
36
|
+
|
|
37
|
+
it "constructs adapter with hash options" do
  depositor_key = User.batch_user.user_key
  configured = described_class.new(issue_data, depositor: depositor_key)
  # options passed at construction are readable through #opts:
  expect(configured.opts[:depositor]).to eq(depositor_key)
end
|
|
45
|
+
|
|
46
|
+
it "constructs NewspaperIssue with adapter" do
  # construct_issue is only the first part of ingest: it creates the issue
  # and finds-or-links the publication NewspaperTitle; it does not trigger
  # creation of child pages.
  lccn = issue_data.metadata.lccn
  clear_publication(lccn)
  expect_issue_import_logging(adapter)
  title_notices = [
    'Created NewspaperTitle work',
    'Found existing NewspaperTitle'
  ]
  title_notice = satisfy do |message|
    title_notices.any? { |text| message.include?(text) }
  end
  expect(adapter).to receive(:write_log).with(title_notice).once
  linked_notice = satisfy do |message|
    message.include?('Linked NewspaperIssue')
  end
  expect(adapter).to receive(:write_log).with(linked_notice).once
  adapter.construct_issue
  created_issue = adapter.target
  expect(created_issue).to be_a NewspaperIssue
  expect(created_issue.id).not_to be_nil
  # check parent publication
  parent = created_issue.publication
  expect(parent.lccn).to eq lccn
  expect(parent.title).to contain_exactly 'The Park Record'
end
|
|
70
|
+
|
|
71
|
+
it "creates new NewspaperTitle without place of publication" do
  # clear any existing publications from previous testing
  lccn = issue_data.metadata.lccn
  clear_publication(lccn)
  # construct with title, this time no username set for geonames.
  # The username is a class-level (global) setting, so it is put back in
  # `ensure`: a failing expectation must not leak the blank username into
  # later examples.
  begin
    Qa::Authorities::Geonames.username = ''
    adapter.construct_issue
    expect(adapter.target.publication.place_of_publication).to be_empty
  ensure
    # same value the sibling "with place of publication" example sets
    Qa::Authorities::Geonames.username = 'newspaper_works'
  end
end
|
|
81
|
+
|
|
82
|
+
it "creates new NewspaperTitle with place of publication" do
  # clear any existing publications from previous testing
  clear_publication(issue_data.metadata.lccn)
  # construct with title, this time with username set for geonames:
  Qa::Authorities::Geonames.username = 'newspaper_works'
  adapter.construct_issue
  places = adapter.target.publication.place_of_publication
  place_uris = places.map { |place| place.to_uri.to_s }
  expect(place_uris).not_to be_empty
  expect(place_uris[0]).to start_with 'http://sws.geonames.org/'
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
describe "metadata access/setting" do
  # Publication title from the source metadata, stripped, keeping only the
  # text before the first " (" (e.g. a trailing parenthetical qualifier).
  def normalized_pubtitle(issue_data)
    raw_title = issue_data.metadata.publication_title
    raw_title.strip.split(/ \(/).first
  end

  # Title expected on the constructed issue:
  # "<normalized publication title>: <Month D, YYYY>".
  def expected_title(issue_data)
    iso_date = issue_data.metadata.publication_date
    formatted = DateTime.iso8601(iso_date).strftime('%B %-d, %Y')
    "#{normalized_pubtitle(issue_data)}: #{formatted}"
  end

  it "copies metadata to NewspaperIssue" do
    adapter.construct_issue
    constructed = adapter.target
    source = issue_data.metadata
    expect(constructed.title).to contain_exactly expected_title(issue_data)
    expect(constructed.lccn).to eq source.lccn
    expect(constructed.volume).to eq source.volume
    expect(constructed.publication_date).to eq source.publication_date
    expect(constructed.issue_number).to eq source.issue_number
  end

  it "sets default administrative metadata with default construction" do
    adapter.construct_issue
    constructed = adapter.target
    default_admin_set = AdminSet.find(AdminSet::DEFAULT_ID)
    expect(constructed.depositor).to eq User.batch_user.user_key
    expect(constructed.admin_set).to eq default_admin_set
    expect(constructed.visibility).to eq 'open'
  end

  it "sets custom administrative metadata for issue" do
    # one representative option (visibility) stands in for the others:
    custom_adapter = described_class.new(issue_data, visibility: 'open')
    custom_adapter.construct_issue
    expect(custom_adapter.target.visibility).to eq 'open'
  end

  it "sets custom administrative metadata for constructed publication" do
    # one representative option (visibility) stands in for the others:
    custom_adapter = described_class.new(issue_data, visibility: 'open')
    custom_adapter.construct_issue
    parent_title = custom_adapter.target.publication
    expect(parent_title).not_to be_nil
    expect(parent_title.visibility).to eq 'open'
  end
end
|
|
143
|
+
|
|
144
|
+
describe "child page creation" do
  it "creates child pages on ingest of issue" do
    # Calling ingest without invoking the usual async jobs should create
    # child pages without the additional work of attaching files to them;
    # file attachment does not need testing here (it is tested elsewhere).
    adapter.ingest
    child_pages = adapter.target.pages
    child_pages.each do |child|
      expect(child.issue.id).to eq adapter.target.id
    end
  end
end
|
|
155
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'ndnp_shared'
|
|
3
|
+
|
|
4
|
+
RSpec.describe NewspaperWorks::Ingest::NDNP::IssueMetadata do
|
|
5
|
+
include_context "ndnp fixture setup"
|
|
6
|
+
|
|
7
|
+
describe "sample fixture 'batch_local'" do
  # metadata reader under test, built from the `issue1` fixture
  let(:issue) { described_class.new(issue1) }

  it("gets lccn") { expect(issue.lccn).to eq 'sn85058233' }

  it("gets volume") { expect(issue.volume).to eq '56' }

  it("gets issue") { expect(issue.issue_number).to eq '27' }

  it "gets edition fields" do
    expect(issue.edition_name).to eq 'Main Edition'
    expect(issue.edition_number).to eq '1'
  end

  it("gets publication date") do
    expect(issue.publication_date).to eq '1935-08-02'
  end

  it "gets publication title via //mets/@LABEL" do
    expected = 'The Park Record (Park City, UT)'
    expect(issue.publication_title).to eq expected
  end

  it "gets held_by" do
    expected = 'University of Utah; Salt Lake City, UT'
    expect(issue.held_by).to eq expected
  end
end
|
|
39
|
+
|
|
40
|
+
describe "sample fixture 'batch_test_ver01" do
  # metadata reader under test, built directly from the `issue2` fixture
  let(:issue) { described_class.new(issue2) }

  # full IssueIngest for the same fixture, used by the integration example
  let(:issue_ingest) do
    ingest_class = NewspaperWorks::Ingest::NDNP::IssueIngest
    ingest_class.new(issue2)
  end

  it("gets lccn") { expect(issue.lccn).to eq 'sn85025202' }

  it("gets volume") { expect(issue.volume).to eq '2' }

  it("gets issue") { expect(issue.issue_number).to eq '4' }

  it "gets edition fields" do
    expect(issue.edition_name).to be_nil
    expect(issue.edition_number).to eq '1'
  end

  it("gets publication date") do
    expect(issue.publication_date).to eq '1857-02-14'
  end

  it "gets publication title via label, when reel unavailable" do
    expected = 'Weekly Trinity journal (Weaverville, Calif.)'
    expect(issue.publication_title).to eq expected
  end

  # integration test for reel context publication title:
  it "gets publication title via label, from reel" do
    expected = 'Weekly Trinity journal (Weaverville, Calif.)'
    expect(issue_ingest.metadata.publication_title).to eq expected
    reel_title = issue_ingest.container.metadata.title
    expect(issue_ingest.metadata.publication_title).to eq reel_title
  end

  it "gets held_by" do
    expected = 'University of Utah, Salt Lake City, UT'
    expect(issue.held_by).to eq expected
  end
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'ndnp_shared'
|
|
3
|
+
|
|
4
|
+
RSpec.describe NewspaperWorks::Ingest::NDNP::PageIngest do
|
|
5
|
+
include_context "ndnp fixture setup"
|
|
6
|
+
|
|
7
|
+
# True when the file name at +path+ ends in extension +ext+, compared
# case-insensitively.
#
# @param path [String] file path (only its final extension is considered)
# @param ext [String] extension without a leading dot, e.g. 'tif'
# @return [Boolean]
def file_type?(path, ext)
  # File.extname yields '' for an empty path or an extensionless name, so
  # neither a crash on nil nor a false match on a bare name like 'tif' occurs.
  actual = File.extname(path.to_s).sub(/\A\./, '')
  actual.casecmp(ext.to_s).zero?
end
|
|
10
|
+
|
|
11
|
+
# Whether at least one path in +files+ has the extension +ext+,
# as judged by file_type?.
def includes_file_type(files, ext)
  files.each do |candidate|
    return true if file_type?(candidate, ext)
  end
  false
end
|
|
14
|
+
|
|
15
|
+
# Shared assertions on a page's files: one file per expected extension,
# every path absolute, and every expected extension present.
def check_expected_files(page, extensions)
  paths = page.files
  expect(paths.size).to eq extensions.size
  # each path is expected to have been normalized to an absolute path
  paths.each { |candidate| expect(candidate.start_with?('/')).to be true }
  extensions.each do |suffix|
    expect(includes_file_type(paths, suffix)).to be true
  end
end
|
|
26
|
+
|
|
27
|
+
describe "sample fixture 'batch_local'" do
  # page source under test: 'pageModsBib8' within the `issue1` fixture
  let(:page) { described_class.new(issue1, 'pageModsBib8') }

  it "gets metadata" do
    page_metadata = page.metadata
    expect(page_metadata).to be_a NewspaperWorks::Ingest::NDNP::PageMetadata
    # metadata shares the very same Nokogiri document as the page:
    expect(page.metadata.doc).to be page.doc
  end

  it("gets expected files") do
    check_expected_files(page, %w[tif jp2 pdf xml])
  end

  it "gets nil container for page without reel XML" do
    expect(page.container).to be_nil
  end
end
|
|
45
|
+
|
|
46
|
+
describe "sample fixture 'batch_test_ver01'" do
  # page source under test: 'pageModsBib1' within the `issue2` fixture
  let(:page) { described_class.new(issue2, 'pageModsBib1') }

  it "gets metadata" do
    page_metadata = page.metadata
    expect(page_metadata).to be_a NewspaperWorks::Ingest::NDNP::PageMetadata
    # metadata shares the very same Nokogiri document as the page:
    expect(page.metadata.doc).to be page.doc
  end

  it("gets expected files") do
    check_expected_files(page, %w[tif jp2 pdf xml])
  end

  it "gets a ContainerIngest for reel providing page" do
    container = page.container
    expect(container).to be_a NewspaperWorks::Ingest::NDNP::ContainerIngest
    expect(container.path).to end_with '_1.xml'
  end
end
|
|
65
|
+
|
|
66
|
+
describe "sample fixture reel xml (for control images)" do
  # page source under test: 'targetModsBib1' within the `reel1` fixture
  let(:page) { described_class.new(reel1, 'targetModsBib1') }

  it "gets metadata" do
    page_metadata = page.metadata
    expect(page_metadata).to be_a NewspaperWorks::Ingest::NDNP::PageMetadata
    # metadata shares the very same Nokogiri document as the page:
    expect(page.metadata.doc).to be page.doc
  end

  it("gets expected files") do
    check_expected_files(page, %w[tif jp2 pdf xml])
  end
end
|
|
79
|
+
end
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'ndnp_shared'
|
|
3
|
+
require 'misc_shared'
|
|
4
|
+
|
|
5
|
+
RSpec.describe NewspaperWorks::Ingest::NDNP::PageIngester do
|
|
6
|
+
include_context "ndnp fixture setup"
|
|
7
|
+
include_context "shared setup"
|
|
8
|
+
|
|
9
|
+
# NewspaperIssue, built by the FactoryBot issue factory, that the page
# under construction belongs to:
let(:issue) { create(:newspaper_issue) }

# Page source data, wrapped as a PageIngest:
let(:page_data) do
  ingest_class = NewspaperWorks::Ingest::NDNP::PageIngest
  ingest_class.new(issue1, 'pageModsBib8')
end

let(:metadata) { page_data.metadata }

# The PageIngester adapter is the object doing the work being tested:
let(:adapter) { described_class.new(page_data, issue) }
|
|
21
|
+
|
|
22
|
+
describe "adapter and asset construction" do
  it "constructs adapter with page source, issue context" do
    expect(adapter.page).to equal(page_data)
    expect(adapter.issue).to equal(issue)
    expect(adapter.path).to eq page_data.path
  end

  it "constructs NewspaperPage with adapter" do
    # construct_page ingests metadata only; no files are imported:
    adapter.construct_page
    created_page = adapter.target
    expect(created_page).to be_a NewspaperPage
    expect(created_page.id).not_to be_nil
    expect(issue.members).to include created_page
    expect(issue.ordered_members.to_a).to include created_page
  end

  it "constructs adapter with hash options" do
    depositor_key = User.batch_user.user_key
    configured = described_class.new(page_data, issue, depositor: depositor_key)
    expect(configured.opts[:depositor]).to eq depositor_key
  end
end
|
|
49
|
+
|
|
50
|
+
describe "metadata access/setting" do
  # page title is the first issue title plus a page-number qualifier
  let(:expected_title) { "#{issue.title.first}: Page #{metadata.page_number}" }

  it "sets default administrative metadata with default construction" do
    adapter.construct_page
    created_page = adapter.target
    default_admin_set = AdminSet.find(AdminSet::DEFAULT_ID)
    expect(created_page.depositor).to eq User.batch_user.user_key
    expect(created_page.admin_set).to eq default_admin_set
    expect(created_page.visibility).to eq 'open'
  end

  it "sets custom administrative metadata" do
    # one representative option (visibility) stands in for the others:
    custom_adapter = described_class.new(page_data, issue, visibility: 'open')
    custom_adapter.construct_page
    expect(custom_adapter.target.visibility).to eq 'open'
  end

  it "copies metadata to NewspaperPage" do
    adapter.construct_page
    created_page = adapter.target
    expect(created_page.title).to contain_exactly expected_title
    expect(created_page.width).to eq metadata.width
    expect(created_page.height).to eq metadata.height
    expect(created_page.page_number).to eq metadata.page_number
    expect(created_page.identifier).to contain_exactly metadata.identifier
  end
end
|
|
80
|
+
|
|
81
|
+
describe "reel/container linking" do
  # publication, issue, and reel-backed page data give the page its context:
  let(:publication) { create(:newspaper_title) }

  # issue that is saved as a member of the publication above
  let(:issue) do
    create(:newspaper_issue).tap do |created|
      publication.members << created
      publication.save!
    end
  end

  let(:issue_data) do
    ingest_class = NewspaperWorks::Ingest::NDNP::IssueIngest
    ingest_class.new(issue2)
  end

  let(:page_data) do
    first_page = issue_data.to_a.first
    # some NDNP samples are missing a TIFF; substitute a dummy file for any
    # listed path that does not exist on disk
    first_page.files = first_page.files.map do |candidate|
      if File.exist?(candidate)
        candidate
      else
        File.join(fixture_path, 'ocr_gray.tiff')
      end
    end
    first_page
  end

  let(:adapter) { described_class.new(page_data, issue) }

  it "links page to reel" do
    # construct_page followed by link_reel approximates ingest minus the
    # files import:
    adapter.construct_page
    adapter.link_reel
    linked_page = adapter.target
    linked_page.reload
    expect(linked_page.container).not_to be_nil
    member_ids = linked_page.container.ordered_members.to_a.map(&:id)
    expect(member_ids).to include linked_page.id
  end
end
|
|
117
|
+
|
|
118
|
+
describe "file import integration" do
  # job classes passed as `perform_enqueued:` metadata on the examples below
  do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]

  let(:issue_data) do
    NewspaperWorks::Ingest::NDNP::IssueIngest.new(issue2)
  end

  # first page of the issue, used by the "TIFF missing" example below
  let(:page_data_minus_tiff) { issue_data.to_a[0] }

  # Asserts that +page+ has a FileSet member whose original file is a
  # non-empty TIFF.
  def check_fileset(page)
    fileset = page.members.find { |m| m.instance_of?(FileSet) }
    # Assert presence BEFORE reloading: a missing FileSet is then reported
    # as an expectation failure, not as NoMethodError on nil.
    expect(fileset).not_to be_nil
    # Reload fileset because jobs have modified:
    fileset.reload
    expect(fileset.original_file).not_to be_nil
    expect(fileset.original_file.mime_type).to eq 'image/tiff'
    expect(fileset.original_file.size).to be > 0
  end

  # Expects the adapter's write_log messages for file attachment: one
  # primary assignment, three derivative assignments, one "beginning" notice.
  def expect_file_assignment_logging(adapter)
    expect(adapter).to receive(:write_log).with(
      satisfy { |v| v.include?('Assigned primary file to work') }
    ).once
    expect(adapter).to receive(:write_log).with(
      satisfy { |v| v.include?('Assigned derivative file to work') }
    ).exactly(3).times
    expect(adapter).to receive(:write_log).with(
      satisfy { |v| v.include?('Beginning file attachment') }
    ).once
  end

  # Expects the adapter's write_log messages for page creation, metadata
  # save, and issue linking, each exactly once.
  def expect_page_import_logging(adapter)
    expect(adapter).to receive(:write_log).with(
      satisfy { |v| v.include?('Created NewspaperPage work') }
    ).once
    expect(adapter).to receive(:write_log).with(
      satisfy { |v| v.include?('Saved metadata to NewspaperPage work') }
    ).once
    expect(adapter).to receive(:write_log).with(
      satisfy { |v| v.include?('Linked NewspaperIssue') }
    ).once
  end

  it "attaches primary, derivative files", perform_enqueued: do_now_jobs do
    expect_page_import_logging(adapter)
    expect_file_assignment_logging(adapter)
    adapter.ingest
    page = adapter.target
    check_fileset(page)
    derivatives = NewspaperWorks::Data::WorkDerivatives.new(page)
    expect(derivatives.keys).to match_array ["jp2", "xml", "pdf"]
  end

  # support this use-case for evaluation purposes
  it "generates TIFF when missing from page", perform_enqueued: do_now_jobs do
    adapter = described_class.new(page_data_minus_tiff, issue)
    expect_page_import_logging(adapter)
    expect(adapter).to receive(:write_log).with(
      satisfy { |arg| arg.include?('Creating TIFF') },
      Logger::WARN
    ).exactly(1).times
    expect_file_assignment_logging(adapter)
    expect { adapter.ingest }.not_to raise_error
    check_fileset(adapter.target)
  end
end
|
|
184
|
+
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'ndnp_shared'
|
|
3
|
+
|
|
4
|
+
RSpec.describe NewspaperWorks::Ingest::NDNP::PageMetadata do
|
|
5
|
+
include_context "ndnp fixture setup"
|
|
6
|
+
|
|
7
|
+
describe "sample fixture 'batch_local'" do
  # two pages of the `issue1` fixture, looked up by their 'pageModsBib' ids
  let(:page1) { described_class.new(issue1, nil, 'pageModsBib8') }
  let(:page2) { described_class.new(issue1, nil, 'pageModsBib6') }

  it "gets expected page number as String" do
    expect(page1.page_number).to eq '1'
    expect(page2.page_number).to eq '2'
  end

  it "gets expected sequence number as Integer" do
    expect(page1.page_sequence_number).to eq 1
    expect(page2.page_sequence_number).to eq 2
  end

  it "gets expected width from ALTO as Integer " do
    expect(page1.width).to eq 18_352
    expect(page2.width).to eq 18_200
  end

  it "gets expected height from ALTO as Integer " do
    expect(page1.height).to eq 28_632
    expect(page2.height).to eq 28_872
  end

  it "gets identifier from ALTO as primary file name" do
    expect(page1.identifier).to eq '0657b'
    expect(page2.identifier).to eq '0656a'
  end
end
|
|
36
|
+
|
|
37
|
+
describe "sample fixture 'batch_test_ver01" do
  # page 'pageModsBib1' of the `issue2` fixture
  let(:page) { described_class.new(issue2, nil, 'pageModsBib1') }

  it "fallback to sequence number on page without page number" do
    sequence_as_string = page.page_sequence_number.to_s
    expect(page.page_number).to eq sequence_as_string
  end

  it("gets expected sequence number as Integer") do
    expect(page.page_sequence_number).to eq 1
  end

  it("gets expected width from ALTO as Integer ") do
    expect(page.width).to eq 21_464
  end

  it("gets expected height from ALTO as Integer ") do
    expect(page.height).to eq 30_268
  end

  it("gets identifier from ALTO as primary file name") do
    expect(page.identifier).to eq '0225'
  end
end
|
|
60
|
+
|
|
61
|
+
describe "sample fixture via Reel XML" do
  # page-like entry 'targetModsBib1' of the `reel1` fixture
  let(:page) { described_class.new(reel1, nil, 'targetModsBib1') }

  it "return nil page number when page and sequence missing" do
    expect(page.page_number).to be_nil
    expect(page.page_sequence_number).to be_nil
  end

  # NOTE(review): the description says Integer, but nil is what is asserted.
  it("gets expected sequence number as Integer") do
    expect(page.page_sequence_number).to be_nil
  end

  it("gets expected width from ALTO as Integer ") do
    expect(page.width).to eq 30_176
  end

  it("gets expected height from ALTO as Integer ") do
    expect(page.height).to eq 29_152
  end

  it("gets identifier from ALTO as primary file name") do
    expect(page.identifier).to eq '0001'
  end
end
|
|
85
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# test NewspaperIssueIngest against a NewspaperIssue
|
|
4
|
+
RSpec.describe NewspaperWorks::Ingest::NewspaperIssueIngest do
|
|
5
|
+
# Directory holding the sample PDFs used below. Evaluating it adds the
# directory to Hyrax's whitelisted ingest dirs, at most once, so both
# `path` and `path2` get the same setup regardless of example order.
let(:fixture_dir) do
  dir = File.join(NewspaperWorks::GEM_PATH, 'spec/fixtures/files')
  whitelist = Hyrax.config.whitelisted_ingest_dirs
  whitelist.push(dir) unless whitelist.include?(dir)
  dir
end

# define the path to the file we will use for multiple examples
let(:path) { File.join(fixture_dir, 'sample-4page-issue.pdf') }

# second sample PDF, from the same fixture directory
let(:path2) { File.join(fixture_dir, 'ndnp-sample1.pdf') }
|
|
16
|
+
|
|
17
|
+
it_behaves_like('ingest adapter IO')
|
|
18
|
+
|
|
19
|
+
describe "file import and attachment" do
  # job classes passed as `perform_enqueued:` metadata on the example below
  do_now_jobs = [
    IngestJob,
    IngestLocalFileJob,
    InheritPermissionsJob,
    VisibilityCopyJob
  ]

  # permission accessors compared between an issue and its child page
  PERMISSION_METHODS = [
    :edit_users,
    :read_users,
    :discover_users,
    :edit_groups,
    :read_groups,
    :discover_groups
  ].freeze

  # Expects both objects to report the same users/groups for every
  # permission accessor (order-insensitive) and the same visibility.
  def check_equivalent_permissions(obj1, obj2)
    PERMISSION_METHODS.each do |m|
      expect(obj1.send(m)).to match_array obj2.send(m)
    end
    expect(obj1.visibility).to eq obj2.visibility
  end

  # Expects dates to be set, the fixed first-page title, and a numeric
  # String page number.
  def check_page_metadata(page)
    expect(page.date_uploaded).not_to be nil
    expect(page.date_modified).not_to be nil
    # title: issue title plus page qualifier expected:
    expect(page.title).to contain_exactly "Here and There: Page 1"
    # page number is sequence number, expressed as String
    expect(page.page_number).to be_a String
    expect(page.page_number).to match(/^[0-9]+$/)
  end

  def assign_custom_permissions(work)
    # read_groups ['public'] <==> "open" visibility
    work.read_groups = ['public']
    # add a permission to issue, that is not default:
    work.read_users = ['peanutbutter@example.com']
    work.save!
  end

  it "ingests work and creates child page works" do
    adapter = build(:newspaper_issue_ingest)
    adapter.ingest(path)
    child_pages = adapter.work.members.select { |w| w.class == NewspaperPage }
    expect(child_pages.length).to eq 4
  end

  # For created child pages, date and permission attributes are side-effect
  #   of file attachment process (`NewspaperWorks::Data::WorkFiles`)
  #   manipulating the work through the Hyrax actor stack create pipeline.
  # The metadata key was previously misspelled `peform_enqueued`, so it did
  # not match the `perform_enqueued` key the suite's other examples use.
  it "sets work attributes on created pages via file attachment",
     perform_enqueued: do_now_jobs do
    adapter = build(:newspaper_issue_ingest)
    assign_custom_permissions(adapter.work)
    adapter.ingest(path2)
    child_pages = adapter.work.members.select { |w| w.class == NewspaperPage }
    page = child_pages[0]
    check_page_metadata(page)
    # permissions:
    check_equivalent_permissions(adapter.work, page)
  end
end
|
|
83
|
+
end
|