newspaper_works 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.fcrepo_wrapper +4 -0
- data/.gitignore +43 -0
- data/.rubocop.yml +143 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +50 -0
- data/Gemfile +47 -0
- data/LICENSE +203 -0
- data/README.md +159 -0
- data/Rakefile +38 -0
- data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
- data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
- data/app/assets/config/newspaper_works_manifest.js +2 -0
- data/app/assets/images/newspaper_works/.keep +0 -0
- data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
- data/app/assets/javascripts/newspaper_works.js +4 -0
- data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
- data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
- data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
- data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
- data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
- data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
- data/app/forms/hyrax/newspaper_article_form.rb +11 -0
- data/app/forms/hyrax/newspaper_container_form.rb +11 -0
- data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
- data/app/forms/hyrax/newspaper_page_form.rb +15 -0
- data/app/forms/hyrax/newspaper_title_form.rb +12 -0
- data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
- data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
- data/app/helpers/newspaper_works/application_helper.rb +5 -0
- data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
- data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
- data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
- data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
- data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
- data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
- data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
- data/app/indexers/newspaper_article_indexer.rb +16 -0
- data/app/indexers/newspaper_container_indexer.rb +18 -0
- data/app/indexers/newspaper_issue_indexer.rb +26 -0
- data/app/indexers/newspaper_page_indexer.rb +9 -0
- data/app/indexers/newspaper_title_indexer.rb +19 -0
- data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
- data/app/jobs/newspaper_works/application_job.rb +4 -0
- data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
- data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
- data/app/mailers/newspaper_works/application_mailer.rb +8 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
- data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
- data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
- data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
- data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
- data/app/models/file_set.rb +10 -0
- data/app/models/newspaper_article.rb +158 -0
- data/app/models/newspaper_container.rb +86 -0
- data/app/models/newspaper_issue.rb +115 -0
- data/app/models/newspaper_page.rb +70 -0
- data/app/models/newspaper_title.rb +111 -0
- data/app/models/newspaper_works/application_record.rb +6 -0
- data/app/models/newspaper_works/derivative_attachment.rb +8 -0
- data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
- data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
- data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
- data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
- data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
- data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
- data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
- data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
- data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
- data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
- data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
- data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
- data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
- data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
- data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
- data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
- data/app/services/hyrax/article_genre_service.rb +9 -0
- data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
- data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
- data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
- data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
- data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
- data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
- data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
- data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
- data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
- data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
- data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
- data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
- data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
- data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
- data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
- data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
- data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
- data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
- data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
- data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
- data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
- data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
- data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
- data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
- data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
- data/app/views/newspaper_works/base/_show.html.erb +45 -0
- data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
- data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
- data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
- data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
- data/app/views/records/edit_fields/_genre.html.erb +4 -0
- data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
- data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/newspaper_article.de.yml +12 -0
- data/config/locales/newspaper_article.en.yml +12 -0
- data/config/locales/newspaper_article.es.yml +12 -0
- data/config/locales/newspaper_article.fr.yml +12 -0
- data/config/locales/newspaper_article.it.yml +12 -0
- data/config/locales/newspaper_article.pt-BR.yml +12 -0
- data/config/locales/newspaper_article.zh.yml +12 -0
- data/config/locales/newspaper_container.de.yml +8 -0
- data/config/locales/newspaper_container.en.yml +8 -0
- data/config/locales/newspaper_container.es.yml +8 -0
- data/config/locales/newspaper_container.fr.yml +8 -0
- data/config/locales/newspaper_container.it.yml +8 -0
- data/config/locales/newspaper_container.pt-BR.yml +8 -0
- data/config/locales/newspaper_container.zh.yml +8 -0
- data/config/locales/newspaper_issue.de.yml +8 -0
- data/config/locales/newspaper_issue.en.yml +8 -0
- data/config/locales/newspaper_issue.es.yml +8 -0
- data/config/locales/newspaper_issue.fr.yml +8 -0
- data/config/locales/newspaper_issue.it.yml +8 -0
- data/config/locales/newspaper_issue.pt-BR.yml +8 -0
- data/config/locales/newspaper_issue.zh.yml +8 -0
- data/config/locales/newspaper_page.de.yml +15 -0
- data/config/locales/newspaper_page.en.yml +15 -0
- data/config/locales/newspaper_page.es.yml +15 -0
- data/config/locales/newspaper_page.fr.yml +15 -0
- data/config/locales/newspaper_page.it.yml +15 -0
- data/config/locales/newspaper_page.pt-BR.yml +15 -0
- data/config/locales/newspaper_page.zh.yml +15 -0
- data/config/locales/newspaper_title.de.yml +8 -0
- data/config/locales/newspaper_title.en.yml +8 -0
- data/config/locales/newspaper_title.es.yml +8 -0
- data/config/locales/newspaper_title.fr.yml +8 -0
- data/config/locales/newspaper_title.it.yml +8 -0
- data/config/locales/newspaper_title.pt-BR.yml +8 -0
- data/config/locales/newspaper_title.zh.yml +8 -0
- data/config/locales/newspaper_works.de.yml +50 -0
- data/config/locales/newspaper_works.en.yml +52 -0
- data/config/locales/newspaper_works.es.yml +52 -0
- data/config/locales/newspaper_works.fr.yml +52 -0
- data/config/locales/newspaper_works.it.yml +52 -0
- data/config/locales/newspaper_works.pt-BR.yml +52 -0
- data/config/locales/newspaper_works.zh.yml +52 -0
- data/config/routes.rb +9 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
- data/lib/generators/newspaper_works/assets_generator.rb +29 -0
- data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
- data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
- data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
- data/lib/generators/newspaper_works/install_generator.rb +97 -0
- data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
- data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
- data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
- data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
- data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
- data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
- data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
- data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
- data/lib/newspaper_works/configuration.rb +14 -0
- data/lib/newspaper_works/data/fileset_helper.rb +25 -0
- data/lib/newspaper_works/data/path_helper.rb +40 -0
- data/lib/newspaper_works/data/work_derivatives.rb +314 -0
- data/lib/newspaper_works/data/work_file.rb +92 -0
- data/lib/newspaper_works/data/work_files.rb +181 -0
- data/lib/newspaper_works/data.rb +35 -0
- data/lib/newspaper_works/engine.rb +42 -0
- data/lib/newspaper_works/errors.rb +14 -0
- data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
- data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
- data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
- data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
- data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
- data/lib/newspaper_works/ingest/from_command.rb +52 -0
- data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
- data/lib/newspaper_works/ingest/issue_images.rb +51 -0
- data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
- data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
- data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
- data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
- data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
- data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
- data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
- data/lib/newspaper_works/ingest/ndnp.rb +21 -0
- data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
- data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
- data/lib/newspaper_works/ingest/page_image.rb +52 -0
- data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
- data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
- data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
- data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
- data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
- data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
- data/lib/newspaper_works/ingest/publication_info.rb +44 -0
- data/lib/newspaper_works/ingest.rb +90 -0
- data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
- data/lib/newspaper_works/logging.rb +54 -0
- data/lib/newspaper_works/page_finder.rb +62 -0
- data/lib/newspaper_works/resource_fetcher.rb +78 -0
- data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
- data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
- data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
- data/lib/newspaper_works/text_extraction.rb +10 -0
- data/lib/newspaper_works/version.rb +3 -0
- data/lib/newspaper_works.rb +19 -0
- data/lib/tasks/newspaper_works_tasks.rake +39 -0
- data/newspaper_works.gemspec +49 -0
- data/spec/.keep.txt +1 -0
- data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
- data/spec/controllers/catalog_controller_spec.rb +63 -0
- data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
- data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_issue_ingest.rb +6 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_ingest.rb +6 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/features/front_pages_for_title_spec.rb +19 -0
- data/spec/features/newspaper_title_search_spec.rb +30 -0
- data/spec/features/newspapers_search_spec.rb +49 -0
- data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
- data/spec/features_shared.rb +71 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
- data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
- data/spec/fixtures/files/resource_mocks/urls.json +82 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
- data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
- data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
- data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
- data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
- data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
- data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
- data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
- data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
- data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
- data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
- data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
- data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
- data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
- data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
- data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
- data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
- data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
- data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
- data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
- data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
- data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
- data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
- data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
- data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
- data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
- data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
- data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
- data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
- data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
- data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
- data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
- data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
- data/spec/lib/newspaper_works/logging_spec.rb +53 -0
- data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
- data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
- data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
- data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
- data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
- data/spec/misc_shared.rb +109 -0
- data/spec/model_shared.rb +134 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
- data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
- data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
- data/spec/models/newspaper_article_spec.rb +73 -0
- data/spec/models/newspaper_container_spec.rb +111 -0
- data/spec/models/newspaper_issue_spec.rb +91 -0
- data/spec/models/newspaper_page_spec.rb +44 -0
- data/spec/models/newspaper_title_spec.rb +116 -0
- data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
- data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/ndnp_shared.rb +48 -0
- data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
- data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
- data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
- data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
- data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
- data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
- data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
- data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
- data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
- data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
- data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
- data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
- data/spec/routing/route_spec.rb +52 -0
- data/spec/search_builders/custom_search_builder_spec.rb +34 -0
- data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
- data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
- data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
- data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
- data/spec/spec_helper.rb +261 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
- data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
- data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
- data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
- data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
- data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
- data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
- data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
- data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
- data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
- data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
- data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
- data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
- data/tasks/newspaperworks_dev.rake +26 -0
- data/test/integration/navigation_test.rb +7 -0
- data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
- data/test/newspaper_works_test.rb +7 -0
- data/test/test_helper.rb +17 -0
- data/tmp/.keep +0 -0
- metadata +1037 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
require 'misc_shared'
|
|
3
|
+
require 'faraday'
|
|
4
|
+
|
|
5
|
+
# test NewspaperPageIngest against work
|
|
6
|
+
RSpec.describe NewspaperWorks::Ingest::NewspaperPageIngest do
|
|
7
|
+
include_context "shared setup"
|
|
8
|
+
|
|
9
|
+
# define the path to the file we will use for multiple examples
|
|
10
|
+
let(:path) do
|
|
11
|
+
File.join(fixture_path, 'page1.tiff')
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it_behaves_like('ingest adapter IO')
|
|
15
|
+
|
|
16
|
+
describe "file import and attachment" do
|
|
17
|
+
do_now_jobs = [
|
|
18
|
+
IngestJob,
|
|
19
|
+
IngestLocalFileJob,
|
|
20
|
+
InheritPermissionsJob,
|
|
21
|
+
VisibilityCopyJob
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
permission_methods = [
|
|
25
|
+
:edit_users,
|
|
26
|
+
:read_users,
|
|
27
|
+
:discover_users,
|
|
28
|
+
:edit_groups,
|
|
29
|
+
:read_groups,
|
|
30
|
+
:discover_groups
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
def verify_pcdm_fileset(fileset)
|
|
34
|
+
# Hyrax always sets label (if not title) on fileset:
|
|
35
|
+
expect(fileset.label).to eq 'page1.tiff'
|
|
36
|
+
# reload file set and check on original file
|
|
37
|
+
fileset.reload
|
|
38
|
+
file = fileset.original_file
|
|
39
|
+
expect(file).to be_a Hydra::PCDM::File
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def verify_attached_file(work, path)
|
|
43
|
+
work.reload
|
|
44
|
+
files = NewspaperWorks::Data::WorkFiles.of(work)
|
|
45
|
+
expect(files.keys.size).to eq 1
|
|
46
|
+
expect(File.exist?(files.values[0].path)).to be true
|
|
47
|
+
expect(files.values[0].size).to eq File.size(path)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
it "ingests file data and saves", perform_enqueued: do_now_jobs do
|
|
51
|
+
adapter = build(:newspaper_page_ingest)
|
|
52
|
+
adapter.ingest(path)
|
|
53
|
+
file_sets = adapter.work.members.select { |w| w.class == FileSet }
|
|
54
|
+
expect(file_sets.size).to eq 1
|
|
55
|
+
verify_pcdm_fileset(file_sets[0])
|
|
56
|
+
verify_attached_file(adapter.work, path)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# For (minimal) page objects constructed in tests, PDF ingest,
|
|
60
|
+
# permission setting on fileset happens as a result of attachment
|
|
61
|
+
# process (`NewspaperWorks::Data::WorkFiles`), via the
|
|
62
|
+
# `NewspaperWorks::Data::handle_after_create_fileset` method,
|
|
63
|
+
# since the CreateWithRemoteFilesActor in Hyrax unfortunately
|
|
64
|
+
# does not invoke InheritPermissionsJob.
|
|
65
|
+
it "copies work permissions to fileset", perform_enqueued: do_now_jobs do
|
|
66
|
+
adapter = build(:newspaper_page_ingest)
|
|
67
|
+
adapter.ingest(path)
|
|
68
|
+
adapter.work.reload
|
|
69
|
+
file_sets = adapter.work.members.select { |w| w.class == FileSet }
|
|
70
|
+
fileset = file_sets[0]
|
|
71
|
+
permission_methods.each do |m|
|
|
72
|
+
expect(fileset.send(m)).to match_array adapter.work.send(m)
|
|
73
|
+
end
|
|
74
|
+
expect(fileset.visibility).to eq adapter.work.visibility
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for PageImage: path validation on construction, plus the page
# number and title exposed by a constructed page.
RSpec.describe NewspaperWorks::Ingest::PageImage do
  include_context 'ingest test fixtures'

  let(:lccn) { 'sn93059126' }
  let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }
  let(:issue_path) { File.join(tiff_fixtures, lccn, '1853060401') }
  let(:issue) { NewspaperWorks::Ingest::IssueImages.new(issue_path, publication) }

  describe "page construction and metadata" do
    it "validates path to page image file" do
      missing = '/path/to/nonexistent'
      expect do
        described_class.new(missing, issue, 1)
      end.to raise_error ArgumentError
    end

    it "extracts page number, title from image filename" do
      # first key of the issue is used as the page image path:
      first_image = issue.keys[0]
      page = described_class.new(first_image, issue, 1)
      expect(page.page_number).to eq "1"
      expect(page.title).to contain_exactly(
        "The weekly journal: June 4, 1853: Page #{page.page_number}"
      )
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for PdfImages: image dimensions, ppi and color information
# reported for the sample four-page issue PDF fixture.
RSpec.describe NewspaperWorks::Ingest::PdfImages do
  let(:path) do
    Pathname.new(NewspaperWorks::GEM_PATH)
            .join('spec/fixtures/files', 'sample-4page-issue.pdf')
            .to_s
  end

  let(:pdfimages) { described_class.new(path) }

  describe "get image sizing from PDF" do
    it "gets width" do
      width = pdfimages.width
      expect(width).to be 7200
    end

    it "gets height" do
      height = pdfimages.height
      expect(height).to be 9600
    end

    it "gets ppi" do
      ppi = pdfimages.ppi
      expect(ppi).to be 400
    end
  end

  describe "get image info from PDF" do
    it "gets color info" do
      # `color` is destructured into three values:
      colorspace, channel_count, bit_depth = pdfimages.color
      expect(colorspace).to eq 'gray'
      expect(channel_count).to be 1
      expect(bit_depth).to be 1
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for PDFIssue: construction from a PDF path and publication, and
# the date, edition and title metadata exposed by the constructed issue.
RSpec.describe NewspaperWorks::Ingest::PDFIssue do
  include_context 'ingest test fixtures'

  let(:lccn) { 'sn93059126' }
  let(:pdf_path) { File.join(pdf_fixtures, lccn, '1853060401.pdf') }
  let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }

  # Issue under test; built lazily, once per example.
  let(:pdf_issue) { described_class.new(pdf_path, publication) }

  describe "issue construction and metadata" do
    it "constructs with path and publication" do
      expect(pdf_issue.path).to eq pdf_path
      expect(pdf_issue.filename).to eq File.basename(pdf_path)
      expect(pdf_issue.publication).to be publication
      # LCCN is available both directly and through the publication:
      expect(pdf_issue.lccn).to eq lccn
      expect(pdf_issue.publication.lccn).to eq lccn
    end

    it "extracts date, edition, title from filename" do
      expect(pdf_issue.publication_date).to eq '1853-06-04'
      expect(pdf_issue.edition_number).to eq 1
      expect(pdf_issue.title).to contain_exactly 'The weekly journal: June 4, 1853'
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for PDFIssues: a hash-like collection, constructed from a
# publication directory, mapping PDF paths to PDFIssue objects.
RSpec.describe NewspaperWorks::Ingest::PDFIssues do
  include_context 'ingest test fixtures'

  let(:lccn) { 'sn93059126' }

  let(:publication) { NewspaperWorks::Ingest::PublicationInfo.new(lccn) }

  let(:pub_path) { File.join(pdf_fixtures, lccn) }

  # Collection under test; built lazily, once per example.
  let(:issues) { described_class.new(pub_path, publication) }

  # Full paths of every `.pdf` entry in the publication directory
  # (the suffix filter also drops the `.` and `..` entries).
  let(:pdf_paths) do
    Dir.entries(pub_path)
       .select { |name| name.end_with?('.pdf') }
       .map { |name| File.join(pub_path, name) }
  end

  describe "construction and metadata" do
    it "constructs with path and publication" do
      expect(issues.path).to eq pub_path
      expect(issues.publication).to be publication
      expect(issues.lccn).to eq lccn
      expect(issues.publication.lccn).to eq lccn
    end

    it "enumerates valid pdfs as PDFIssue objects" do
      expect(issues.size).to eq 5
      enumerated = issues.values
      expect(enumerated.size).to eq issues.size
      sample = enumerated.first
      expect(sample).to be_a NewspaperWorks::Ingest::PDFIssue
      expect(File.dirname(sample.path)).to eq pub_path
    end

    it "presents hash-like mapping behavior" do
      # Keys are paths to file:
      expect(issues.keys).to match_array pdf_paths
      # info and [] methods get PDFIssue for given path key:
      first_key, second_key = issues.keys
      issue1 = issues[first_key]
      issue2 = issues.info(second_key)
      expect(issue1).to be_a NewspaperWorks::Ingest::PDFIssue
      expect(issue2).to be_a NewspaperWorks::Ingest::PDFIssue
      expect(issue1.path).to eq first_key
      expect(issue2.path).to eq second_key
    end

    it "enumerates pairs like a hash" do
      issues.each_value do |v|
        expect(v).to be_a NewspaperWorks::Ingest::PDFIssue
      end
      # keys are checked against the PDF paths only, not every
      # directory entry:
      issues.each_key do |k|
        expect(pdf_paths).to include k
      end
      issues.each do |path, info|
        expect(pdf_paths).to include path
        expect(info).to be_a NewspaperWorks::Ingest::PDFIssue
        expect(info.path).to eq path
      end
      expect(issues.to_a.size).to eq 5
    end
  end
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
require 'spec_helper'
require 'open3'
require 'mini_magick'
|
|
3
|
+
|
|
4
|
+
# Specs for PdfPages: Ghostscript device/ppi selection for a PDF, and
# splitting a PDF into one TIFF file per page.
RSpec.describe NewspaperWorks::Ingest::PdfPages do
  # @param name [String] file name under spec/fixtures/files in the gem
  # @return [String] full path to that fixture
  def fixture_file(name)
    Pathname.new(NewspaperWorks::GEM_PATH).join('spec/fixtures/files', name).to_s
  end

  # Runs the `identify` command on one file and returns its stdout.
  # Arguments are passed as an array (no shell), so paths containing
  # spaces or shell metacharacters are safe; stderr is captured too, so
  # the child cannot block on a full stderr pipe.
  #
  # @param path [String, #to_s] path to an image file
  # @return [String] standard output of `identify`
  def identify(path)
    output, _stderr, _status = Open3.capture3('identify', path.to_s)
    output
  end

  let(:sample1) { fixture_file('sample-4page-issue.pdf') }
  let(:sample2) { fixture_file('sample-color-newsletter.pdf') }
  let(:sample3) { fixture_file('ndnp-sample1.pdf') }

  let(:onebitpages) { described_class.new(sample1) }
  let(:colorpages) { described_class.new(sample2) }
  let(:graypages) { described_class.new(sample3) }

  describe "implementation details" do
    it "pdfinfo gets PdfImages, memoized" do
      pdfimages = onebitpages.pdfinfo
      expect(pdfimages).to be_a(NewspaperWorks::Ingest::PdfImages)
      pdfimages2 = onebitpages.pdfinfo
      # same object, method only fetches once:
      expect(pdfimages2).to equal pdfimages
    end

    it "gets correct Ghostscript TIFF output" do
      expect(onebitpages.gsdevice).to eq 'tiffg4'
      expect(colorpages.gsdevice).to eq 'tiff24nc'
    end

    it "gets text elements saved in PDF" do
      # should be little to nothing in scanned work, besides
      # output of Ghostscript banner:
      expect(onebitpages.gstext.length).to eq 0
      # the color sample is born-digital and thus has text in PDF;
      # this checks for > 160 (non-trivial) text, though this text
      # stream is at least 6k, if you strip out excess whitespace.
      expect(colorpages.gstext.length).to be > 160
    end

    it "gets reasonable ppi" do
      # 400 ppi native:
      expect(onebitpages.ppi).to eq 400
      # sourced from scan:
      expect(onebitpages.ppi).to eq onebitpages.pdfinfo.ppi
      # digital native content gets forced to 400 ppi...
      expect(colorpages.ppi).to eq 400
      # ...because the images in this sample are not reasonably
      # representative, due to low PPI (not scans of whole pages):
      expect(colorpages.ppi).to be > colorpages.pdfinfo.ppi
    end
  end

  describe "splits PDF into pages with TIFF tmpfiles" do
    it "page filenames of TIFF files are ordered" do
      pages = colorpages.entries
      pages.each_with_index do |path, idx|
        n = idx + 1
        # dot is escaped so only a literal ".tiff" suffix matches:
        expect(path).to match(/page#{n}\.tiff/)
      end
    end

    it "color sample splits into color TIFF per page" do
      pages = colorpages.entries
      pages.each do |path|
        image = MiniMagick::Image.open(path)
        expect(image.mime_type).to eq 'image/tiff'
        expect(image.colorspace).to start_with 'DirectClass sRGB'
      end
    end

    it "one bit sample splits into Group 4 TIFF per page" do
      pages = onebitpages.entries
      pages.each do |path|
        output = identify(path)
        expect(output).to include '1-bit'
        expect(output).to include 'Bilevel'
        expect(output).to include 'TIFF'
      end
    end

    it "one bit sample is 7200x9600 scan, verify" do
      pages = onebitpages.entries
      pages.each do |path|
        image = MiniMagick::Image.open(path)
        expect(image.width).to eq 7200
        expect(image.height).to eq 9600
      end
    end

    it "processes Grayscale NDNP PDF correctly" do
      pages = graypages.entries
      expect(pages.length).to eq 1
      pages.each do |path|
        output = identify(path)
        expect(output).to include 'Grayscale'
        expect(output).to include '8-bit'
        expect(output).to include 'TIFF'
      end
    end
  end
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for the PubFinder mixin: finding a publication by LCCN, and
# find-or-create linking of an issue to its publication.
RSpec.describe NewspaperWorks::Ingest::PubFinder do
  describe "mixin publication find-or-create module" do
    # Anonymous host class that includes the mixin under test.
    let(:klass) { Class.new { include NewspaperWorks::Ingest::PubFinder } }

    let(:ingester) { klass.new }

    # use factory for saved NewspaperIssue:
    let(:issue) { create(:newspaper_issue) }

    let(:publication) { create(:newspaper_title) }

    # Delete any NewspaperTitle matching these LCCNs before each example.
    before do
      %w[sn2099999999 sn2036999999 sn82014496].each do |stale_lccn|
        NewspaperTitle.where(lccn: stale_lccn).delete_all
      end
    end

    it "finds existing publication, if it exists" do
      known_lccn = publication.lccn
      found = ingester.find_publication(known_lccn)
      expect(found).to be_a NewspaperTitle
    end

    it "links existing publication on find-or-create" do
      known_lccn = publication.lccn
      ingester.find_or_create_publication_for_issue(issue, known_lccn, nil, {})
      publication.reload
      expect(publication.members.to_a).to include issue
    end

    it "links issue to new publication" do
      new_lccn = 'sn2099999999'
      expect(ingester.find_publication(new_lccn)).to be_nil
      ingester.find_or_create_publication_for_issue(issue, new_lccn, nil, {})
      created = ingester.find_publication(new_lccn)
      expect(created).to be_a NewspaperTitle
      expect(created.members.to_a).to include issue
    end

    it "copies metadata for created publication" do
      new_lccn = 'sn82014496'
      expect(ingester.find_publication(new_lccn)).to be_nil
      created = ingester.create_publication(new_lccn, nil, {})
      expect(created.title).to contain_exactly "Rocky Mountain news"
      place_uris = created.place_of_publication.map { |place| place.to_uri.to_s }
      expect(place_uris).to contain_exactly("http://sws.geonames.org/5419384/")
      expect(created.language).to contain_exactly 'eng'
      expect(created.oclcnum).to eq 'ocm03946163'
    end
  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for PublicationInfo: metadata lookup by LCCN, selection of the
# backing implementation, and handling of an unknown LCCN.
RSpec.describe NewspaperWorks::Ingest::PublicationInfo do
  # prefers lccn.loc.gov:
  let(:lccn1) { 'sn83021453' }
  let(:lccn2) { 'sn83045396' }
  # prefers ChronAm:
  let(:lccn3) { 'sn94051019' }
  let(:bad_lccn) { 'sn99999999' }

  describe "gets metadata" do
    it "gets simple metadata" do
      info = described_class.new(lccn1)
      expect(info.title).to eq 'Salt Lake daily tribune'
      expect(info.issn).to be_nil
      expect(info.oclcnum).to eq 'ocm10170377'
      expect(info.place_name).to eq 'Salt Lake City, Utah'
      expect(info.place_of_publication).to eq 'http://sws.geonames.org/5780993/'
    end

    it "gets related item metadata" do
      earlier = described_class.new(lccn1)
      later = described_class.new(lccn2)
      # lccn2 succeeds lccn1, favors lccn.loc.gov URL as authoritative:
      expect(earlier.succeeded_by).to eq "https://lccn.loc.gov/#{lccn2}"
      # lccn1 precedes lccn2, favors lccn.loc.gov URL as authoritative:
      expect(later.preceded_by).to eq "https://lccn.loc.gov/sn83021453"
    end
  end

  describe "backing authority choice" do
    it "picks default authority of lccn.loc.gov" do
      info = described_class.new(lccn1)
      expect(info.implementation).to be_a NewspaperWorks::Ingest::LCPublicationInfo
    end

    it "picks chronam implementation when lccn.loc.gov empty for LCCN" do
      info = described_class.new(lccn3)
      expect(info.implementation).to be_a NewspaperWorks::Ingest::ChronAmPublicationInfo
    end

    it "responds to known metadata" do
      info = described_class.new(lccn3)
      known_fields = %i[
        lccn issn title oclcnum
        place_name place_of_publication
        preceded_by succeeded_by
      ]
      known_fields.each do |field|
        expect(info).to respond_to(field)
      end
    end
  end

  describe "error handling" do
    it "handles unknown LCCN (empty mods)" do
      info = described_class.new(bad_lccn)
      expect(info.empty?).to be true
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for module-level helpers on NewspaperWorks::Ingest: default
# admin set lookup, assignment of administrative metadata to a work,
# and publication metadata lookup by LCCN.
#
# Uses `RSpec.describe` (not the monkey-patched global `describe`), in
# line with the other specs in this gem.
RSpec.describe NewspaperWorks::Ingest do
  describe "Ingest module methods" do
    it "gets default admin set" do
      admin_set = described_class.find_admin_set
      expect(admin_set).to be_an AdminSet
      expect(admin_set.id).to eq AdminSet::DEFAULT_ID
    end

    # initial expectations of a just-created work without administrative
    # metadata set yet; AKA the "before" picture
    def expect_initial_work_state(work)
      expect(work.admin_set).to be_nil
      expect(work.depositor).to be_nil
      expect(work.visibility).to eq 'restricted'
      expect(work.date_modified).to be_nil
      expect(work.date_uploaded).to be_nil
      expect(work.resource_type).to be_empty
      expect(work.state).to be_nil
    end

    it "sets default assigned metadata for a work" do
      work = NewspaperTitle.create!(title: ["hello"])
      expect_initial_work_state(work)
      described_class.assign_administrative_metadata(work)
      # the "after" picture: every field checked above is now populated
      expect(work.admin_set).to eq AdminSet.find(AdminSet::DEFAULT_ID)
      expect(work.depositor).to eq User.batch_user.user_key
      expect(work.visibility).to eq 'open'
      expect(work.state).to be_an ActiveTriples::Resource
      expect(work.state.to_uri.to_s).to eq \
        'http://fedora.info/definitions/1/0/access/ObjState#active'
      expect(work.date_uploaded).to be_a DateTime
      expect(work.date_modified).to eq work.date_uploaded
      expect(work.resource_type).to match_array ['Newspapers']
    end

    it "has method to get publication metadata for lccn" do
      lccn = 'sn84038814'
      metadata = described_class.publication_metadata(lccn)
      expect(metadata).to be_a NewspaperWorks::Ingest::PublicationInfo
      expect(metadata.lccn).to eq lccn
    end
  end
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for IssuePDFComposer: construction, PDF validation, refusal to
# compose when page PDFs are not ready, and composing a combined issue
# PDF from per-page PDF derivatives.
RSpec.describe NewspaperWorks::IssuePDFComposer do
  let(:bare_issue) do
    build(:newspaper_issue)
  end

  # Fixture directory, registered with Hyrax's ingest whitelist.  The
  # whitelist is global config, so only add the directory when it is
  # not already present (avoids one duplicate entry per example).
  let(:fixtures_path) do
    fixtures = File.join(NewspaperWorks::GEM_PATH, 'spec/fixtures/files')
    whitelist = Hyrax.config.whitelisted_ingest_dirs
    whitelist.push(fixtures) unless whitelist.include?(fixtures)
    fixtures
  end

  let(:pdf_path) do
    File.join(fixtures_path, 'minimal-1-page.pdf')
  end

  let(:broken_pdf) do
    File.join(fixtures_path, 'broken-truncated.pdf')
  end

  # Creates a saved NewspaperPage titled `name` with one empty fileset,
  # and assigns/commits the file at `path` as a derivative of the page.
  # Returns the page.
  def page_with_pdf(name, path)
    # empty+saved fileset: only need id, no primary file, to attach derivatives
    fs = FileSet.create!
    page = NewspaperPage.create!(title: [name])
    page.members << fs
    page.save!
    derivatives = NewspaperWorks::Data::WorkDerivatives.of(page)
    derivatives.assign(path)
    derivatives.commit!
    page
  end

  let(:page1_with_pdf) { page_with_pdf('Page 1', pdf_path) }
  let(:page2_with_pdf) { page_with_pdf('Page 2', pdf_path) }

  let(:broken_page) { page_with_pdf('Broken Page', broken_pdf) }

  let(:two_page_issue) do
    # create! (as used for pages and filesets above) so that a failed
    # validation raises here instead of passing silently:
    issue = NewspaperIssue.create!(title: ['Issue Test'])
    issue.ordered_members << page1_with_pdf
    issue.ordered_members << page2_with_pdf
    issue.save!
    issue
  end

  let(:unfinished_issue) do
    issue = NewspaperIssue.create!(title: ['Unfinished issue'])
    issue.members << FileSet.create!
    issue.save!
    issue.ordered_members << broken_page
    issue.save!
    issue
  end

  describe "adapter construction" do
    it "constructs adapter" do
      composer = described_class.new(bare_issue)
      expect(composer.issue).to be bare_issue
      expect(composer.page_pdfs).to match_array []
    end
  end

  describe "Validation and handling of not-yet-ready pages" do
    it "validates PDFs" do
      # we can fake issue context with nil on construction to call validate_pdf
      composer = described_class.new(nil)
      expect(composer.validate_pdf(broken_pdf)).to be false
      expect(composer.validate_pdf(pdf_path)).to be true
    end

    it "raises NewspaperWorks::PagesNotReady on incomplete PDF" do
      composer = described_class.new(unfinished_issue)
      expect { composer.compose }.to raise_error(NewspaperWorks::PagesNotReady)
    end
  end

  describe "Construction, attachment of combined PDF" do
    # job classes passed as the `perform_enqueued` metadata below:
    do_now_jobs = [IngestLocalFileJob, IngestJob, InheritPermissionsJob]

    def files_of(work)
      NewspaperWorks::Data::WorkFiles.of(work)
    end

    it "creates issue PDF from sources", perform_enqueued: do_now_jobs do
      composer = described_class.new(two_page_issue)
      # no (primary) files attached to issue yet:
      expect(files_of(two_page_issue).keys.size).to eq 0
      # Make the multi-page PDF with IssuePDFComposer#compose:
      composer.compose
      # reload issue files, as they have been updated; check for PDF:
      two_page_issue.reload
      files = files_of(two_page_issue)
      expect(files.keys.size).to eq 1
      # getting path initiates a repository checkout of file:
      path = files.values[0].path
      # we found a PDF, simple check only extension (not validating):
      expect(path.end_with?('pdf')).to be true
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for the Logging mixin: explicit logger configuration by the
# including class, and writing formatted messages to both the Rails
# logger and a named log.
#
# Uses `RSpec.describe` (not the monkey-patched global `describe`), in
# line with the other specs in this gem.
RSpec.describe NewspaperWorks::Logging do
  describe "mixin logging module" do
    # Anonymous host class that includes the mixin under test.
    let(:klass) do
      Class.new do
        include NewspaperWorks::Logging
      end
    end

    let(:loggable) { klass.new }

    let(:configured) do
      obj = loggable
      # expectation is that this is called by consuming class constructor:
      obj.configure_logger('ingest-test')
      obj
    end

    it "requires configuration by consuming class" do
      name = 'random_testing_logname'
      expect(loggable.instance_variable_get(:@logger)).to be_nil
      expect(described_class.configured).not_to include name
      loggable.configure_logger(name)
      expect(loggable.instance_variable_get(:@logger)).not_to be_nil
      expect(described_class.configured).to include name
    end

    it "logs formatted message to rails logger with write_log" do
      message = "FYI: heads-up, this is a message"
      expect(Rails.logger).to receive(:add).with(
        Logger::INFO,
        configured.message_format(message),
        nil
      )
      configured.write_log(message)
    end

    it "writes to named log file" do
      # need to reset global de-dupe state for additional logger, just for
      # purposes of this test
      # NOTE(review): this module-level list is not restored afterwards;
      # confirm that later examples do not depend on its prior contents.
      described_class.configured = []
      message = "Instant coffee"
      named_log = configured.instance_variable_get(:@named_log)
      expect(named_log).to receive(:add).with(
        Logger::INFO,
        configured.message_format(message),
        nil
      )
      configured.write_log(message)
    end
  end
end
|