newspaper_works 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.fcrepo_wrapper +4 -0
- data/.gitignore +43 -0
- data/.rubocop.yml +143 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +50 -0
- data/Gemfile +47 -0
- data/LICENSE +203 -0
- data/README.md +159 -0
- data/Rakefile +38 -0
- data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
- data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
- data/app/assets/config/newspaper_works_manifest.js +2 -0
- data/app/assets/images/newspaper_works/.keep +0 -0
- data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
- data/app/assets/javascripts/newspaper_works.js +4 -0
- data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
- data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
- data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
- data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
- data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
- data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
- data/app/forms/hyrax/newspaper_article_form.rb +11 -0
- data/app/forms/hyrax/newspaper_container_form.rb +11 -0
- data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
- data/app/forms/hyrax/newspaper_page_form.rb +15 -0
- data/app/forms/hyrax/newspaper_title_form.rb +12 -0
- data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
- data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
- data/app/helpers/newspaper_works/application_helper.rb +5 -0
- data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
- data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
- data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
- data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
- data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
- data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
- data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
- data/app/indexers/newspaper_article_indexer.rb +16 -0
- data/app/indexers/newspaper_container_indexer.rb +18 -0
- data/app/indexers/newspaper_issue_indexer.rb +26 -0
- data/app/indexers/newspaper_page_indexer.rb +9 -0
- data/app/indexers/newspaper_title_indexer.rb +19 -0
- data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
- data/app/jobs/newspaper_works/application_job.rb +4 -0
- data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
- data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
- data/app/mailers/newspaper_works/application_mailer.rb +8 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
- data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
- data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
- data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
- data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
- data/app/models/file_set.rb +10 -0
- data/app/models/newspaper_article.rb +158 -0
- data/app/models/newspaper_container.rb +86 -0
- data/app/models/newspaper_issue.rb +115 -0
- data/app/models/newspaper_page.rb +70 -0
- data/app/models/newspaper_title.rb +111 -0
- data/app/models/newspaper_works/application_record.rb +6 -0
- data/app/models/newspaper_works/derivative_attachment.rb +8 -0
- data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
- data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
- data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
- data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
- data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
- data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
- data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
- data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
- data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
- data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
- data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
- data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
- data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
- data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
- data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
- data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
- data/app/services/hyrax/article_genre_service.rb +9 -0
- data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
- data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
- data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
- data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
- data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
- data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
- data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
- data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
- data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
- data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
- data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
- data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
- data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
- data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
- data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
- data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
- data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
- data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
- data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
- data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
- data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
- data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
- data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
- data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
- data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
- data/app/views/newspaper_works/base/_show.html.erb +45 -0
- data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
- data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
- data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
- data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
- data/app/views/records/edit_fields/_genre.html.erb +4 -0
- data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
- data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/newspaper_article.de.yml +12 -0
- data/config/locales/newspaper_article.en.yml +12 -0
- data/config/locales/newspaper_article.es.yml +12 -0
- data/config/locales/newspaper_article.fr.yml +12 -0
- data/config/locales/newspaper_article.it.yml +12 -0
- data/config/locales/newspaper_article.pt-BR.yml +12 -0
- data/config/locales/newspaper_article.zh.yml +12 -0
- data/config/locales/newspaper_container.de.yml +8 -0
- data/config/locales/newspaper_container.en.yml +8 -0
- data/config/locales/newspaper_container.es.yml +8 -0
- data/config/locales/newspaper_container.fr.yml +8 -0
- data/config/locales/newspaper_container.it.yml +8 -0
- data/config/locales/newspaper_container.pt-BR.yml +8 -0
- data/config/locales/newspaper_container.zh.yml +8 -0
- data/config/locales/newspaper_issue.de.yml +8 -0
- data/config/locales/newspaper_issue.en.yml +8 -0
- data/config/locales/newspaper_issue.es.yml +8 -0
- data/config/locales/newspaper_issue.fr.yml +8 -0
- data/config/locales/newspaper_issue.it.yml +8 -0
- data/config/locales/newspaper_issue.pt-BR.yml +8 -0
- data/config/locales/newspaper_issue.zh.yml +8 -0
- data/config/locales/newspaper_page.de.yml +15 -0
- data/config/locales/newspaper_page.en.yml +15 -0
- data/config/locales/newspaper_page.es.yml +15 -0
- data/config/locales/newspaper_page.fr.yml +15 -0
- data/config/locales/newspaper_page.it.yml +15 -0
- data/config/locales/newspaper_page.pt-BR.yml +15 -0
- data/config/locales/newspaper_page.zh.yml +15 -0
- data/config/locales/newspaper_title.de.yml +8 -0
- data/config/locales/newspaper_title.en.yml +8 -0
- data/config/locales/newspaper_title.es.yml +8 -0
- data/config/locales/newspaper_title.fr.yml +8 -0
- data/config/locales/newspaper_title.it.yml +8 -0
- data/config/locales/newspaper_title.pt-BR.yml +8 -0
- data/config/locales/newspaper_title.zh.yml +8 -0
- data/config/locales/newspaper_works.de.yml +50 -0
- data/config/locales/newspaper_works.en.yml +52 -0
- data/config/locales/newspaper_works.es.yml +52 -0
- data/config/locales/newspaper_works.fr.yml +52 -0
- data/config/locales/newspaper_works.it.yml +52 -0
- data/config/locales/newspaper_works.pt-BR.yml +52 -0
- data/config/locales/newspaper_works.zh.yml +52 -0
- data/config/routes.rb +9 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
- data/lib/generators/newspaper_works/assets_generator.rb +29 -0
- data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
- data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
- data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
- data/lib/generators/newspaper_works/install_generator.rb +97 -0
- data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
- data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
- data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
- data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
- data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
- data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
- data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
- data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
- data/lib/newspaper_works/configuration.rb +14 -0
- data/lib/newspaper_works/data/fileset_helper.rb +25 -0
- data/lib/newspaper_works/data/path_helper.rb +40 -0
- data/lib/newspaper_works/data/work_derivatives.rb +314 -0
- data/lib/newspaper_works/data/work_file.rb +92 -0
- data/lib/newspaper_works/data/work_files.rb +181 -0
- data/lib/newspaper_works/data.rb +35 -0
- data/lib/newspaper_works/engine.rb +42 -0
- data/lib/newspaper_works/errors.rb +14 -0
- data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
- data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
- data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
- data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
- data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
- data/lib/newspaper_works/ingest/from_command.rb +52 -0
- data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
- data/lib/newspaper_works/ingest/issue_images.rb +51 -0
- data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
- data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
- data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
- data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
- data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
- data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
- data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
- data/lib/newspaper_works/ingest/ndnp.rb +21 -0
- data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
- data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
- data/lib/newspaper_works/ingest/page_image.rb +52 -0
- data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
- data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
- data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
- data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
- data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
- data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
- data/lib/newspaper_works/ingest/publication_info.rb +44 -0
- data/lib/newspaper_works/ingest.rb +90 -0
- data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
- data/lib/newspaper_works/logging.rb +54 -0
- data/lib/newspaper_works/page_finder.rb +62 -0
- data/lib/newspaper_works/resource_fetcher.rb +78 -0
- data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
- data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
- data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
- data/lib/newspaper_works/text_extraction.rb +10 -0
- data/lib/newspaper_works/version.rb +3 -0
- data/lib/newspaper_works.rb +19 -0
- data/lib/tasks/newspaper_works_tasks.rake +39 -0
- data/newspaper_works.gemspec +49 -0
- data/spec/.keep.txt +1 -0
- data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
- data/spec/controllers/catalog_controller_spec.rb +63 -0
- data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
- data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_issue_ingest.rb +6 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_ingest.rb +6 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/features/front_pages_for_title_spec.rb +19 -0
- data/spec/features/newspaper_title_search_spec.rb +30 -0
- data/spec/features/newspapers_search_spec.rb +49 -0
- data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
- data/spec/features_shared.rb +71 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
- data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
- data/spec/fixtures/files/resource_mocks/urls.json +82 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
- data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
- data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
- data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
- data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
- data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
- data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
- data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
- data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
- data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
- data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
- data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
- data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
- data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
- data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
- data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
- data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
- data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
- data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
- data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
- data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
- data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
- data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
- data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
- data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
- data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
- data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
- data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
- data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
- data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
- data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
- data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
- data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
- data/spec/lib/newspaper_works/logging_spec.rb +53 -0
- data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
- data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
- data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
- data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
- data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
- data/spec/misc_shared.rb +109 -0
- data/spec/model_shared.rb +134 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
- data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
- data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
- data/spec/models/newspaper_article_spec.rb +73 -0
- data/spec/models/newspaper_container_spec.rb +111 -0
- data/spec/models/newspaper_issue_spec.rb +91 -0
- data/spec/models/newspaper_page_spec.rb +44 -0
- data/spec/models/newspaper_title_spec.rb +116 -0
- data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
- data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/ndnp_shared.rb +48 -0
- data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
- data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
- data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
- data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
- data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
- data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
- data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
- data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
- data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
- data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
- data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
- data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
- data/spec/routing/route_spec.rb +52 -0
- data/spec/search_builders/custom_search_builder_spec.rb +34 -0
- data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
- data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
- data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
- data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
- data/spec/spec_helper.rb +261 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
- data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
- data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
- data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
- data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
- data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
- data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
- data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
- data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
- data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
- data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
- data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
- data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
- data/tasks/newspaperworks_dev.rake +26 -0
- data/test/integration/navigation_test.rb +7 -0
- data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
- data/test/newspaper_works_test.rb +7 -0
- data/test/test_helper.rb +17 -0
- data/tmp/.keep +0 -0
- metadata +1037 -0
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'open3'
require 'shellwords'
|
2
|
+
|
3
|
+
module NewspaperWorks
|
4
|
+
class JP2DerivativeService < NewspaperPageDerivativeService
|
5
|
+
# Command template for OpenJPEG 2.x `opj_compress` that makes an
# NDNP-compliant grayscale JP2. The `source_file` and `out_file`
# references are filled in with Kernel#format.
CMD_GRAY = 'opj_compress -i %<source_file>s -o %<out_file>s ' \
           '-d 0,0 -b 64,64 -n 6 -p RLCP -t 1024,1024 -I -M 1 ' \
           '-r 64,53.821,45.249,40,32,26.911,22.630,20,16,14.286,' \
           '11.364,10,8,6.667,5.556,4.762,4,3.333,2.857,2.500,2,' \
           '1.667,1.429,1.190,1'.freeze

# Command template for OpenJPEG 2.x `opj_compress` that makes an RGB JP2;
# takes the same `source_file` / `out_file` references as CMD_GRAY.
CMD_COLOR = 'opj_compress -i %<source_file>s -o %<out_file>s ' \
            '-d 0,0 -b 64,64 -n 6 -p RPCL -t 1024,1024 -I -M 1 ' \
            '-r 2.4,1.48331273,.91673033,.56657224,.35016049,.21641118,' \
            '.13374944,.0944,.08266171'.freeze

# Name of the OpenJPEG 1.x binary that stands in for 2.x `opj_compress`
# and takes the same options; needed on Ubuntu Trusty (e.g. Travis CI).
CMD_1X = 'image_to_j2k'.freeze

# File extension of the derivative produced by this service plugin.
TARGET_EXT = 'jp2'.freeze

# Cached metadata about the source image (reset to nil in #initialize).
attr_accessor :source_meta
# The file set this service was constructed with.
attr_reader :file_set
# Expose the file set's URI and MIME type directly on the service.
delegate :uri, :mime_type, to: :file_set
|
28
|
+
|
29
|
+
# Prepare per-instance bookkeeping, then hand the file set to the
# base class constructor.
#
# @param file_set the file set this service will make a derivative for
def initialize(file_set)
  # paths of temporary files/symlinks to remove once creation is done
  @unlink_after_creation = []
  @command = nil
  # cached result string for imagemagick `identify` command
  @source_meta = nil
  # bare `super` forwards the same file_set argument
  super
end
|
36
|
+
|
37
|
+
# Create a JP2 derivative at @dest_path from the given source file.
#
# Nothing is created when the file set's MIME type is already
# 'image/jp2' (a JP2 master makes the derivative duplicative).
#
# Any temporary file or symlink made along the way is removed even when
# preparing the source or running OpenJPEG raises, so failed runs do not
# leave stray files behind in the temp directory.
#
# @param filename path of the source file, passed through to the base class
# @return the result of cleanup_intermediate, or nil when skipped
def create_derivatives(filename)
  # Base class takes care of loading @source_path, @dest_path
  super(filename)

  # no creation if jp2 master => deemed unnecessary/duplicative
  return if mime_type == 'image/jp2'

  cleanup_result = nil
  begin
    # A non-TIFF source, or a 1-bit monochrome source, needs a
    # NetPBM-based intermediate (temporary) file for OpenJPEG to consume;
    # any other TIFF only needs a temp symlink (to work around the
    # OpenJPEG file naming quirk).
    needs_intermediate = !tiff_source? || one_bit?
    needs_intermediate ? make_intermediate_source : make_symlink

    # Render the OpenJPEG command (color or grayscale, with source and
    # destination injected) and run it to write the file at @dest_path.
    `#{opj_command}`
  ensure
    # Clean up any intermediate files or symlinks used during creation,
    # whether or not the steps above succeeded.
    cleanup_result = cleanup_intermediate
  end
  cleanup_result
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
# source introspection:
|
67
|
+
|
68
|
+
# Does the `identify` output for the source mention the TIFF format?
def tiff_source?
  source_description = identify
  source_description.include?('TIFF')
end
|
71
|
+
|
72
|
+
# Point @source_path at a temporary symlink named `<uuid>.tif`.
#
# OpenJPEG binaries only accept TIFF input whose file name ends in a
# three-letter .TIF/.tif extension, so every non-monochrome TIFF source is
# simply linked under such a name. The link is queued for removal.
def make_symlink
  link_name = SecureRandom.uuid + '.tif'
  link_path = File.join(Dir.tmpdir, link_name)
  FileUtils.ln_s(@source_path, link_path)
  @unlink_after_creation << link_path
  # from here on the command reads the source through the link
  @source_path = link_path
end
|
83
|
+
|
84
|
+
# Convert the source into a temporary NetPBM bitmap and point @source_path
# at it, so OpenJPEG can consume sources it cannot read directly.
#
# The intermediate is a randomly named `.ppm` (color) or `.pgm` (grayscale)
# file in the temp dir; it is queued for removal after derivative creation.
# For a PDF source only the first page (`[0]`) is converted.
#
# ImageMagick `convert` is invoked with an argument vector rather than an
# interpolated shell string, so source paths containing spaces or shell
# metacharacters are passed through verbatim.
def make_intermediate_source
  extension = use_color? ? 'ppm' : 'pgm'
  tmpname = File.join(Dir.tmpdir, "#{SecureRandom.uuid}.#{extension}")

  # if pdf source, get only first page
  source = @source_path.end_with?('pdf') ? "#{@source_path}[0]" : @source_path

  # Use ImageMagick `convert` to create intermediate bitmap:
  system('convert', source, tmpname)
  @unlink_after_creation.push(tmpname)

  # finally, point @source_path for command at intermediate file:
  @source_path = tmpname
end
|
103
|
+
|
104
|
+
# Build the OpenJPEG command line for the current source and destination.
#
# The color or grayscale template is chosen from `use_color?`. When the
# OpenJPEG 2.x binary `opj_compress` is not on the PATH, the command name
# is swapped for the 1.x binary named by CMD_1X, which takes the same
# options.
#
# @return [String] command with @source_path and @dest_path injected
def opj_command
  template = use_color? ? CMD_COLOR : CMD_GRAY
  # `which` prints nothing when the 2.x binary cannot be found
  use_openjpeg_1x = `which opj_compress`.empty?
  template = template.sub('opj_compress', CMD_1X) if use_openjpeg_1x
  format(template, source_file: @source_path, out_file: @dest_path)
end
|
112
|
+
|
113
|
+
# Remove every symlink or intermediate file queued during creation.
#
# Uses `rm_f` so that a path which was never actually created (for example
# because the conversion command failed) does not raise, and empties the
# queue so a later call does not try to delete the same paths again.
def cleanup_intermediate
  @unlink_after_creation.each { |path| FileUtils.rm_f(path) }
  @unlink_after_creation.clear
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'fileutils'
require 'open3'

module NewspaperWorks
  # Base type for derivative services specific to NewspaperPage only.
  #
  # Subclasses that produce a single derivative set TARGET_EXT to its file
  # extension and call `super` from `create_derivatives` to have
  # @source_path and @dest_path loaded.
  class NewspaperPageDerivativeService
    attr_reader :file_set, :master_format
    delegate :uri, :mime_type, to: :file_set

    TARGET_EXT = nil

    # @return [String, nil] TARGET_EXT of the class this is called on
    def self.target_ext
      self::TARGET_EXT
    end

    def initialize(file_set)
      @file_set = file_set
      @dest_path = nil
      @source_path = nil
      # cached output of the `identify` command, see #identify
      @source_meta = nil
    end

    # This service applies only when the file set's parent work is a
    # NewspaperPage.
    def valid?
      parent = file_set.in_works[0]
      # fallback to Fedora-stored relationships if work's aggregation of
      #   file set is not indexed in Solr
      parent = file_set.member_of.select(&:work?)[0] if parent.nil?
      parent.class == NewspaperPage
    end

    def derivative_path_factory
      Hyrax::DerivativePath
    end

    # Compute the derivative path for the passed extension/destination name,
    # make sure its parent directory exists, and return the path.
    def prepare_path(extension)
      dest_path = derivative_path_factory.derivative_path_for_reference(
        @file_set,
        extension
      )
      # mkdir_p is a no-op for a directory that already exists
      FileUtils.mkdir_p(File.dirname(dest_path))
      dest_path
    end

    # calculate and ensure directory components for singular @dest_path
    #   should only be used by subclasses producing a single derivative
    def load_destpath
      @dest_path = prepare_path(self.class.target_ext)
    end

    # Output of ImageMagick `identify` for @source_path, run once and cached.
    #
    # The command is run from an argument vector (no shell), so paths with
    # spaces or shell metacharacters are safe, and stdout and stderr are
    # both drained so a chatty stderr cannot stall the child process.
    #
    # @return [String] standard output of the identify command
    def identify
      return @source_meta unless @source_meta.nil?
      argv = ['identify', @source_path]
      # fallback to graphicsmagick if source is jp2, as Ubuntu 16.10
      #   ImageMagick has no jp2 support.
      argv.unshift('gm') if @source_path.ends_with?('jp2')
      @source_meta, _stderr, _status = Open3.capture3(*argv)
      @source_meta
    end

    def use_color?
      # imagemagick `identify` output describes color space:
      !(identify.include?('Gray') || one_bit?)
    end

    # is source one-bit monochrome?
    def one_bit?
      identify.include?('1-bit')
    end

    # Record the source path and load the destination path.
    #
    # @param filename [String] presumed to be the full path to the source
    def create_derivatives(filename)
      @source_path = filename

      # Get destination path from Hyrax for file extension defined in
      #   TARGET_EXT constant on respective derivative service subclass.
      load_destpath
    end

    # Delete this file set's derivative files that end with an extension.
    #
    # @param args [Array] optional; the first element overrides the class's
    #   TARGET_EXT as the extension to remove
    def cleanup_derivatives(*args)
      target_ext = args.first || self.class.target_ext
      # nothing to match against (e.g. base class with no TARGET_EXT)
      return if target_ext.nil?
      derivative_path_factory.derivatives_for_reference(file_set).each do |path|
        FileUtils.rm_f(path) if path.ends_with?(target_ext)
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'open3'
|
2
|
+
|
3
|
+
require 'shellwords'

module NewspaperWorks
  # Derivative service plugin that renders a page image as a
  # JPEG-compressed PDF, using ImageMagick `convert` or, for JP2 sources,
  # GraphicsMagick (`gm convert`).
  class PDFDerivativeService < NewspaperPageDerivativeService
    TARGET_EXT = 'pdf'.freeze

    # PDF (JPEG, 8 bit grayscale), 150ppi
    GRAY_PDF_CMD = 'convert %<source_file>s ' \
                   '-resize 1800 -density 150 ' \
                   '-depth 8 -colorspace Gray ' \
                   '-compress jpeg %<out_file>s'.freeze

    # sRGB color PDF (JPEG, 8 bits per channel), 150ppi
    COLOR_PDF_CMD = 'convert %<source_file>s ' \
                    '-resize 1800 -density 150 ' \
                    '-depth 8 ' \
                    '-compress jpeg %<out_file>s'.freeze

    # graphicsmagick prefix, may be needed for jp2 source on Ubuntu
    GM_PREFIX = 'gm '.freeze
    # Original (misspelled) constant name, kept so existing references to it
    # continue to resolve.
    GM_PREFX = GM_PREFIX

    # Get conversion command; command varies on whether or not we have
    #   JP2 source, and whether we have color or grayscale material.
    #
    # Source and destination paths are shell-escaped before being placed in
    # the command string, since the command is run through a shell.
    #
    # @return [String] shell command writing the PDF to @dest_path
    def convert_cmd
      template = use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD
      cmd = format(
        template,
        source_file: Shellwords.escape(@source_path),
        out_file: Shellwords.escape(@dest_path)
      )
      @source_path.ends_with?('jp2') ? GM_PREFIX + cmd : cmd
    end

    # Create the PDF derivative for the source file at `filename`.
    def create_derivatives(filename)
      # Base class takes care of loading @source_path, @dest_path
      super(filename)

      # no creation if pdf master
      return if mime_type == 'application/pdf'

      # Get and run imagemagick or graphicsmagick command
      `#{convert_cmd}`
    end
  end
end
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# General derivative service for NewspaperWorks, which is meant to wrap
|
2
|
+
# and replace the stock Hyrax::FileSetDerivativesService with a proxy
|
3
|
+
# that runs one or more derivative service "plugin" components.
|
4
|
+
#
|
5
|
+
# Note: Hyrax::DerivativeService consumes this, instead of (directly)
|
6
|
+
# consuming Hyrax::FileSetDerivativesService.
|
7
|
+
#
|
8
|
+
# Unlike the "run the first valid plugin" arrangement that the
|
9
|
+
# Hyrax::DerivativeService uses to run an actual derivative creation
|
10
|
+
# service component, this component is:
|
11
|
+
#
|
12
|
+
# (a) Consumed by Hyrax::DerivativeService as that first valid plugin;
|
13
|
+
#
|
14
|
+
# (b) Wraps and runs 0..* plugins, not just the first.
|
15
|
+
#
|
16
|
+
# This should be registered to take precedence over default by:
|
17
|
+
# Hyrax::DerivativeService.services.unshift(
|
18
|
+
# NewspaperWorks::PluggableDerivativeService
|
19
|
+
# )
|
20
|
+
#
|
21
|
+
# Modify NewspaperWorks::PluggableDerivativeService.plugins
|
22
|
+
# to add, remove, or reorder plugin (derivative service) classes.
|
23
|
+
#
|
24
|
+
class NewspaperWorks::PluggableDerivativeService
  attr_reader :file_set
  delegate :uri, :mime_type, to: :file_set

  # default plugin Hyrax OOTB, makes thumbnails and sometimes extracts text:
  default_plugin = Hyrax::FileSetDerivativesService

  # make and expose an array of plugins, plus the method names that are
  # fanned out to them via method_missing
  @plugins = [default_plugin]
  @allowed_methods = [:cleanup_derivatives, :create_derivatives]
  class << self
    attr_accessor :plugins, :allowed_methods
  end

  # @return [Array<Class>] the class-level list of plugin classes
  def plugins
    self.class.plugins
  end

  def initialize(file_set)
    @file_set = file_set
  end

  def valid?
    # this wrapper/proxy/composite is always valid, but it may compose
    #   multiple plugins, some of which may or may not be valid, so
    #   validity checks happen within as well.
    true
  end

  # Ruby calls this hook with two arguments (name, include_private) from
  # `respond_to?`, so the second parameter must be accepted.
  def respond_to_missing?(method_name, include_private = false)
    self.class.allowed_methods.include?(method_name) || super
  end

  # get derivative services relevant to method name and file_set context
  #   -- omits plugins if particular destination exists or will soon.
  #
  # @param method_name [Symbol] the allowed method about to be dispatched
  # @return [Array] instantiated, valid plugins that should run
  def services(method_name)
    candidates = plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?)
    candidates.reject do |plugin|
      # plugins without a target_ext class method are never skipped
      dest = plugin.class.respond_to?(:target_ext) ? plugin.class.target_ext : nil
      skip_destination?(method_name, dest)
    end
  end

  # Fan an allowed method out to every relevant plugin, in order, passing
  # along positional arguments, keyword arguments and any block.
  def method_missing(name, *args, **opts, &block)
    return super unless respond_to_missing?(name)
    services(name).each do |plugin|
      if opts.empty?
        # avoid handing an empty hash to plugin methods taking no keywords
        plugin.send(name, *args, &block)
      else
        plugin.send(name, *args, **opts, &block)
      end
    end
  end

  private

    # Only :create_derivatives is ever skipped, and only for a persisted
    # file set with a known destination name.
    def skip_destination?(method_name, destination_name)
      return false if file_set.id.nil? || destination_name.nil?
      return false unless method_name == :create_derivatives
      # skip :create_derivatives if existing --> do not re-create
      existing_derivative?(destination_name) ||
        impending_derivative?(destination_name)
    end

    def existing_derivative?(name)
      path = derivative_path_factory.derivative_path_for_reference(
        file_set,
        name
      )
      File.exist?(path)
    end

    # is there an impending attachment from ingest logged to db?
    #   -- avoids stomping over pre-made derivative
    #      for which an attachment is still in-progress.
    def impending_derivative?(name)
      result = NewspaperWorks::DerivativeAttachment.find_by(
        fileset_id: file_set.id,
        destination_name: name
      )
      !result.nil?
    end

    def derivative_path_factory
      Hyrax::DerivativePath
    end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module NewspaperWorks
  # Derivative service plugin writing text derivatives for a page: ALTO XML,
  # plain text and flat JSON. When ALTO is already available (see
  # TextFormatsFromALTOService#alto_path) the other formats are derived from
  # it; otherwise OCR is run on the source.
  class TextExtractionDerivativeService < NewspaperPageDerivativeService
    def initialize(file_set)
      super
      @alto_path = @txt_path = nil
    end

    # @param src [String] path to the source file
    def create_derivatives(src)
      alto_service = NewspaperWorks::TextFormatsFromALTOService.new(file_set)
      if alto_service.alto_path.nil?
        create_derivatives_from_ocr(src)
      else
        alto_service.create_derivatives(src)
      end
    end

    # Run OCR on the file and write the txt, xml (ALTO) and json derivatives.
    def create_derivatives_from_ocr(filename)
      @source_path = filename
      # destination paths (directories are created as needed) for the
      # ALTO (.xml), plain text (.txt) and flat JSON (.json) files:
      @alto_path = prepare_path('xml')
      @txt_path = prepare_path('txt')
      @json_path = prepare_path('json')
      page_ocr = NewspaperWorks::TextExtraction::PageOCR.new(filename)
      # OCR will run once, on first method call to either .alto or .plain:
      write_plain_text(page_ocr.plain)
      write_alto(page_ocr.alto)
      write_json(page_ocr.word_json)
    end

    def write_alto(xml)
      File.open(@alto_path, 'w') { |io| io.write(xml) }
    end

    def write_plain_text(text)
      File.open(@txt_path, 'w') { |io| io.write(text) }
    end

    def write_json(text)
      File.open(@json_path, 'w') { |io| io.write(text) }
    end

    # Remove all three text derivatives, one extension at a time.
    def cleanup_derivatives
      super('txt')
      super('xml')
      super('json')
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module NewspaperWorks
  # Plugin to make text format derivatives (JSON, plain-text) from ALTO,
  #   either existing derivative, or an impending attachment.
  #   NOTE: to keep this from conflicting with TextExtractionDerivativeService,
  #         this class should be invoked by it, not PluggableDerivativeService.
  class TextFormatsFromALTOService < NewspaperPageDerivativeService
    # NOTE(review): 'tiff' looks copied from the TIFF service; this class
    #   only writes json and txt -- confirm whether the value matters.
    TARGET_EXT = 'tiff'.freeze

    # Write data, as UTF-8 text, to this file set's derivative path for the
    # given destination name (extension).
    def save_derivative(destination, data)
      # Load/prepare base of "pairtree" dir structure for extension, fileset
      prepare_path(destination)
      target = derivative_path_factory.derivative_path_for_reference(
        @file_set,
        destination
      )
      File.open(target, "w:UTF-8") { |io| io.write(data) }
    end

    # true only for a non-nil path naming an existing file of non-zero size
    def nonempty_file?(path)
      !path.nil? && File.exist?(path) && !File.size(path).zero?
    end

    # if there was no derivative yet, there might be one in-transit from
    #   an ingest, so check for that, and use its source if applicable:
    def incoming_alto_path
      attachments = NewspaperWorks::DerivativeAttachment.where(
        fileset_id: @file_set.id,
        destination_name: 'xml'
      )
      candidate = attachments.pluck(:path).uniq.first
      nonempty_file?(candidate) ? candidate : nil
    end

    # Path to usable ALTO XML: the existing non-empty derivative if there
    # is one, else a pending attachment's path, else nil.
    def alto_path
      existing = derivative_path_factory.derivative_path_for_reference(
        @file_set,
        'xml'
      )
      nonempty_file?(existing) ? existing : incoming_alto_path
    end

    # @return [String, nil] ALTO XML content read as UTF-8, nil if no path
    def alto
      path = alto_path
      path.nil? ? nil : File.read(path, encoding: 'UTF-8')
    end

    def create_derivatives(_filename)
      # as this plugin makes derivatives of derivative, _filename is ignored
      alto_xml = alto
      return if alto_xml.nil?
      # Image width from characterized primary file helps ensure proper scaling:
      original = @file_set.original_file
      width = original.width[0].to_i unless original.nil?
      height = original.height[0].to_i unless original.nil?
      # AltoReader is responsible for transcoding, this class just saves result
      reader = NewspaperWorks::TextExtraction::AltoReader.new(
        alto_xml,
        width,
        height
      )
      save_derivative('json', reader.json)
      save_derivative('txt', reader.text)
    end

    # do nothing here; NewspaperWorks::TextExtractionDerivativeService
    #   has this job instead for cleaning ALTO, JSON, TXT.
    def cleanup_derivatives(*_args); end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'open3'
|
2
|
+
|
3
|
+
require 'shellwords'

module NewspaperWorks
  # Derivative service plugin that renders a page source as a TIFF, using
  # ImageMagick `convert` or, for JP2 sources, GraphicsMagick
  # (`gm convert`).
  class TIFFDerivativeService < NewspaperPageDerivativeService
    TARGET_EXT = 'tiff'.freeze

    # For imagemagick commands, the output type is determined by the
    #   output file's extension.
    # TIFF (LZW, 8 bit grayscale)
    GRAY_CMD = 'convert %<source_file>s ' \
               '-depth 8 -colorspace Gray ' \
               '-compress lzw %<out_file>s'.freeze

    # Monochrome one-bit black/white TIFF, Group 4 compressed:
    MONO_CMD = 'convert %<source_file>s ' \
               '-depth 1 -monochrome -compress Group4 -type bilevel ' \
               '%<out_file>s'.freeze

    # sRGB color TIFF (8 bits per channel, lzw)
    # NOTE(review): `-depth 24` does not obviously match "8 bits per
    #   channel" -- confirm the intended value.
    COLOR_CMD = 'convert %<source_file>s ' \
                '-depth 24 ' \
                '-compress lzw %<out_file>s'.freeze

    # graphicsmagick prefix, may be needed for jp2 source on Ubuntu
    GM_PREFIX = 'gm '.freeze
    # Original (misspelled) constant name, kept so existing references to it
    # continue to resolve.
    GM_PREFX = GM_PREFIX

    # Get conversion command; command varies on whether or not we have
    #   JP2 source, and whether we have color or grayscale material.
    #
    # A one-bit source always uses the monochrome template. For a PDF
    # source only the first page (`[0]`) is read. Paths are shell-escaped
    # because the command is run through a shell.
    #
    # @return [String] shell command writing the TIFF to @dest_path
    def convert_cmd
      source_path = @source_path
      source_path += '[0]' if @source_path.ends_with?('pdf')
      template = use_color? ? COLOR_CMD : GRAY_CMD
      template = MONO_CMD if one_bit?
      cmd = format(
        template,
        source_file: Shellwords.escape(source_path),
        out_file: Shellwords.escape(@dest_path)
      )
      # normalization of command based on source
      @source_path.ends_with?('jp2') ? GM_PREFIX + cmd : cmd
    end

    # Create the TIFF derivative for the source file at `filename`.
    def create_derivatives(filename)
      # Base class takes care of loading @source_path, @dest_path
      super(filename)

      # no creation if tiff master
      return if mime_type == 'image/tiff'

      # Get and run imagemagick or graphicsmagick command
      `#{convert_cmd}`
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module NewspaperWorks
  # validates start and end date are properly formatted and end date comes after
  # or on the same date as the start date.
  class PublicationDateStartEndValidator < ActiveModel::Validator
    # yyyy, yyyy-mm or yyyy-mm-dd. The day part is only allowed after a
    # month part, and covers every day number 01..31 (calendar validity of
    # a full date is checked separately with Date.valid_date?).
    DATE_RANGE_REGEX = /\A\d{4}(-(0[1-9]|1[0-2])(-(0[1-9]|[12]\d|3[01]))?)?\z/

    # Adds errors to the record for malformed dates and for an end date
    # earlier than the start date.
    #
    # @return [Boolean] true when no error was recorded
    def validate(record)
      start_date = record.publication_date_start
      end_date = record.publication_date_end
      valid_dates?(start_date, end_date, record) &&
        start_before_end?(start_date, end_date, record)
    end

    private

      # Is the string a well-formed yyyy[-mm][-dd] value and, when all three
      # parts are given, a real calendar date?
      def publication_date_valid?(pub_date)
        return false unless DATE_RANGE_REGEX.match(pub_date)
        year, month, day = pub_date.split("-").map(&:to_i)
        day.nil? || Date.valid_date?(year, month, day)
      end

      # Compare the dates component by component (year, then month, then
      # day) over the parts both dates have, stopping at the first part
      # that differs. A later year therefore wins regardless of month/day,
      # e.g. 2019-12-01 .. 2020-01-01 is accepted. Components are
      # zero-padded strings, so string comparison orders them correctly.
      def start_before_end?(start_date, end_date, record)
        return true unless start_date && end_date
        date_error = "Publication start date must be earlier or the same as end date."
        pub_start = start_date.split("-")
        pub_end = end_date.split("-")
        shared = [pub_start.length, pub_end.length].min
        if (pub_end.first(shared) <=> pub_start.first(shared)) == -1
          record.errors[:publication_date_start] << date_error
        end
        record.errors[:publication_date_start].blank?
      end

      # Record a format error on each given date that is not valid.
      #
      # @return [Boolean] true when neither attribute has errors
      def valid_dates?(start_date, end_date, record)
        date_error = "Incorrect Date. Date input should be formatted yyyy[-mm][-dd] and be a valid date."
        if start_date
          record.errors[:publication_date_start] << date_error unless publication_date_valid?(start_date)
        end
        if end_date
          record.errors[:publication_date_end] << date_error unless publication_date_valid?(end_date)
        end
        record.errors[:publication_date_start].blank? && record.errors[:publication_date_end].blank?
      end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module NewspaperWorks
  # validates that a properly formatted date has been entered
  class PublicationDateValidator < ActiveModel::Validator
    DATE_REGEX = /\A\d{4}-((0[1-9])|(1[0-2]))-((0[1-9])|([1-2][0-9])|(3[0-1]))\z/

    # A blank publication_date is accepted. Otherwise it must look like
    # yyyy-mm-dd and name a real calendar date, or an error is recorded.
    def validate(record)
      error_msg = "Incorrect Date. Date input should be formatted yyyy-mm-dd and be a valid date."
      return unless record.publication_date.present?
      if DATE_REGEX.match(record.publication_date)
        year, month, day = record.publication_date.split("-").map(&:to_i)
        record.errors[:publication_date] << error_msg unless Date.valid_date?(year, month, day)
      else
        record.errors[:publication_date] << error_msg
        nil
      end
    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<%# gallery tile for one search result: thumbnail plus caption partials -%>
<div class="document col-xs-6 col-md-3">
  <div class="thumbnail" data-fileset="<%= document.file_set_ids&.first %>" data-query="<%= highlight_matches(document, 'all_text_tsimv', 'em') || search_query(current_search_session.query_params) %>">
    <%= render_newspaper_thumbnail_tag(document, current_search_session.query_params) %>
    <div class="caption">
      <%= render_document_partials document, blacklight_config.view_config(:gallery).partials, document_counter: document_counter %>
    </div>
  </div>
</div>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<%# gallery tile for one search result: thumbnail plus caption partials -%>
<div class="document col-xs-6 col-md-3">
  <div class="thumbnail" data-fileset="<%= document.file_set_ids&.first %>" data-query="<%= highlight_matches(document, 'all_text_tsimv', 'em') || search_query(current_search_session.query_params) %>">
    <%= render_newspaper_thumbnail_tag(document, current_search_session.query_params) %>
    <div class="caption">
      <%= render_document_partials document, blacklight_config.view_config(:gallery).partials, document_counter: document_counter %>
    </div>
  </div>
</div>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
<%# based on blacklight/app/views/catalog/_index_header_default.html.erb %>
<%# header bar for doc items in index view -%>
<div class="documentHeader row">
  <%# main title container for doc partial view
      The number of bootstrap columns left for the title depends on
      whether the bookmarks control renders anything.
  -%>
  <% document_actions = capture do %>
    <%# bookmark functions for items/docs -%>
    <%= render_index_doc_actions document, wrapping_class: "index-document-functions col-sm-3 col-lg-2" %>
  <% end %>
  <h3 class="index_title document-title-heading <%= document_actions.present? ? "col-sm-9 col-lg-10" : "col-md-12" %>">
    <% if (counter = document_counter_with_offset(document_counter)) %>
      <span class="document-counter">
        <%= t('blacklight.search.documents.counter', counter: counter) %>
      </span>
    <% end %>
    <%= link_to document.title_or_label,
                hyrax_newspaper_article_path(
                  document.id,
                  anchor: iiif_search_anchor(current_search_session.query_params)
                ) %>
  </h3>
  <%= document_actions %>
</div>
|