newspaper_works 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.fcrepo_wrapper +4 -0
- data/.gitignore +43 -0
- data/.rubocop.yml +143 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +50 -0
- data/Gemfile +47 -0
- data/LICENSE +203 -0
- data/README.md +159 -0
- data/Rakefile +38 -0
- data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
- data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
- data/app/assets/config/newspaper_works_manifest.js +2 -0
- data/app/assets/images/newspaper_works/.keep +0 -0
- data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
- data/app/assets/javascripts/newspaper_works.js +4 -0
- data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
- data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
- data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
- data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
- data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
- data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
- data/app/forms/hyrax/newspaper_article_form.rb +11 -0
- data/app/forms/hyrax/newspaper_container_form.rb +11 -0
- data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
- data/app/forms/hyrax/newspaper_page_form.rb +15 -0
- data/app/forms/hyrax/newspaper_title_form.rb +12 -0
- data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
- data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
- data/app/helpers/newspaper_works/application_helper.rb +5 -0
- data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
- data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
- data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
- data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
- data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
- data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
- data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
- data/app/indexers/newspaper_article_indexer.rb +16 -0
- data/app/indexers/newspaper_container_indexer.rb +18 -0
- data/app/indexers/newspaper_issue_indexer.rb +26 -0
- data/app/indexers/newspaper_page_indexer.rb +9 -0
- data/app/indexers/newspaper_title_indexer.rb +19 -0
- data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
- data/app/jobs/newspaper_works/application_job.rb +4 -0
- data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
- data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
- data/app/mailers/newspaper_works/application_mailer.rb +8 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
- data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
- data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
- data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
- data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
- data/app/models/file_set.rb +10 -0
- data/app/models/newspaper_article.rb +158 -0
- data/app/models/newspaper_container.rb +86 -0
- data/app/models/newspaper_issue.rb +115 -0
- data/app/models/newspaper_page.rb +70 -0
- data/app/models/newspaper_title.rb +111 -0
- data/app/models/newspaper_works/application_record.rb +6 -0
- data/app/models/newspaper_works/derivative_attachment.rb +8 -0
- data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
- data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
- data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
- data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
- data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
- data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
- data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
- data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
- data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
- data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
- data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
- data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
- data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
- data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
- data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
- data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
- data/app/services/hyrax/article_genre_service.rb +9 -0
- data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
- data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
- data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
- data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
- data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
- data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
- data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
- data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
- data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
- data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
- data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
- data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
- data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
- data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
- data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
- data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
- data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
- data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
- data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
- data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
- data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
- data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
- data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
- data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
- data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
- data/app/views/newspaper_works/base/_show.html.erb +45 -0
- data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
- data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
- data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
- data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
- data/app/views/records/edit_fields/_genre.html.erb +4 -0
- data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
- data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/newspaper_article.de.yml +12 -0
- data/config/locales/newspaper_article.en.yml +12 -0
- data/config/locales/newspaper_article.es.yml +12 -0
- data/config/locales/newspaper_article.fr.yml +12 -0
- data/config/locales/newspaper_article.it.yml +12 -0
- data/config/locales/newspaper_article.pt-BR.yml +12 -0
- data/config/locales/newspaper_article.zh.yml +12 -0
- data/config/locales/newspaper_container.de.yml +8 -0
- data/config/locales/newspaper_container.en.yml +8 -0
- data/config/locales/newspaper_container.es.yml +8 -0
- data/config/locales/newspaper_container.fr.yml +8 -0
- data/config/locales/newspaper_container.it.yml +8 -0
- data/config/locales/newspaper_container.pt-BR.yml +8 -0
- data/config/locales/newspaper_container.zh.yml +8 -0
- data/config/locales/newspaper_issue.de.yml +8 -0
- data/config/locales/newspaper_issue.en.yml +8 -0
- data/config/locales/newspaper_issue.es.yml +8 -0
- data/config/locales/newspaper_issue.fr.yml +8 -0
- data/config/locales/newspaper_issue.it.yml +8 -0
- data/config/locales/newspaper_issue.pt-BR.yml +8 -0
- data/config/locales/newspaper_issue.zh.yml +8 -0
- data/config/locales/newspaper_page.de.yml +15 -0
- data/config/locales/newspaper_page.en.yml +15 -0
- data/config/locales/newspaper_page.es.yml +15 -0
- data/config/locales/newspaper_page.fr.yml +15 -0
- data/config/locales/newspaper_page.it.yml +15 -0
- data/config/locales/newspaper_page.pt-BR.yml +15 -0
- data/config/locales/newspaper_page.zh.yml +15 -0
- data/config/locales/newspaper_title.de.yml +8 -0
- data/config/locales/newspaper_title.en.yml +8 -0
- data/config/locales/newspaper_title.es.yml +8 -0
- data/config/locales/newspaper_title.fr.yml +8 -0
- data/config/locales/newspaper_title.it.yml +8 -0
- data/config/locales/newspaper_title.pt-BR.yml +8 -0
- data/config/locales/newspaper_title.zh.yml +8 -0
- data/config/locales/newspaper_works.de.yml +50 -0
- data/config/locales/newspaper_works.en.yml +52 -0
- data/config/locales/newspaper_works.es.yml +52 -0
- data/config/locales/newspaper_works.fr.yml +52 -0
- data/config/locales/newspaper_works.it.yml +52 -0
- data/config/locales/newspaper_works.pt-BR.yml +52 -0
- data/config/locales/newspaper_works.zh.yml +52 -0
- data/config/routes.rb +9 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
- data/lib/generators/newspaper_works/assets_generator.rb +29 -0
- data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
- data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
- data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
- data/lib/generators/newspaper_works/install_generator.rb +97 -0
- data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
- data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
- data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
- data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
- data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
- data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
- data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
- data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
- data/lib/newspaper_works/configuration.rb +14 -0
- data/lib/newspaper_works/data/fileset_helper.rb +25 -0
- data/lib/newspaper_works/data/path_helper.rb +40 -0
- data/lib/newspaper_works/data/work_derivatives.rb +314 -0
- data/lib/newspaper_works/data/work_file.rb +92 -0
- data/lib/newspaper_works/data/work_files.rb +181 -0
- data/lib/newspaper_works/data.rb +35 -0
- data/lib/newspaper_works/engine.rb +42 -0
- data/lib/newspaper_works/errors.rb +14 -0
- data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
- data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
- data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
- data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
- data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
- data/lib/newspaper_works/ingest/from_command.rb +52 -0
- data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
- data/lib/newspaper_works/ingest/issue_images.rb +51 -0
- data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
- data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
- data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
- data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
- data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
- data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
- data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
- data/lib/newspaper_works/ingest/ndnp.rb +21 -0
- data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
- data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
- data/lib/newspaper_works/ingest/page_image.rb +52 -0
- data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
- data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
- data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
- data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
- data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
- data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
- data/lib/newspaper_works/ingest/publication_info.rb +44 -0
- data/lib/newspaper_works/ingest.rb +90 -0
- data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
- data/lib/newspaper_works/logging.rb +54 -0
- data/lib/newspaper_works/page_finder.rb +62 -0
- data/lib/newspaper_works/resource_fetcher.rb +78 -0
- data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
- data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
- data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
- data/lib/newspaper_works/text_extraction.rb +10 -0
- data/lib/newspaper_works/version.rb +3 -0
- data/lib/newspaper_works.rb +19 -0
- data/lib/tasks/newspaper_works_tasks.rake +39 -0
- data/newspaper_works.gemspec +49 -0
- data/spec/.keep.txt +1 -0
- data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
- data/spec/controllers/catalog_controller_spec.rb +63 -0
- data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
- data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_issue_ingest.rb +6 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_ingest.rb +6 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/features/front_pages_for_title_spec.rb +19 -0
- data/spec/features/newspaper_title_search_spec.rb +30 -0
- data/spec/features/newspapers_search_spec.rb +49 -0
- data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
- data/spec/features_shared.rb +71 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
- data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
- data/spec/fixtures/files/resource_mocks/urls.json +82 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
- data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
- data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
- data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
- data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
- data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
- data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
- data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
- data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
- data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
- data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
- data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
- data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
- data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
- data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
- data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
- data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
- data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
- data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
- data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
- data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
- data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
- data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
- data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
- data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
- data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
- data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
- data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
- data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
- data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
- data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
- data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
- data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
- data/spec/lib/newspaper_works/logging_spec.rb +53 -0
- data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
- data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
- data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
- data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
- data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
- data/spec/misc_shared.rb +109 -0
- data/spec/model_shared.rb +134 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
- data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
- data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
- data/spec/models/newspaper_article_spec.rb +73 -0
- data/spec/models/newspaper_container_spec.rb +111 -0
- data/spec/models/newspaper_issue_spec.rb +91 -0
- data/spec/models/newspaper_page_spec.rb +44 -0
- data/spec/models/newspaper_title_spec.rb +116 -0
- data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
- data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/ndnp_shared.rb +48 -0
- data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
- data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
- data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
- data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
- data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
- data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
- data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
- data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
- data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
- data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
- data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
- data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
- data/spec/routing/route_spec.rb +52 -0
- data/spec/search_builders/custom_search_builder_spec.rb +34 -0
- data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
- data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
- data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
- data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
- data/spec/spec_helper.rb +261 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
- data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
- data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
- data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
- data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
- data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
- data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
- data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
- data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
- data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
- data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
- data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
- data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
- data/tasks/newspaperworks_dev.rake +26 -0
- data/test/integration/navigation_test.rb +7 -0
- data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
- data/test/newspaper_works_test.rb +7 -0
- data/test/test_helper.rb +17 -0
- data/tmp/.keep +0 -0
- metadata +1037 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
require 'open3'
require 'shellwords'
|
|
2
|
+
|
|
3
|
+
module NewspaperWorks
|
|
4
|
+
class JP2DerivativeService < NewspaperPageDerivativeService
|
|
5
|
+
# Command template (OpenJPEG 2.x `opj_compress`) for an NDNP-compliant
# grayscale JP2. The `source_file` and `out_file` references are filled
# in later with Kernel#format.
CMD_GRAY = [
  'opj_compress -i %<source_file>s -o %<out_file>s',
  '-d 0,0 -b 64,64 -n 6 -p RLCP -t 1024,1024 -I -M 1',
  '-r',
  %w[
    64 53.821 45.249 40 32 26.911 22.630 20 16 14.286
    11.364 10 8 6.667 5.556 4.762 4 3.333 2.857 2.500 2
    1.667 1.429 1.190 1
  ].join(',')
].join(' ').freeze

# Command template (OpenJPEG 2.x `opj_compress`) for an RGB JP2; same
# format references as CMD_GRAY.
CMD_COLOR = [
  'opj_compress -i %<source_file>s -o %<out_file>s',
  '-d 0,0 -b 64,64 -n 6 -p RPCL -t 1024,1024 -I -M 1',
  '-r',
  %w[
    2.4 1.48331273 .91673033 .56657224 .35016049 .21641118
    .13374944 .0944 .08266171
  ].join(',')
].join(' ').freeze

# Name of the OpenJPEG 1.x binary that stands in for 2.x `opj_compress`
# and takes the same options; needed on Ubuntu Trusty (e.g. Travis CI).
CMD_1X = 'image_to_j2k'.freeze

# File extension of the derivative this service plugin produces.
TARGET_EXT = 'jp2'.freeze
|
|
24
|
+
|
|
25
|
+
attr_accessor :source_meta
|
|
26
|
+
attr_reader :file_set
|
|
27
|
+
delegate :uri, :mime_type, to: :file_set
|
|
28
|
+
|
|
29
|
+
# Prepare per-instance bookkeeping, then defer to the base class.
#
# @param file_set [Object] handed unchanged to the superclass initializer
def initialize(file_set)
  # slot for the cached result string of the ImageMagick `identify` command
  @source_meta = nil
  @command = nil
  # temp files / symlinks registered here are removed after creation
  @unlink_after_creation = []
  super(file_set)
end
|
|
36
|
+
|
|
37
|
+
# Create the JP2 derivative for a source file.
#
# Nothing is generated when the file set's mime type is already
# image/jp2 (a JP2 master makes the derivative duplicative). Otherwise a
# temporary stand-in for the source is prepared, the OpenJPEG command is
# run to write the derivative at @dest_path, and the temporary artifacts
# are removed -- also when one of those steps raises.
#
# @param filename [String] source file name, forwarded to the base class
#   (which, per the original comment, loads @source_path and @dest_path)
def create_derivatives(filename)
  super(filename)

  # no creation if jp2 master => deemed unnecessary/duplicative
  return if mime_type == 'image/jp2'

  # A non-TIFF source, or a 1-bit monochrome source, needs a NetPBM-based
  # intermediate (temporary) file for OpenJPEG to consume.
  needs_intermediate = !tiff_source? || one_bit?

  begin
    # Either an intermediate temp file, or a temp symlink (to work around
    # the OpenJPEG file naming quirk); both repoint @source_path.
    needs_intermediate ? make_intermediate_source : make_symlink

    # Run the rendered command (color or grayscale variant) to make the
    # derivative file at @dest_path.
    `#{opj_command}`
  ensure
    # Always remove intermediate files or symlinks, so a failing step
    # does not leave them behind in the temp dir.
    cleanup_intermediate
  end
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
# source introspection:
|
|
67
|
+
|
|
68
|
+
# Does the `identify` output for the source mention TIFF?
#
# @return [Boolean]
def tiff_source?
  image_info = identify
  image_info.include?('TIFF')
end
|
|
71
|
+
|
|
72
|
+
# Point @source_path at a temporary `.tif` symlink to the real source.
#
# OpenJPEG binaries only accept TIFF input whose name ends in .TIF or .tif
# (three letters), so every non-monochrome TIFF source is linked under such
# a name. The link is recorded in @unlink_after_creation for later removal.
#
# @return [String] path of the symlink
def make_symlink
  link_path = File.join(Dir.tmpdir, "#{SecureRandom.uuid}.tif")
  FileUtils.ln_s(@source_path, link_path)
  @unlink_after_creation << link_path
  # from here on, the command reads from the link instead of the original
  @source_path = link_path
end
|
|
83
|
+
|
|
84
|
+
# Convert the source into a temporary NetPBM bitmap and point @source_path
# at it (`.ppm` for color sources, `.pgm` otherwise).
#
# ImageMagick `convert` is started with an argument vector rather than an
# interpolated shell string, so source paths containing spaces or shell
# metacharacters are passed through verbatim. The intermediate file is
# recorded in @unlink_after_creation for later removal.
#
# @return [String] path of the intermediate file
def make_intermediate_source
  # random filename with the appropriate extension, inside the temp dir
  extension = use_color? ? 'ppm' : 'pgm'
  tmpname = File.join(Dir.tmpdir, "#{SecureRandom.uuid}.#{extension}")
  # if pdf source, get only first page
  source_path = @source_path
  source_path += '[0]' if @source_path.end_with?('pdf')
  # Array form of IO.popen bypasses the shell; output is read and discarded.
  IO.popen(['convert', source_path, tmpname], &:read)
  @unlink_after_creation.push(tmpname)
  # finally, point @source_path for command at intermediate file
  @source_path = tmpname
end
|
|
103
|
+
|
|
104
|
+
# Build the OpenJPEG command line for the current source and destination.
#
# Starts from the color or grayscale template; when `which opj_compress`
# prints nothing, the 2.x binary name is swapped for the 1.x name.
#
# @return [String] command with source and destination file names injected
def opj_command
  template = use_color? ? CMD_COLOR : CMD_GRAY
  opj2_missing = `which opj_compress`.empty?
  template = template.sub('opj_compress', 'image_to_j2k') if opj2_missing
  format(template, source_file: @source_path, out_file: @dest_path)
end
|
|
112
|
+
|
|
113
|
+
# Remove every symlink or intermediate file recorded during creation.
#
# Uses `rm_f` so a path that is already gone does not raise, and empties
# the list afterwards so a later run on the same instance does not try to
# delete the same paths again.
#
# @return [Array] the (now empty) list of pending paths
def cleanup_intermediate
  @unlink_after_creation.each { |path| FileUtils.rm_f(path) }
  @unlink_after_creation.clear
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
module NewspaperWorks
|
|
2
|
+
# Base type for derivative services specific to NewspaperPage only
|
|
3
|
+
class NewspaperPageDerivativeService
|
|
4
|
+
attr_reader :file_set, :master_format
|
|
5
|
+
delegate :uri, :mime_type, to: :file_set
|
|
6
|
+
|
|
7
|
+
TARGET_EXT = nil
|
|
8
|
+
|
|
9
|
+
# File extension this service produces, read from the TARGET_EXT constant
# of the class the method is called on.
#
# @return [String, nil]
def self.target_ext
  const_get(:TARGET_EXT)
end
|
|
12
|
+
|
|
13
|
+
# Remember the file set; source/destination paths and cached source
# metadata start out unset.
#
# @param file_set [Object] file set derivatives are made for
def initialize(file_set)
  @file_set = file_set
  @dest_path = @source_path = @source_meta = nil
end
|
|
19
|
+
|
|
20
|
+
# Is the file set's parent work exactly a NewspaperPage?
#
# The first entry of `in_works` is used; when there is none, the first
# member of `member_of` answering true to `work?` is used instead (fallback
# to Fedora-stored relationships if the work's aggregation of the file set
# is not indexed in Solr).
#
# @return [Boolean]
def valid?
  container = file_set.in_works[0]
  if container.nil?
    container = file_set.member_of.select(&:work?)[0]
  end
  NewspaperPage == container.class
end
|
|
27
|
+
|
|
28
|
+
# Class used to compute derivative paths for a file set.
#
# @return [Class] Hyrax::DerivativePath
def derivative_path_factory
  Hyrax::DerivativePath
end
|
|
31
|
+
|
|
32
|
+
# prepare full path for passed extension/destination name, return path
|
|
33
|
+
# Compute the derivative path for the given extension/destination name and
# make sure its parent directory exists.
#
# `File.dirname` replaces manual splitting on '/', and `mkdir_p` is called
# unconditionally: it is already a no-op for an existing directory, so the
# separate existence check only added a race window.
#
# @param extension [String] destination name, e.g. 'jp2' or 'xml'
# @return [String] full path of the derivative file
def prepare_path(extension)
  dest_path = derivative_path_factory.derivative_path_for_reference(
    @file_set,
    extension
  )
  FileUtils.mkdir_p(File.dirname(dest_path))
  dest_path
end
|
|
42
|
+
|
|
43
|
+
# calculate and ensure directory components for singular @dest_path
|
|
44
|
+
# should only be used by subclasses producing a single derivative
|
|
45
|
+
# Set @dest_path from the class-level target extension, creating the
# directory components as needed. Intended for subclasses that produce a
# single derivative.
#
# @return [String] the prepared destination path
def load_destpath
  extension = self.class.target_ext
  @dest_path = prepare_path(extension)
end
|
|
48
|
+
|
|
49
|
+
# Output of the `identify` command for @source_path, cached in @source_meta.
#
# The command is run from an argument vector (no shell), so paths with
# spaces or shell metacharacters are handled verbatim, and `capture3`
# drains stderr as well as stdout. For a source ending in 'jp2' the
# GraphicsMagick front-end (`gm identify`) is used, as Ubuntu 16.10
# ImageMagick has no jp2 support.
#
# @return [String] standard output of the identify command
def identify
  return @source_meta unless @source_meta.nil?

  argv = ['identify', @source_path]
  argv.unshift('gm') if @source_path.end_with?('jp2')
  @source_meta, _stderr, _status = Open3.capture3(*argv)
  @source_meta
end
|
|
62
|
+
|
|
63
|
+
# Should the source be treated as color? True unless the `identify` output
# mentions 'Gray' or the source is one-bit.
#
# @return [Boolean]
def use_color?
  !identify.include?('Gray') && !one_bit?
end
|
|
67
|
+
|
|
68
|
+
# is source one-bit monochrome?
|
|
69
|
+
# Does the `identify` output describe a 1-bit source?
#
# @return [Boolean]
def one_bit?
  image_info = identify
  image_info.include?('1-bit')
end
|
|
72
|
+
|
|
73
|
+
# Record the source path and prepare the destination path; subclasses call
# this via `super` before doing the actual conversion.
#
# @param filename [String] presumed to be the full path to the source file
# @return [String] destination path for the subclass's TARGET_EXT
def create_derivatives(filename)
  @source_path = filename
  # destination comes from Hyrax for the extension named by the TARGET_EXT
  # constant of the respective derivative service subclass
  load_destpath
end
|
|
81
|
+
|
|
82
|
+
# Delete this file set's derivative files whose path ends with the target
# extension.
#
# @param args [Array] optional; a truthy first element overrides the
#   class-level target extension
# @return [Array] every derivative path that was inspected
def cleanup_derivatives(*args)
  target_ext = args.first || self.class.target_ext
  derivative_path_factory.derivatives_for_reference(file_set).each do |path|
    next unless path.ends_with?(target_ext)
    FileUtils.rm_f(path)
  end
end
|
|
88
|
+
|
|
89
|
+
# def cleanup_derivatives; end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require 'open3'
|
|
2
|
+
|
|
3
|
+
module NewspaperWorks
|
|
4
|
+
class PDFDerivativeService < NewspaperPageDerivativeService
|
|
5
|
+
TARGET_EXT = 'pdf'.freeze
|
|
6
|
+
|
|
7
|
+
# PDF (JPEG, 8 bit grayscale), 150ppi
|
|
8
|
+
GRAY_PDF_CMD = 'convert %<source_file>s ' \
|
|
9
|
+
'-resize 1800 -density 150 ' \
|
|
10
|
+
'-depth 8 -colorspace Gray ' \
|
|
11
|
+
'-compress jpeg %<out_file>s'.freeze
|
|
12
|
+
|
|
13
|
+
# sRGB color PDF (JPEG, 8 bits per channel), 150ppi
|
|
14
|
+
COLOR_PDF_CMD = 'convert %<source_file>s ' \
|
|
15
|
+
'-resize 1800 -density 150 ' \
|
|
16
|
+
'-depth 8 ' \
|
|
17
|
+
'-compress jpeg %<out_file>s'.freeze
|
|
18
|
+
|
|
19
|
+
# graphicsmagick prefix, may be needed for jp2 source on Ubuntu.
# `convert_cmd` refers to GM_PREFIX; the constant was previously only
# defined under the misspelled name, which raised NameError for jp2 sources.
GM_PREFIX = 'gm '.freeze
# Misspelled original name, kept so existing references keep resolving.
GM_PREFX = GM_PREFIX
|
|
21
|
+
|
|
22
|
+
# Pass the file set straight through to the base class.
#
# @param file_set [Object] file set a PDF derivative is made for
def initialize(file_set)
  super
end
|
|
25
|
+
|
|
26
|
+
# Get conversion command; command varies on whether or not we have
|
|
27
|
+
# JP2 source, and whether we have color or grayscale material.
|
|
28
|
+
# Render the conversion command for the current source and destination.
#
# The template depends on color versus grayscale material; for a source
# ending in 'jp2' the command is prefixed with the graphicsmagick prefix.
#
# @return [String] command line
def convert_cmd
  rendered = format(
    use_color? ? COLOR_PDF_CMD : GRAY_PDF_CMD,
    source_file: @source_path,
    out_file: @dest_path
  )
  return rendered unless @source_path.ends_with?('jp2')
  GM_PREFIX + rendered
end
|
|
33
|
+
|
|
34
|
+
# Create the PDF derivative for the given source file, unless the file
# set's mime type is already application/pdf.
#
# @param filename [String] path to the source file
# @return [String, nil] output of the conversion command, or nil when skipped
def create_derivatives(filename)
  # base class loads @source_path and @dest_path
  super(filename)
  return if mime_type == 'application/pdf'

  # run the imagemagick or graphicsmagick command
  command = convert_cmd
  %x(#{command})
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# General derivative service for NewspaperWorks, which is meant to wrap
|
|
2
|
+
# and replace the stock Hyrax::FileSetDerivativeService with a proxy
|
|
3
|
+
# that runs one or more derivative service "plugin" components.
|
|
4
|
+
#
|
|
5
|
+
# Note: Hyrax::DerivativeService consumes this, instead of (directly)
|
|
6
|
+
# consuming Hyrax::FileSetDerivativeService.
|
|
7
|
+
#
|
|
8
|
+
# Unlike the "run the first valid plugin" arrangement that the
|
|
9
|
+
# Hyrax::DerivativeService uses to run an actual derivative creation
|
|
10
|
+
# service component, this component is:
|
|
11
|
+
#
|
|
12
|
+
# (a) Consumed by Hyrax::DerivativeService as that first valid plugin;
|
|
13
|
+
#
|
|
14
|
+
# (b) Wraps and runs 0..* plugins, not just the first.
|
|
15
|
+
#
|
|
16
|
+
# This should be registered to take precedence over default by:
|
|
17
|
+
# Hyrax::DerivativeService.services.unshift(
|
|
18
|
+
# NewspaperWorks::PluggableDerivativeService
|
|
19
|
+
# )
|
|
20
|
+
#
|
|
21
|
+
# Modify NewspaperWorks::PluggableDerivativeService.plugins
|
|
22
|
+
# to add, remove, or reorder plugin (derivative service) classes.
|
|
23
|
+
#
|
|
24
|
+
class NewspaperWorks::PluggableDerivativeService
|
|
25
|
+
attr_reader :file_set
|
|
26
|
+
delegate :uri, :mime_type, to: :file_set
|
|
27
|
+
|
|
28
|
+
# default plugin Hyrax OOTB, makes thumbnails and sometimes extracts text:
|
|
29
|
+
default_plugin = Hyrax::FileSetDerivativesService
|
|
30
|
+
|
|
31
|
+
# make and expose an array of plugins
|
|
32
|
+
@plugins = [default_plugin]
|
|
33
|
+
@allowed_methods = [:cleanup_derivatives, :create_derivatives]
|
|
34
|
+
class << self
|
|
35
|
+
attr_accessor :plugins, :allowed_methods
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Plugin classes registered on the class.
#
# @return [Array<Class>]
def plugins
  registry = self.class
  registry.plugins
end
|
|
41
|
+
|
|
42
|
+
# @param file_set [Object] file set the wrapped plugins operate on
def initialize(file_set)
  @file_set = file_set
end
|
|
45
|
+
|
|
46
|
+
# Always true: this composite is valid for any file set, and the plugins it
# wraps are checked for validity individually when they are run.
#
# @return [true]
def valid?
  true
end
|
|
52
|
+
|
|
53
|
+
# Report the allowed proxy methods as supported.
#
# Ruby calls this hook with two arguments (method name and the
# include-private flag); accepting only one made `respond_to?` raise
# ArgumentError. The flag defaults to false so one-argument calls keep
# working.
#
# @param method_name [Symbol]
# @param include_private [Boolean]
# @return [Boolean]
def respond_to_missing?(method_name, include_private = false)
  self.class.allowed_methods.include?(method_name) || super
end
|
|
56
|
+
|
|
57
|
+
# get derivative services relevant to method name and file_set context
|
|
58
|
+
# -- omits plugins if particular destination exists or will soon.
|
|
59
|
+
# Instantiate every plugin for the file set, keep the valid ones, and drop
# those whose destination should be skipped for this method.
#
# @param method_name [Symbol] method about to be proxied
# @return [Array] plugin instances to run, in registration order
def services(method_name)
  candidates = plugins.map { |plugin| plugin.new(file_set) }.select(&:valid?)
  candidates.reject do |service|
    klass = service.class
    # plugins without a class-level target_ext have no destination to check
    dest = klass.respond_to?(:target_ext) ? klass.target_ext : nil
    skip_destination?(method_name, dest)
  end
end
|
|
67
|
+
|
|
68
|
+
# Proxy an allowed method to every relevant plugin, in order; anything else
# goes to the default handling.
#
# Only the positional arguments are forwarded to the plugins.
#
# @param name [Symbol] method being called
# @return [Array] the plugin instances the call was sent to
def method_missing(name, *args, **opts, &block)
  return super unless respond_to_missing?(name)

  services(name).each { |plugin| plugin.send(name, *args) }
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
# Should a plugin be left out for this method and destination?
#
# Only :create_derivatives is ever skipped, and only when both the file set
# id and the destination name are known: a derivative that already exists,
# or has an impending attachment, is not re-created.
#
# @param method_name [Symbol]
# @param destination_name [String, nil]
# @return [Boolean]
def skip_destination?(method_name, destination_name)
  creating = method_name == :create_derivatives
  return false unless creating && !file_set.id.nil? && !destination_name.nil?
  return true if existing_derivative?(destination_name)

  impending_derivative?(destination_name)
end
|
|
91
|
+
|
|
92
|
+
# Is there already a file at the derivative path for this destination name?
#
# @param name [String] destination name / extension
# @return [Boolean]
def existing_derivative?(name)
  File.exist?(derivative_path_factory.derivative_path_for_reference(file_set, name))
end
|
|
99
|
+
|
|
100
|
+
# is there an impending attachment from ingest logged to db?
|
|
101
|
+
# -- avoids stomping over pre-made derivative
|
|
102
|
+
# for which an attachment is still in-progress.
|
|
103
|
+
# Is there a DerivativeAttachment record for this file set and destination?
#
# Used to avoid stomping over a pre-made derivative whose attachment is
# still in progress.
#
# @param name [String] destination name / extension
# @return [Boolean]
def impending_derivative?(name)
  attachment = NewspaperWorks::DerivativeAttachment.find_by(
    fileset_id: file_set.id,
    destination_name: name
  )
  attachment.nil? ? false : true
end
|
|
110
|
+
|
|
111
|
+
# Class used to compute derivative paths for a file set.
#
# @return [Class] Hyrax::DerivativePath
def derivative_path_factory
  Hyrax::DerivativePath
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
module NewspaperWorks
|
|
2
|
+
class TextExtractionDerivativeService < NewspaperPageDerivativeService
|
|
3
|
+
# Initialise base-class state; the ALTO and plain-text output paths start
# out unset.
#
# @param file_set [Object] file set text derivatives are made for
def initialize(file_set)
  super
  @alto_path = @txt_path = nil
end
|
|
8
|
+
|
|
9
|
+
# Create text derivatives for the source file.
#
# When TextFormatsFromALTOService reports an ALTO path for the file set,
# that service creates the derivatives; otherwise OCR is run.
#
# @param src [String] path to the source file
def create_derivatives(src)
  alto_service = NewspaperWorks::TextFormatsFromALTOService.new(file_set)
  if alto_service.alto_path.nil?
    create_derivatives_from_ocr(src)
  else
    alto_service.create_derivatives(src)
  end
end
|
|
16
|
+
|
|
17
|
+
# Run page OCR on the source file and write plain text, ALTO XML and JSON
# derivatives.
#
# @param filename [String] path to the source file
def create_derivatives_from_ocr(filename)
  @source_path = filename
  # prepare destinations (directories included) for ALTO .xml, plain .txt
  # and flat .json output, in that order
  @alto_path, @txt_path, @json_path = %w[xml txt json].map { |ext| prepare_path(ext) }
  page_ocr = NewspaperWorks::TextExtraction::PageOCR.new(filename)
  # NOTE(review): per the original comment, OCR runs once, on the first call
  # to either .alto or .plain -- not verifiable from this file.
  write_plain_text(page_ocr.plain)
  write_alto(page_ocr.alto)
  write_json(page_ocr.word_json)
end
|
|
31
|
+
|
|
32
|
+
# Write the ALTO XML to @alto_path, replacing any existing content.
#
# @param xml [String]
# @return [Integer] number of bytes written
def write_alto(xml)
  File.write(@alto_path, xml)
end
|
|
37
|
+
|
|
38
|
+
# Write the plain text to @txt_path, replacing any existing content.
#
# @param text [String]
# @return [Integer] number of bytes written
def write_plain_text(text)
  File.write(@txt_path, text)
end
|
|
43
|
+
|
|
44
|
+
# Write the JSON text to @json_path, replacing any existing content.
#
# @param text [String]
# @return [Integer] number of bytes written
def write_json(text)
  File.write(@json_path, text)
end
|
|
49
|
+
|
|
50
|
+
# Remove the txt, xml and json derivatives, via one base-class cleanup call
# per extension.
#
# @return [Object] whatever the last base-class call returns
def cleanup_derivatives
  %w[txt xml json].map { |ext| super(ext) }.last
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
module NewspaperWorks
|
|
2
|
+
# Plugin to make text format derivatives (JSON, plain-text) from ALTO,
|
|
3
|
+
# either existing derivative, or an impending attachment.
|
|
4
|
+
# NOTE: to keep this from conflicting with TextExtractionDerivativeService,
|
|
5
|
+
# this class should be invoked by it, not PluggableDerivativeService.
|
|
6
|
+
class TextFormatsFromALTOService < NewspaperPageDerivativeService
|
|
7
|
+
TARGET_EXT = 'tiff'.freeze
|
|
8
|
+
|
|
9
|
+
# Write derivative data for the given destination name as UTF-8 text.
#
# `prepare_path` already returns the derivative path after creating the
# base of the "pairtree" directory structure for the extension and file
# set, so its return value is used instead of asking the path factory for
# the same path a second time.
#
# @param destination [String] destination name / extension, e.g. 'json'
# @param data [String] text to write
# @return [Integer] number of bytes written
def save_derivative(destination, data)
  save_path = prepare_path(destination)
  File.open(save_path, 'w:UTF-8') { |f| f.write(data) }
end
|
|
22
|
+
|
|
23
|
+
# Does the path name an existing file of non-zero size?
#
# @param path [String, nil]
# @return [Boolean] false for nil, a missing file, or an empty file
def nonempty_file?(path)
  return false if path.nil?

  # File.size? is nil for a missing or zero-length file
  !File.size?(path).nil?
end
|
|
28
|
+
|
|
29
|
+
# if there was no derivative yet, there might be one in-transit from
|
|
30
|
+
# an ingest, so check for that, and use its source if applicable:
|
|
31
|
+
# Source path of an 'xml' attachment recorded for this file set, when that
# path names a non-empty file.
#
# Covers the case where no derivative exists yet but one may be in transit
# from an ingest.
#
# @return [String, nil]
def incoming_alto_path
  attachments = NewspaperWorks::DerivativeAttachment.where(
    fileset_id: @file_set.id,
    destination_name: 'xml'
  )
  candidate = attachments.pluck(:path).uniq.first
  nonempty_file?(candidate) ? candidate : nil
end
|
|
38
|
+
|
|
39
|
+
# Path to ALTO XML for this file set: the existing 'xml' derivative when it
# is a non-empty file, otherwise the incoming attachment path (if any).
#
# @return [String, nil]
def alto_path
  existing = derivative_path_factory.derivative_path_for_reference(@file_set, 'xml')
  nonempty_file?(existing) ? existing : incoming_alto_path
end
|
|
48
|
+
|
|
49
|
+
# Content of the ALTO XML file, read as UTF-8.
#
# @return [String, nil] nil when no ALTO path is available
def alto
  location = alto_path
  return if location.nil?

  File.read(location, encoding: 'UTF-8')
end
|
|
53
|
+
|
|
54
|
+
# Make JSON and plain-text derivatives from the file set's ALTO XML.
#
# This plugin makes derivatives of a derivative, so the filename argument
# is ignored. Nothing happens when no ALTO is available.
#
# @param _filename [String] unused
def create_derivatives(_filename)
  alto_xml = alto
  return if alto_xml.nil?

  # Width/height of the original file (when there is one) are handed to the
  # reader; the original comment says this helps ensure proper scaling.
  original = @file_set.original_file
  width, height =
    if original.nil?
      [nil, nil]
    else
      [original.width[0].to_i, original.height[0].to_i]
    end
  # AltoReader is responsible for transcoding, this class just saves result
  reader = NewspaperWorks::TextExtraction::AltoReader.new(alto_xml, width, height)
  save_derivative('json', reader.json)
  save_derivative('txt', reader.text)
end
|
|
71
|
+
|
|
72
|
+
# Intentionally a no-op: per the original note, cleaning ALTO, JSON and TXT
# is the job of NewspaperWorks::TextExtractionDerivativeService.
#
# @return [nil]
def cleanup_derivatives(*_args); end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require 'open3'
|
|
2
|
+
|
|
3
|
+
module NewspaperWorks
|
|
4
|
+
class TIFFDerivativeService < NewspaperPageDerivativeService
|
|
5
|
+
TARGET_EXT = 'tiff'.freeze
|
|
6
|
+
|
|
7
|
+
# For imagemagick commands, the output type is determined by the
|
|
8
|
+
# output file's extension.
|
|
9
|
+
# TIFF (LZW, 8 bit grayscale)
|
|
10
|
+
GRAY_CMD = 'convert %<source_file>s ' \
|
|
11
|
+
'-depth 8 -colorspace Gray ' \
|
|
12
|
+
'-compress lzw %<out_file>s'.freeze
|
|
13
|
+
|
|
14
|
+
# Monochrome one-bit black/white TIFF, Group 4 compressed:
|
|
15
|
+
MONO_CMD = 'convert %<source_file>s ' \
|
|
16
|
+
'-depth 1 -monochrome -compress Group4 -type bilevel ' \
|
|
17
|
+
'%<out_file>s'.freeze
|
|
18
|
+
|
|
19
|
+
# sRGB color TIFF (8 bits per channel, lzw)
|
|
20
|
+
COLOR_CMD = 'convert %<source_file>s ' \
|
|
21
|
+
'-depth 24 ' \
|
|
22
|
+
'-compress lzw %<out_file>s'.freeze
|
|
23
|
+
|
|
24
|
+
# graphicsmagick prefix, may be needed for jp2 source on Ubuntu.
# `convert_cmd` refers to GM_PREFIX; the constant was previously only
# defined under the misspelled name, which raised NameError for jp2 sources.
GM_PREFIX = 'gm '.freeze
# Misspelled original name, kept so existing references keep resolving.
GM_PREFX = GM_PREFIX
|
|
26
|
+
|
|
27
|
+
# Pass the file set straight through to the base class.
#
# @param file_set [Object] file set a TIFF derivative is made for
def initialize(file_set)
  super
end
|
|
30
|
+
|
|
31
|
+
# Get conversion command; command varies on whether or not we have
|
|
32
|
+
# JP2 source, and whether we have color or grayscale material.
|
|
33
|
+
# Render the conversion command for the current source and destination.
#
# One-bit sources use the monochrome template; otherwise the color or
# grayscale template is chosen. A source ending in 'pdf' is narrowed to its
# first page with '[0]', and a source ending in 'jp2' gets the
# graphicsmagick prefix.
#
# @return [String] command line
def convert_cmd
  input = @source_path.ends_with?('pdf') ? @source_path + '[0]' : @source_path
  color = use_color?
  template =
    if one_bit?
      MONO_CMD
    elsif color
      COLOR_CMD
    else
      GRAY_CMD
    end
  rendered = format(template, source_file: input, out_file: @dest_path)
  # normalization of command based on source
  return rendered unless @source_path.ends_with?('jp2')
  GM_PREFIX + rendered
end
|
|
42
|
+
|
|
43
|
+
# Create the TIFF derivative for the given source file, unless the file
# set's mime type is already image/tiff.
#
# @param filename [String] path to the source file
# @return [String, nil] output of the conversion command, or nil when skipped
def create_derivatives(filename)
  # base class loads @source_path and @dest_path
  super(filename)
  # no creation if TIFF master
  return if mime_type == 'image/tiff'

  # run the imagemagick or graphicsmagick command
  command = convert_cmd
  %x(#{command})
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
module NewspaperWorks
|
|
2
|
+
# validates start and end date are properly formatted and end date comes after
|
|
3
|
+
# or on the same date as the start date.
|
|
4
|
+
class PublicationDateStartEndValidator < ActiveModel::Validator
|
|
5
|
+
# yyyy, yyyy-mm or yyyy-mm-dd. The day part (01-31) is only accepted after
# a month part (01-12); the previous pattern rejected days 10 and 20,
# accepted day 00, and let a day follow the year directly ("2000-25").
DATE_RANGE_REGEX = /\A\d{4}(-(0[1-9]|1[0-2])(-(0[1-9]|[12][0-9]|3[01]))?)?\z/
|
|
6
|
+
|
|
7
|
+
# Validate the record's publication start and end dates: both must be well
# formed, and only then is their order checked.
#
# @param record [Object] responds to publication_date_start,
#   publication_date_end and errors
# @return [Boolean]
def validate(record)
  first_date = record.publication_date_start
  last_date = record.publication_date_end
  return false unless valid_dates?(first_date, last_date, record)

  start_before_end?(first_date, last_date, record)
end
|
|
12
|
+
|
|
13
|
+
private
|
|
14
|
+
|
|
15
|
+
# Is the date string well formed and, when it has a day part, a real
# calendar date?
#
# @param pub_date [String]
# @return [Boolean]
def publication_date_valid?(pub_date)
  return false unless DATE_RANGE_REGEX.match(pub_date)

  parts = pub_date.split("-").map(&:to_i)
  # only a full year-month-day can be checked against the calendar
  return true unless parts.length == 3

  Date.valid_date?(parts[0], parts[1], parts[2])
end
|
|
22
|
+
|
|
23
|
+
# Check that the end date is not earlier than the start date, adding an
# error on :publication_date_start when it is.
#
# Dates are compared component by component (year, month, day) and the
# comparison stops at the first component that differs. Previously a later
# component could still fail the check after an earlier one had already
# shown the end to be later (start 2000-05, end 2001-03 was rejected).
# Components present on only one side are not compared.
#
# @param start_date [String, nil] yyyy[-mm][-dd]
# @param end_date [String, nil] yyyy[-mm][-dd]
# @param record [Object] responds to errors
# @return [Boolean] true when no error is recorded on :publication_date_start
def start_before_end?(start_date, end_date, record)
  return true unless start_date && end_date

  date_error = "Publication start date must be earlier or the same as end date."
  pub_start = start_date.split("-").first(3)
  pub_end = end_date.split("-")
  pub_start.zip(pub_end).each do |from, to|
    # end date is less specific, or already later: nothing more to compare
    break if to.nil? || to > from
    next if to == from

    record.errors[:publication_date_start] << date_error
    break
  end
  record.errors[:publication_date_start].blank?
end
|
|
36
|
+
|
|
37
|
+
# Record an error for each supplied date that is not well formed.
#
# @param start_date [String, nil]
# @param end_date [String, nil]
# @param record [Object] responds to errors
# @return [Boolean] true when neither date field has errors
def valid_dates?(start_date, end_date, record)
  date_error = "Incorrect Date. Date input should be formatted yyyy[-mm][-dd] and be a valid date."
  { publication_date_start: start_date, publication_date_end: end_date }.each do |field, value|
    next unless value
    record.errors[field] << date_error unless publication_date_valid?(value)
  end
  record.errors[:publication_date_start].blank? && record.errors[:publication_date_end].blank?
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
module NewspaperWorks
|
|
2
|
+
# validates that a properly formatted date has been entered
|
|
3
|
+
class PublicationDateValidator < ActiveModel::Validator
|
|
4
|
+
DATE_REGEX = /\A\d{4}-((0[1-9])|(1[0-2]))-((0[1-9])|([1-2][0-9])|(3[0-1]))\z/
|
|
5
|
+
# Add an error on :publication_date when a present value is not a
# yyyy-mm-dd string naming a real calendar date. Blank values are skipped.
#
# @param record [Object] responds to publication_date and errors
def validate(record)
  pub_date = record.publication_date
  return unless pub_date.present?

  error_msg = "Incorrect Date. Date input should be formatted yyyy-mm-dd and be a valid date."
  if DATE_REGEX.match(pub_date)
    year, month, day = pub_date.split("-").map(&:to_i)
    record.errors[:publication_date] << error_msg unless Date.valid_date?(year, month, day)
  else
    record.errors[:publication_date] << error_msg
    nil
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
<%# Gallery tile for one search result: a thumbnail carrying the fileset id and query as data attributes, followed by the caption partials. -%>
<% query_params = current_search_session.query_params -%>
<div class="document col-xs-6 col-md-3">
  <div class="thumbnail" data-fileset="<%= document.file_set_ids&.first %>" data-query="<%= highlight_matches(document, 'all_text_tsimv', 'em') || search_query(query_params) %>">
    <%= render_newspaper_thumbnail_tag(document, query_params) %>
    <div class="caption">
      <%= render_document_partials document, blacklight_config.view_config(:gallery).partials, document_counter: document_counter %>
    </div>
  </div>
</div>
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
<%# Gallery tile for one search result: a thumbnail carrying the fileset id and query as data attributes, followed by the caption partials. -%>
<% query_params = current_search_session.query_params -%>
<div class="document col-xs-6 col-md-3">
  <div class="thumbnail" data-fileset="<%= document.file_set_ids&.first %>" data-query="<%= highlight_matches(document, 'all_text_tsimv', 'em') || search_query(query_params) %>">
    <%= render_newspaper_thumbnail_tag(document, query_params) %>
    <div class="caption">
      <%= render_document_partials document, blacklight_config.view_config(:gallery).partials, document_counter: document_counter %>
    </div>
  </div>
</div>
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
<%# based on blacklight/app/views/catalog/_index_header_default.html.erb %>
<%# Header row for one result in the index view: optional counter, linked title, then the document actions. -%>
<div class="documentHeader row">
  <%# Actions (bookmark functions) are captured first, because the number of
      bootstrap columns left for the title depends on whether any were rendered.
  -%>
  <% document_actions = capture do %>
    <%= render_index_doc_actions document, wrapping_class: "index-document-functions col-sm-3 col-lg-2" %>
  <% end %>
  <% title_columns = document_actions.present? ? "col-sm-9 col-lg-10" : "col-md-12" %>
  <h3 class="index_title document-title-heading <%= title_columns %>">
    <% counter = document_counter_with_offset(document_counter) %>
    <% if counter %>
      <span class="document-counter">
        <%= t('blacklight.search.documents.counter', counter: counter) %>
      </span>
    <% end %>
    <%= link_to document.title_or_label,
                hyrax_newspaper_article_path(document.id,
                                             anchor: iiif_search_anchor(current_search_session.query_params)) %>
  </h3>
  <%= document_actions %>
</div>
|