newspaper_works 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.fcrepo_wrapper +4 -0
- data/.gitignore +43 -0
- data/.rubocop.yml +143 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +50 -0
- data/Gemfile +47 -0
- data/LICENSE +203 -0
- data/README.md +159 -0
- data/Rakefile +38 -0
- data/app/actors/hyrax/actors/newspaper_article_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_container_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_issue_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_page_actor.rb +8 -0
- data/app/actors/hyrax/actors/newspaper_title_actor.rb +8 -0
- data/app/actors/newspaper_works/actors/newspaper_works_upload_actor.rb +88 -0
- data/app/assets/config/newspaper_works_manifest.js +2 -0
- data/app/assets/images/newspaper_works/.keep +0 -0
- data/app/assets/javascripts/newspaper_works/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/newspaper_works/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/newspaper_works/thumbnail_highlights.js.erb +102 -0
- data/app/assets/javascripts/newspaper_works.js +4 -0
- data/app/assets/stylesheets/newspaper_works/_issue_search.scss +13 -0
- data/app/assets/stylesheets/newspaper_works/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/newspaper_works/_newspaper_works.scss +4 -0
- data/app/assets/stylesheets/newspaper_works/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/newspaper_works/_search_results.scss +12 -0
- data/app/controllers/hyrax/newspaper_articles_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_containers_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_issues_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_pages_controller.rb +14 -0
- data/app/controllers/hyrax/newspaper_titles_controller.rb +13 -0
- data/app/controllers/newspaper_works/newspapers_controller.rb +117 -0
- data/app/controllers/newspaper_works/newspapers_search_controller.rb +26 -0
- data/app/forms/hyrax/newspaper_article_form.rb +11 -0
- data/app/forms/hyrax/newspaper_container_form.rb +11 -0
- data/app/forms/hyrax/newspaper_issue_form.rb +11 -0
- data/app/forms/hyrax/newspaper_page_form.rb +15 -0
- data/app/forms/hyrax/newspaper_title_form.rb +12 -0
- data/app/forms/newspaper_works/newspaper_core_form_data.rb +17 -0
- data/app/helpers/hyrax/newspaper_articles_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_containers_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_issues_helper.rb +5 -0
- data/app/helpers/hyrax/newspaper_pages_helper.rb +5 -0
- data/app/helpers/newspaper_works/application_helper.rb +5 -0
- data/app/helpers/newspaper_works/breadcrumb_helper.rb +92 -0
- data/app/helpers/newspaper_works/newspaper_works_helper_behavior.rb +103 -0
- data/app/helpers/newspaper_works/newspapers_helper.rb +5 -0
- data/app/indexers/concerns/newspaper_works/indexes_full_text.rb +17 -0
- data/app/indexers/concerns/newspaper_works/indexes_place_of_publication.rb +67 -0
- data/app/indexers/concerns/newspaper_works/indexes_publication_date_range.rb +35 -0
- data/app/indexers/concerns/newspaper_works/indexes_relationships.rb +125 -0
- data/app/indexers/newspaper_article_indexer.rb +16 -0
- data/app/indexers/newspaper_container_indexer.rb +18 -0
- data/app/indexers/newspaper_issue_indexer.rb +26 -0
- data/app/indexers/newspaper_page_indexer.rb +9 -0
- data/app/indexers/newspaper_title_indexer.rb +19 -0
- data/app/indexers/newspaper_works/newspaper_core_indexer.rb +21 -0
- data/app/jobs/newspaper_works/application_job.rb +4 -0
- data/app/jobs/newspaper_works/compose_issue_pdf_job.rb +13 -0
- data/app/jobs/newspaper_works/create_issue_pages_job.rb +19 -0
- data/app/mailers/newspaper_works/application_mailer.rb +8 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior.rb +82 -0
- data/app/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior.rb +27 -0
- data/app/models/concerns/newspaper_works/newspaper_core_metadata.rb +67 -0
- data/app/models/concerns/newspaper_works/place_of_publication_behavior.rb +15 -0
- data/app/models/concerns/newspaper_works/scanned_media_metadata.rb +43 -0
- data/app/models/concerns/newspaper_works/solr/document.rb +25 -0
- data/app/models/file_set.rb +10 -0
- data/app/models/newspaper_article.rb +158 -0
- data/app/models/newspaper_container.rb +86 -0
- data/app/models/newspaper_issue.rb +115 -0
- data/app/models/newspaper_page.rb +70 -0
- data/app/models/newspaper_title.rb +111 -0
- data/app/models/newspaper_works/application_record.rb +6 -0
- data/app/models/newspaper_works/derivative_attachment.rb +8 -0
- data/app/models/newspaper_works/ingest_file_relation.rb +14 -0
- data/app/presenters/hyrax/newspaper_article_presenter.rb +38 -0
- data/app/presenters/hyrax/newspaper_container_presenter.rb +11 -0
- data/app/presenters/hyrax/newspaper_issue_presenter.rb +62 -0
- data/app/presenters/hyrax/newspaper_page_presenter.rb +72 -0
- data/app/presenters/hyrax/newspaper_title_presenter.rb +86 -0
- data/app/presenters/newspaper_works/iiif_manifest_presenter_behavior.rb +29 -0
- data/app/presenters/newspaper_works/issue_info_presenter.rb +29 -0
- data/app/presenters/newspaper_works/newspaper_core_presenter.rb +9 -0
- data/app/presenters/newspaper_works/persistent_url_presenter_behavior.rb +16 -0
- data/app/presenters/newspaper_works/place_of_publication_presenter_behavior.rb +8 -0
- data/app/presenters/newspaper_works/scanned_media_presenter.rb +7 -0
- data/app/presenters/newspaper_works/title_info_presenter.rb +13 -0
- data/app/search_builders/concerns/newspaper_works/exclude_models.rb +16 -0
- data/app/search_builders/concerns/newspaper_works/highlight_search_params.rb +14 -0
- data/app/search_builders/newspaper_works/newspapers_search_builder.rb +26 -0
- data/app/services/hyrax/article_genre_service.rb +9 -0
- data/app/services/newspaper_works/jp2_derivative_service.rb +120 -0
- data/app/services/newspaper_works/newspaper_page_derivative_service.rb +91 -0
- data/app/services/newspaper_works/pdf_derivative_service.rb +45 -0
- data/app/services/newspaper_works/pluggable_derivative_service.rb +114 -0
- data/app/services/newspaper_works/text_extraction_derivative_service.rb +56 -0
- data/app/services/newspaper_works/text_formats_from_alto_service.rb +77 -0
- data/app/services/newspaper_works/tiff_derivative_service.rb +54 -0
- data/app/validators/newspaper_works/publication_date_start_end_validator.rb +48 -0
- data/app/validators/newspaper_works/publication_date_validator.rb +16 -0
- data/app/views/catalog/_index_gallery_newspaper_article_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb +9 -0
- data/app/views/catalog/_index_header_gallery_newspaper_article.html.erb +23 -0
- data/app/views/catalog/_index_header_gallery_newspaper_page.html.erb +23 -0
- data/app/views/catalog/_index_header_list_newspaper_article.html.erb +7 -0
- data/app/views/catalog/_index_header_list_newspaper_page.html.erb +7 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/catalog/_thumbnail_list_newspaper_article.html.erb +6 -0
- data/app/views/catalog/_thumbnail_list_newspaper_page.html.erb +6 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/app/views/hyrax/newspaper_articles/_newspaper_article.html.erb +2 -0
- data/app/views/hyrax/newspaper_articles/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_containers/_newspaper_container.html.erb +2 -0
- data/app/views/hyrax/newspaper_containers/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_issues/_newspaper_issue.html.erb +2 -0
- data/app/views/hyrax/newspaper_issues/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_pages/_newspaper_page.html.erb +2 -0
- data/app/views/hyrax/newspaper_pages/show.html.erb +1 -0
- data/app/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb +5 -0
- data/app/views/hyrax/newspaper_titles/_issue_search_form.html.erb +33 -0
- data/app/views/hyrax/newspaper_titles/_issues_calendar.html.erb +63 -0
- data/app/views/hyrax/newspaper_titles/_newspaper_title.html.erb +2 -0
- data/app/views/hyrax/newspaper_titles/show.html.erb +54 -0
- data/app/views/newspaper_works/base/_attribute_rows.html.erb +42 -0
- data/app/views/newspaper_works/base/_attributes.html.erb +16 -0
- data/app/views/newspaper_works/base/_metadata.html.erb +6 -0
- data/app/views/newspaper_works/base/_newspaper_hierarchy.html.erb +14 -0
- data/app/views/newspaper_works/base/_persistent_url.html.erb +1 -0
- data/app/views/newspaper_works/base/_show.html.erb +45 -0
- data/app/views/newspaper_works/newspapers_search/_date_fields.html.erb +29 -0
- data/app/views/newspaper_works/newspapers_search/_facet_layout.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/_facet_limit.html.erb +17 -0
- data/app/views/newspaper_works/newspapers_search/_front_pages_input.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_keyword_input.html.erb +18 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_facets.html.erb +5 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_form.html.erb +13 -0
- data/app/views/newspaper_works/newspapers_search/_newspapers_search_help.html.erb +8 -0
- data/app/views/newspaper_works/newspapers_search/search.html.erb +13 -0
- data/app/views/records/edit_fields/_alternate_title.html.erb +4 -0
- data/app/views/records/edit_fields/_genre.html.erb +4 -0
- data/app/views/records/edit_fields/_place_of_publication.html.erb +14 -0
- data/app/views/records/edit_fields/_subtitle.html.erb +4 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/newspaper_article.de.yml +12 -0
- data/config/locales/newspaper_article.en.yml +12 -0
- data/config/locales/newspaper_article.es.yml +12 -0
- data/config/locales/newspaper_article.fr.yml +12 -0
- data/config/locales/newspaper_article.it.yml +12 -0
- data/config/locales/newspaper_article.pt-BR.yml +12 -0
- data/config/locales/newspaper_article.zh.yml +12 -0
- data/config/locales/newspaper_container.de.yml +8 -0
- data/config/locales/newspaper_container.en.yml +8 -0
- data/config/locales/newspaper_container.es.yml +8 -0
- data/config/locales/newspaper_container.fr.yml +8 -0
- data/config/locales/newspaper_container.it.yml +8 -0
- data/config/locales/newspaper_container.pt-BR.yml +8 -0
- data/config/locales/newspaper_container.zh.yml +8 -0
- data/config/locales/newspaper_issue.de.yml +8 -0
- data/config/locales/newspaper_issue.en.yml +8 -0
- data/config/locales/newspaper_issue.es.yml +8 -0
- data/config/locales/newspaper_issue.fr.yml +8 -0
- data/config/locales/newspaper_issue.it.yml +8 -0
- data/config/locales/newspaper_issue.pt-BR.yml +8 -0
- data/config/locales/newspaper_issue.zh.yml +8 -0
- data/config/locales/newspaper_page.de.yml +15 -0
- data/config/locales/newspaper_page.en.yml +15 -0
- data/config/locales/newspaper_page.es.yml +15 -0
- data/config/locales/newspaper_page.fr.yml +15 -0
- data/config/locales/newspaper_page.it.yml +15 -0
- data/config/locales/newspaper_page.pt-BR.yml +15 -0
- data/config/locales/newspaper_page.zh.yml +15 -0
- data/config/locales/newspaper_title.de.yml +8 -0
- data/config/locales/newspaper_title.en.yml +8 -0
- data/config/locales/newspaper_title.es.yml +8 -0
- data/config/locales/newspaper_title.fr.yml +8 -0
- data/config/locales/newspaper_title.it.yml +8 -0
- data/config/locales/newspaper_title.pt-BR.yml +8 -0
- data/config/locales/newspaper_title.zh.yml +8 -0
- data/config/locales/newspaper_works.de.yml +50 -0
- data/config/locales/newspaper_works.en.yml +52 -0
- data/config/locales/newspaper_works.es.yml +52 -0
- data/config/locales/newspaper_works.fr.yml +52 -0
- data/config/locales/newspaper_works.it.yml +52 -0
- data/config/locales/newspaper_works.pt-BR.yml +52 -0
- data/config/locales/newspaper_works.zh.yml +52 -0
- data/config/routes.rb +9 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_newspaper_works_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_newspaper_works_ingest_file_relations.rb +11 -0
- data/lib/generators/newspaper_works/assets_generator.rb +29 -0
- data/lib/generators/newspaper_works/blacklight_advanced_search_generator.rb +44 -0
- data/lib/generators/newspaper_works/blacklight_iiif_search_generator.rb +41 -0
- data/lib/generators/newspaper_works/catalog_controller_generator.rb +60 -0
- data/lib/generators/newspaper_works/install_generator.rb +97 -0
- data/lib/generators/newspaper_works/templates/annotation_behavior.rb +6 -0
- data/lib/generators/newspaper_works/templates/config/authorities/newspaper_article_genres.yml +86 -0
- data/lib/generators/newspaper_works/templates/config/initializers/newspaper_works.rb +12 -0
- data/lib/generators/newspaper_works/templates/config/initializers/patch_blacklight_advanced_search.rb +74 -0
- data/lib/generators/newspaper_works/templates/custom_search_builder.rb +23 -0
- data/lib/generators/newspaper_works/templates/newspaper_works.scss +1 -0
- data/lib/generators/newspaper_works/templates/newspaper_works_helper.rb +3 -0
- data/lib/generators/newspaper_works/templates/search_behavior.rb +6 -0
- data/lib/newspaper_works/configuration.rb +14 -0
- data/lib/newspaper_works/data/fileset_helper.rb +25 -0
- data/lib/newspaper_works/data/path_helper.rb +40 -0
- data/lib/newspaper_works/data/work_derivatives.rb +314 -0
- data/lib/newspaper_works/data/work_file.rb +92 -0
- data/lib/newspaper_works/data/work_files.rb +181 -0
- data/lib/newspaper_works/data.rb +35 -0
- data/lib/newspaper_works/engine.rb +42 -0
- data/lib/newspaper_works/errors.rb +14 -0
- data/lib/newspaper_works/ingest/base_ingest.rb +69 -0
- data/lib/newspaper_works/ingest/base_publication_info.rb +35 -0
- data/lib/newspaper_works/ingest/batch_ingest_helper.rb +44 -0
- data/lib/newspaper_works/ingest/batch_issue_ingester.rb +129 -0
- data/lib/newspaper_works/ingest/chronam_publication_info.rb +133 -0
- data/lib/newspaper_works/ingest/from_command.rb +52 -0
- data/lib/newspaper_works/ingest/image_ingest_issues.rb +43 -0
- data/lib/newspaper_works/ingest/issue_images.rb +51 -0
- data/lib/newspaper_works/ingest/lc_publication_info.rb +144 -0
- data/lib/newspaper_works/ingest/named_issue_metadata.rb +60 -0
- data/lib/newspaper_works/ingest/ndnp/batch_ingester.rb +64 -0
- data/lib/newspaper_works/ingest/ndnp/batch_xml_ingest.rb +72 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingest.rb +99 -0
- data/lib/newspaper_works/ingest/ndnp/container_ingester.rb +84 -0
- data/lib/newspaper_works/ingest/ndnp/container_metadata.rb +87 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingest.rb +81 -0
- data/lib/newspaper_works/ingest/ndnp/issue_ingester.rb +101 -0
- data/lib/newspaper_works/ingest/ndnp/issue_metadata.rb +96 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_asset_helper.rb +20 -0
- data/lib/newspaper_works/ingest/ndnp/ndnp_mets_helper.rb +70 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingest.rb +47 -0
- data/lib/newspaper_works/ingest/ndnp/page_ingester.rb +157 -0
- data/lib/newspaper_works/ingest/ndnp/page_metadata.rb +112 -0
- data/lib/newspaper_works/ingest/ndnp.rb +21 -0
- data/lib/newspaper_works/ingest/newspaper_issue_ingest.rb +56 -0
- data/lib/newspaper_works/ingest/newspaper_page_ingest.rb +6 -0
- data/lib/newspaper_works/ingest/page_image.rb +52 -0
- data/lib/newspaper_works/ingest/path_enumeration.rb +52 -0
- data/lib/newspaper_works/ingest/pdf_images.rb +85 -0
- data/lib/newspaper_works/ingest/pdf_issue.rb +20 -0
- data/lib/newspaper_works/ingest/pdf_issues.rb +39 -0
- data/lib/newspaper_works/ingest/pdf_pages.rb +114 -0
- data/lib/newspaper_works/ingest/pub_finder.rb +89 -0
- data/lib/newspaper_works/ingest/publication_info.rb +44 -0
- data/lib/newspaper_works/ingest.rb +90 -0
- data/lib/newspaper_works/issue_pdf_composer.rb +111 -0
- data/lib/newspaper_works/logging.rb +54 -0
- data/lib/newspaper_works/page_finder.rb +62 -0
- data/lib/newspaper_works/resource_fetcher.rb +78 -0
- data/lib/newspaper_works/text_extraction/alto_reader.rb +122 -0
- data/lib/newspaper_works/text_extraction/page_ocr.rb +100 -0
- data/lib/newspaper_works/text_extraction/render_alto.rb +84 -0
- data/lib/newspaper_works/text_extraction/word_coords_builder.rb +30 -0
- data/lib/newspaper_works/text_extraction.rb +10 -0
- data/lib/newspaper_works/version.rb +3 -0
- data/lib/newspaper_works.rb +19 -0
- data/lib/tasks/newspaper_works_tasks.rake +39 -0
- data/newspaper_works.gemspec +49 -0
- data/spec/.keep.txt +1 -0
- data/spec/actors/newspaper_works/actors/newspaper_works_upload_actor_spec.rb +69 -0
- data/spec/controllers/catalog_controller_spec.rb +63 -0
- data/spec/controllers/newspaper_works/newspapers_controller_spec.rb +114 -0
- data/spec/controllers/newspaper_works/newspapers_search_controller_spec.rb +21 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_issue_ingest.rb +6 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_ingest.rb +6 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/features/front_pages_for_title_spec.rb +19 -0
- data/spec/features/newspaper_title_search_spec.rb +30 -0
- data/spec/features/newspapers_search_spec.rb +49 -0
- data/spec/features/search_results_thumbnail_highlights_spec.rb +33 -0
- data/spec/features_shared.rb +71 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/http404-expected +0 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn84038814.rdf +1028 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn93059126.rdf +36 -0
- data/spec/fixtures/files/resource_mocks/chronam/sn94051019.rdf +37 -0
- data/spec/fixtures/files/resource_mocks/geonames/Chicopee +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Denver +1104 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/Marysville2 +279 -0
- data/spec/fixtures/files/resource_mocks/geonames/SLC +1104 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn2099999999 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn82014496 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83020109 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83021453 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn83045396 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn84038814 +2 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn93059126 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn94051019 +1 -0
- data/spec/fixtures/files/resource_mocks/lccn/sn99999999 +1 -0
- data/spec/fixtures/files/resource_mocks/urls.json +82 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/forms/hyrax/newspaper_article_form_spec.rb +33 -0
- data/spec/forms/hyrax/newspaper_container_form_spec.rb +30 -0
- data/spec/forms/hyrax/newspaper_issue_form_spec.rb +31 -0
- data/spec/forms/hyrax/newspaper_page_form_spec.rb +28 -0
- data/spec/forms/hyrax/newspaper_title_form_spec.rb +31 -0
- data/spec/forms/newspaper_works/newspaper_core_form_data_spec.rb +12 -0
- data/spec/helpers/newspaper_works/breadcrumb_helper_spec.rb +82 -0
- data/spec/helpers/newspaper_works_helper_spec.rb +57 -0
- data/spec/indexers/concerns/newspaper_works/indexes_full_text_spec.rb +31 -0
- data/spec/indexers/concerns/newspaper_works/indexes_place_of_publication_spec.rb +53 -0
- data/spec/indexers/concerns/newspaper_works/indexes_publication_date_range_spec.rb +39 -0
- data/spec/indexers/concerns/newspaper_works/indexes_relationships_spec.rb +86 -0
- data/spec/indexers/newspaper_article_indexer_spec.rb +29 -0
- data/spec/indexers/newspaper_issue_indexer_spec.rb +19 -0
- data/spec/indexers/newspaper_title_indexer_spec.rb +22 -0
- data/spec/indexers/newspaper_works/newspaper_core_indexer_spec.rb +23 -0
- data/spec/lib/newspaper_works/configuration_spec.rb +18 -0
- data/spec/lib/newspaper_works/data/work_derivatives_spec.rb +245 -0
- data/spec/lib/newspaper_works/data/work_file_spec.rb +99 -0
- data/spec/lib/newspaper_works/data/work_files_spec.rb +224 -0
- data/spec/lib/newspaper_works/ingest/batch_issue_ingester_spec.rb +158 -0
- data/spec/lib/newspaper_works/ingest/chronam_publication_info_spec.rb +35 -0
- data/spec/lib/newspaper_works/ingest/from_command_spec.rb +75 -0
- data/spec/lib/newspaper_works/ingest/image_ingest_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/ingest_shared.rb +75 -0
- data/spec/lib/newspaper_works/ingest/issue_images_spec.rb +65 -0
- data/spec/lib/newspaper_works/ingest/lc_publication_info_spec.rb +34 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_ingester_spec.rb +131 -0
- data/spec/lib/newspaper_works/ingest/ndnp/batch_xml_ingest_spec.rb +64 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingest_spec.rb +44 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_ingester_spec.rb +126 -0
- data/spec/lib/newspaper_works/ingest/ndnp/container_metadata_spec.rb +36 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingest_spec.rb +108 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_ingester_spec.rb +155 -0
- data/spec/lib/newspaper_works/ingest/ndnp/issue_metadata_spec.rb +84 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingest_spec.rb +79 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_ingester_spec.rb +184 -0
- data/spec/lib/newspaper_works/ingest/ndnp/page_metadata_spec.rb +85 -0
- data/spec/lib/newspaper_works/ingest/newspaper_issue_ingest_spec.rb +83 -0
- data/spec/lib/newspaper_works/ingest/newspaper_page_ingest_spec.rb +77 -0
- data/spec/lib/newspaper_works/ingest/page_image_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_images_spec.rb +32 -0
- data/spec/lib/newspaper_works/ingest/pdf_issue_spec.rb +29 -0
- data/spec/lib/newspaper_works/ingest/pdf_issues_spec.rb +62 -0
- data/spec/lib/newspaper_works/ingest/pdf_pages_spec.rb +110 -0
- data/spec/lib/newspaper_works/ingest/pub_finder_spec.rb +58 -0
- data/spec/lib/newspaper_works/ingest/publication_info_spec.rb +61 -0
- data/spec/lib/newspaper_works/ingest_spec.rb +45 -0
- data/spec/lib/newspaper_works/issue_pdf_composer_spec.rb +101 -0
- data/spec/lib/newspaper_works/logging_spec.rb +53 -0
- data/spec/lib/newspaper_works/page_finder_spec.rb +53 -0
- data/spec/lib/newspaper_works/resource_fetcher_spec.rb +65 -0
- data/spec/lib/newspaper_works/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/lib/newspaper_works/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/lib/newspaper_works/text_extraction/render_alto_spec.rb +54 -0
- data/spec/lib/newspaper_works/text_extraction/word_coords_builder_spec.rb +30 -0
- data/spec/lib/tasks/newspaper_works_rake_spec.rb +124 -0
- data/spec/misc_shared.rb +109 -0
- data/spec/model_shared.rb +134 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/annotation_behavior_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/blacklight_iiif_search/search_behavior_spec.rb +27 -0
- data/spec/models/concerns/newspaper_works/newspaper_core_metadata_spec.rb +45 -0
- data/spec/models/concerns/newspaper_works/place_of_publication_behavior_spec.rb +17 -0
- data/spec/models/concerns/newspaper_works/scanned_media_metadata_spec.rb +35 -0
- data/spec/models/newspaper_article_spec.rb +73 -0
- data/spec/models/newspaper_container_spec.rb +111 -0
- data/spec/models/newspaper_issue_spec.rb +91 -0
- data/spec/models/newspaper_page_spec.rb +44 -0
- data/spec/models/newspaper_title_spec.rb +116 -0
- data/spec/models/newspaper_works/derivative_attachment_spec.rb +37 -0
- data/spec/models/newspaper_works/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/ndnp_shared.rb +48 -0
- data/spec/presenters/hyrax/newspaper_article_presenter_spec.rb +53 -0
- data/spec/presenters/hyrax/newspaper_container_presenter_spec.rb +20 -0
- data/spec/presenters/hyrax/newspaper_issue_presenter_spec.rb +65 -0
- data/spec/presenters/hyrax/newspaper_page_presenter_spec.rb +75 -0
- data/spec/presenters/hyrax/newspaper_title_presenter_spec.rb +153 -0
- data/spec/presenters/newspaper_works/iiif_manifest_presenter_behavior_spec.rb +32 -0
- data/spec/presenters/newspaper_works/issue_info_presenter_spec.rb +51 -0
- data/spec/presenters/newspaper_works/newspaper_core_presenter_spec.rb +22 -0
- data/spec/presenters/newspaper_works/persistent_url_presenter_behavior_spec.rb +24 -0
- data/spec/presenters/newspaper_works/place_of_publication_presenter_behavior_spec.rb +17 -0
- data/spec/presenters/newspaper_works/scanned_media_presenter_spec.rb +18 -0
- data/spec/presenters/newspaper_works/title_info_presenter_spec.rb +23 -0
- data/spec/routing/route_spec.rb +52 -0
- data/spec/search_builders/custom_search_builder_spec.rb +34 -0
- data/spec/search_builders/newspaper_works/newspapers_search_builder_spec.rb +33 -0
- data/spec/services/hyrax/article_genre_service_spec.rb +12 -0
- data/spec/services/hyrax/resource_types_service_spec.rb +12 -0
- data/spec/services/newspaper_works/jp2_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/newspaper_page_derivative_service_spec.rb +125 -0
- data/spec/services/newspaper_works/pdf_derivative_service_spec.rb +62 -0
- data/spec/services/newspaper_works/pluggable_derivative_service_spec.rb +204 -0
- data/spec/services/newspaper_works/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/newspaper_works/text_formats_from_alto_service_spec.rb +129 -0
- data/spec/services/newspaper_works/tiff_derivative_service_spec.rb +58 -0
- data/spec/spec_helper.rb +261 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +22 -0
- data/spec/views/catalog/_index_gallery_newspaper_page_wrapper.html.erb_spec.rb +36 -0
- data/spec/views/catalog/_index_header_list_newspaper_page.html.erb_spec.rb +26 -0
- data/spec/views/catalog/_thumbnail_list_newspaper_page.html.erb_spec.rb +35 -0
- data/spec/views/hyrax/newspaper_titles/_all_front_pages_form.html.erb_spec.rb +16 -0
- data/spec/views/hyrax/newspaper_titles/_issue_search_form.html.erb_spec.rb +33 -0
- data/spec/views/hyrax/newspaper_titles/_issues_calendar.html.erb_spec.rb +37 -0
- data/spec/views/hyrax/newspaper_titles/show.html.erb_spec.rb +87 -0
- data/spec/views/newspaper_works/base/_attribute_rows.html.erb_spec.rb +60 -0
- data/spec/views/newspaper_works/base/_newspaper_hierarchy.html.erb_spec.rb +80 -0
- data/spec/views/newspaper_works/base/_show.html.erb_spec.rb +78 -0
- data/spec/views/newspaper_works/newspapers_search/search.html.erb_spec.rb +54 -0
- data/spec/views/records/edit_fields/_place_of_publication.html.erb_spec.rb +26 -0
- data/tasks/newspaperworks_dev.rake +26 -0
- data/test/integration/navigation_test.rb +7 -0
- data/test/lib/generators/newspaper_works/install_generator_test.rb +16 -0
- data/test/newspaper_works_test.rb +7 -0
- data/test/test_helper.rb +17 -0
- data/tmp/.keep +0 -0
- metadata +1037 -0
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
require 'hyrax'
|
|
2
|
+
|
|
3
|
+
module NewspaperWorks
|
|
4
|
+
module Data
|
|
5
|
+
# TODO: consider compositional refactoring (not mixins), but this
|
|
6
|
+
# may make readability/comprehensibility higher, and yield
|
|
7
|
+
# higher applied/practical complexity.
|
|
8
|
+
class WorkDerivatives
|
|
9
|
+
include NewspaperWorks::Data::FilesetHelper
|
|
10
|
+
include NewspaperWorks::Data::PathHelper
|
|
11
|
+
|
|
12
|
+
# Work is primary adapted context
|
|
13
|
+
# @return [ActiveFedora::Base] Hyrax work-type object
|
|
14
|
+
attr_accessor :work
|
|
15
|
+
|
|
16
|
+
# FileSet is secondary adapted context
|
|
17
|
+
# @return [FileSet] fileset for work, with regard to these derivatives
|
|
18
|
+
attr_accessor :fileset
|
|
19
|
+
|
|
20
|
+
# Parent pointer to WorkFile object representing fileset
|
|
21
|
+
# @return [NewspaperWorks::Data::WorkFile] WorkFile for fileset, work pair
|
|
22
|
+
attr_accessor :parent
|
|
23
|
+
|
|
24
|
+
# Assigned attachment queue (of paths)
|
|
25
|
+
# @return [Array<String>] list of paths queued for attachment
|
|
26
|
+
attr_accessor :assigned
|
|
27
|
+
|
|
28
|
+
# Assigned deletion queue (of destination names)
|
|
29
|
+
# @return [Array<String>] list of destination names queued for deletion
|
|
30
|
+
attr_accessor :unassigned
|
|
31
|
+
|
|
32
|
+
# mapping of special names Hyrax uses for derivatives, not extension:
|
|
33
|
+
# Class-level override table: a derivative name on the left is replaced by
# the special name Hyrax expects on the right.
@remap_names = { 'jpeg' => 'thumbnail' }

class << self
  # Read/write access to the class-level override table.
  # @return [Hash{String => String}] derivative name => Hyrax special name
  attr_accessor :remap_names
end
|
|
39
|
+
|
|
40
|
+
# alternate constructor spelling:
|
|
41
|
+
# Factory shorthand: builds an adapter exactly as +new+ would, passing all
# three arguments through unchanged.
# @param work [ActiveFedora::Base, String] work object, or string id of a
#   FileSet
# @param fileset [FileSet, nil] specific fileset to adapt, if any
# @param parent [NewspaperWorks::Data::WorkFile, nil] WorkFile wrapping the
#   fileset/work pair, if any
# @return [NewspaperWorks::Data::WorkDerivatives] the new adapter
def self.of(work, fileset = nil, parent = nil)
  new(work, fileset, parent)
end
|
|
44
|
+
|
|
45
|
+
# Adapt work and either specific or first fileset
|
|
46
|
+
# Set up the adapter state for a work and one of its filesets.
# @param work [ActiveFedora::Base, String] work object; may instead be the
#   string id of a FileSet
# @param fileset [FileSet, nil] fileset to adapt; when nil, the result of
#   first_fileset is used instead
# @param parent [NewspaperWorks::Data::WorkFile, nil] WorkFile object for
#   these derivatives
def initialize(work, fileset = nil, parent = nil)
  # The work context must be stored before first_fileset is consulted.
  @work = work
  @fileset = if fileset.nil?
               first_fileset
             else
               fileset
             end
  # WorkFile object that owns these derivatives (may be nil).
  @parent = parent
  # Pending queues: paths awaiting attachment, names awaiting deletion.
  @assigned = []
  @unassigned = []
  # Name-to-path mapping; nil is the sentinel for "not loaded yet".
  @paths = nil
end
|
|
59
|
+
|
|
60
|
+
# Assignment state
|
|
61
|
+
# @return [String] A label describing the state of assignment queues
|
|
62
|
+
def state
  # the mapping is refreshed on every call before it is inspected
  load_paths
  queues_idle = @unassigned.empty? && @assigned.empty?
  if !queues_idle
    # something is queued for attachment or deletion
    'dirty'
  elsif @paths.keys.empty?
    # nothing queued, nothing stored
    'empty'
  else
    'saved'
  end
end
|
|
68
|
+
|
|
69
|
+
# Assign a path to assigned queue for attachment
|
|
70
|
+
# @param path [String] Path to source file
|
|
71
|
+
def assign(path)
  source = normalize_path(path)
  validate_path(source)
  @assigned << source
  # The in-memory queue is transient, so the assignment is also mirrored
  # to the database where other components can see it.
  destination = path_destination_name(source)
  log_assignment(source, destination)
end
|
|
79
|
+
|
|
80
|
+
# Assign a destination name to unassigned queue for deletion -- OR --
|
|
81
|
+
# remove a path from queue of assigned items
|
|
82
|
+
# @param name [String] Destination name (file extension), or source path
|
|
83
|
+
def unassign(name)
  # Case 1: name is a source path still waiting in the attachment queue;
  # withdraw it and drop its mirrored database record.
  queued = @assigned.include?(name)
  @assigned.delete(name) if queued
  unlog_assignment(name, path_destination_name(name)) if queued
  # Case 2: name is the destination name of a derivative that exists on
  # disk; queue it for deletion at the next commit!.
  @unassigned.push(name) if exist?(name)
end
|
|
92
|
+
|
|
93
|
+
# commit pending changes to work files
|
|
94
|
+
# beginning with removals, then with new assignments
|
|
95
|
+
def commit!
  # removals are processed before additions
  @unassigned.each do |destination|
    delete(destination)
  end
  @assigned.each { |source| attach(source, path_destination_name(source)) }
  # both queues are emptied once the work is done
  @assigned = []
  @unassigned = []
end
|
|
104
|
+
|
|
105
|
+
# Given a fileset meeting both of the following conditions:
|
|
106
|
+
# 1. a non-nil import_url value;
|
|
107
|
+
# 2. is attached to a work (persisted in Fedora, if not yet in Solr)...
|
|
108
|
+
# ...this method gets associated derivative paths queued and attach all.
|
|
109
|
+
# @param file_set [FileSet] saved file set, attached to work,
|
|
110
|
+
# with identifier, and a non-nil import_url
|
|
111
|
+
# Attach every derivative that was queued (via assign) for the primary
# file this file set was imported from.
# @param file_set [FileSet] saved file set, attached to a work, with a
#   non-nil import_url
# @raise [ArgumentError] when import_url is nil, or no parent work is found
def commit_queued!(file_set)
  raise ArgumentError, 'No FileSet import_url' if file_set.import_url.nil?
  import_path = file_url_to_path(file_set.import_url)
  work = file_set.member_of.select(&:work?)[0]
  raise ArgumentError, 'Work not found for fileset' if work.nil?
  derivatives = WorkDerivatives.of(work, file_set)
  IngestFileRelation.derivatives_for_file(import_path).each do |path|
    next unless File.exist?(path)
    attachment_record = DerivativeAttachment.where(path: path).first
    # unlog_assignment destroys the attachment record but intentionally
    # leaves the ingest relation behind, so a relation without a record
    # means the derivative was unassigned: skip it instead of failing
    # with NoMethodError on nil.
    next if attachment_record.nil?
    derivatives.attach(path, attachment_record.destination_name)
    # update previously nil fileset id
    attachment_record.fileset_id = file_set.id
    attachment_record.save!
  end
  # adopt the file set if this adapter had none, then refresh the mapping
  @fileset ||= file_set
  load_paths
end
|
|
128
|
+
|
|
129
|
+
# attach a single derivative file to work
|
|
130
|
+
# @param file [String, IO] path to file or IO object
|
|
131
|
+
# @param name [String] destination name, usually file extension
|
|
132
|
+
# Store one derivative file for the adapted fileset under the given
# destination name, then reload the name-to-path mapping.
# @param file [String, IO] path to a source file, or a seekable IO-like
#   object (must respond to tell, seek and read)
# @param name [String] destination name, usually file extension
# @raise [RuntimeError] when there is no fileset to store against
def attach(file, name)
  raise 'Cannot save for nil fileset' if fileset.nil?
  mkdir_pairtree
  path = path_factory.derivative_path_for_reference(fileset, name)
  if file.is_a?(String)
    # a String argument is a source path: copy the file
    FileUtils.copy(file, path)
  else
    # Otherwise presume an IO. It is rewound, copied in full, and its
    # original position restored; closing it stays the caller's job.
    orig_pos = file.tell
    file.seek(0)
    # Binary mode plus a streamed copy: derivative payloads (images, PDF)
    # must not pass through newline/encoding conversion, and need not be
    # held in memory as one String.
    File.open(path, 'wb') { |dstfile| IO.copy_stream(file, dstfile) }
    file.seek(orig_pos)
  end
  # finally, reload @paths after mutation
  load_paths
end
|
|
151
|
+
|
|
152
|
+
# Delete a derivative file from work, by destination name
|
|
153
|
+
# @param name [String] destination name, usually file extension
|
|
154
|
+
def delete(name, force: nil)
  raise 'Cannot save for nil fileset' if fileset.nil?
  target = path_factory.derivative_path_for_reference(fileset, name)
  # Only the file itself is removed, and only when present; the pairtree
  # directories are left alone even if they end up empty.
  if File.exist?(target)
    FileUtils.rm(target, force: force)
  end
  # refresh the name-to-path mapping after the mutation
  load_paths
end
|
|
163
|
+
|
|
164
|
+
# Load all paths/names to @paths once, upon first access
|
|
165
|
+
def load_paths
  fsid = fileset_id
  unless fsid.nil?
    # ask the path factory for every derivative path of the fileset and
    # key each one by the destination name computed from it
    listing = path_factory.derivatives_for_reference(fsid)
    return @paths = listing.each_with_object({}) do |found, mapping|
      mapping[path_destination_name(found)] = found
    end
  end
  # no fileset id: the mapping is empty, and nothing is returned
  @paths = {}
  nil
end
|
|
176
|
+
|
|
177
|
+
# path to existing derivative file for destination name
|
|
178
|
+
# @param name [String] destination name, usually file extension
|
|
179
|
+
# @return [String, NilClass] path (or nil)
|
|
180
|
+
def path(name)
  # lazy first load of the name-to-path mapping
  load_paths if @paths.nil?
  candidate = @paths[name]
  return nil if candidate.nil?
  # a mapped path only counts if the file is still on disk
  return candidate if File.exist?(candidate)
  nil
end
|
|
186
|
+
|
|
187
|
+
# Run a block in context of the opened derivative file for reading
|
|
188
|
+
# @param name [String] destination name, usually file extension
|
|
189
|
+
# @param block [Proc] block/proc to run in context of file IO
|
|
190
|
+
def with_io(name, &block)
  filepath = path(name)
  # no stored derivative for this name: the block is never run
  return if filepath.nil?
  # the three text destination names are read as UTF-8, the rest as bytes
  textual = %w[xml txt html].include?(name)
  File.open(filepath, textual ? 'rb:UTF-8' : 'rb', &block)
end
|
|
196
|
+
|
|
197
|
+
# Get number of derivatives or, if a destination name argument
|
|
198
|
+
# is provided, the size of derivative file
|
|
199
|
+
# @param name [String] optional destination name, usually file extension
|
|
200
|
+
# @return [Integer] size in bytes
|
|
201
|
+
def size(name = nil)
  # lazy first load of the name-to-path mapping
  load_paths if @paths.nil?
  # without a name: how many derivatives; with one: that file's byte size
  name.nil? ? @paths.size : File.size(@paths[name])
end
|
|
206
|
+
|
|
207
|
+
# Check if derivative file exists for destination name
|
|
208
|
+
# @param name [String] optional destination name, usually file extension
|
|
209
|
+
# @return [TrueClass, FalseClass] boolean
|
|
210
|
+
def exist?(name)
  # consult the (lazily loaded) mapping directly
  load_paths if @paths.nil?
  # known destination name AND the mapped file is present on disk
  @paths.key?(name) && File.exist?(@paths[name])
end
|
|
213
|
+
|
|
214
|
+
# Get raw binary or encoded text data of file as a String
|
|
215
|
+
# @param name [String] destination name, usually file extension
|
|
216
|
+
# @return [String] Raw bytes, or if text file, a UTF-8 encoded String
|
|
217
|
+
def data(name)
  # stays an empty String when with_io finds no file and never yields
  contents = ''
  with_io(name) { |io| contents += io.read }
  contents
end
|
|
224
|
+
|
|
225
|
+
private
|
|
226
|
+
|
|
227
|
+
# Local path of the primary (source) file these derivatives relate to,
# or nil when it cannot be determined.
def primary_file_path
  unless fileset.nil?
    # a fileset is present: its import_url, when set, names the source
    url = fileset.import_url
    return url.nil? ? nil : file_url_to_path(url)
  end
  # No fileset yet: look for *intent* instead, in the form of the first
  # path queued on the container (parent of our parent WorkFile).
  work_file = parent
  return if work_file.nil?
  container = work_file.parent
  return if container.nil?
  container.assigned[0]
end
|
|
240
|
+
|
|
241
|
+
# Convert a file:// URL into a filesystem path by dropping the scheme.
# @param url [String] URL, normally beginning with 'file://'
# @return [String] the URL without its leading 'file://'; a string
#   without that prefix is returned unchanged
def file_url_to_path(url)
  # Anchored at the start: only the scheme prefix is stripped, so a
  # 'file://' sequence occurring later in the path is preserved.
  url.sub(%r{\Afile://}, '')
end
|
|
244
|
+
|
|
245
|
+
# Record, in the database, that the derivative at `path` belongs to the
# primary file; does nothing when no primary file path is known.
def log_primary_file_relation(path)
  source = primary_file_path
  unless source.nil?
    NewspaperWorks::IngestFileRelation.create!(
      file_path: source, derivative_path: path
    )
  end
end
|
|
253
|
+
|
|
254
|
+
# Mirror one queued assignment to the database: an attachment record
# (fileset id, source path, destination name), followed by the relation
# between the primary file and this derivative path.
def log_assignment(path, name)
  record = { fileset_id: fileset_id, path: path, destination_name: name }
  NewspaperWorks::DerivativeAttachment.create!(record)
  log_primary_file_relation(path)
end
|
|
262
|
+
|
|
263
|
+
# Destroy the attachment record(s) mirroring a queued assignment.
def unlog_assignment(path, name)
  criteria = { path: path, destination_name: name }
  # the fileset id narrows the match only when one is known
  fsid = fileset_id
  criteria[:fileset_id] = fsid unless fsid.nil?
  NewspaperWorks::DerivativeAttachment.where(criteria).destroy_all
  # note: there is deliberately no attempt to "unlog" primary
  # file relation, as leaving it should have no side-effect.
end
|
|
279
|
+
|
|
280
|
+
# Destination name for a path: the text after its last '.', replaced by
# the class-level remap_names entry when one exists for it.
def path_destination_name(path)
  extension = path.split('.').last
  remapped = self.class.remap_names[extension]
  remapped || extension
end
|
|
284
|
+
|
|
285
|
+
# The adapter claims to answer whatever a plain Hash answers, because
# method_missing forwards exactly those calls to the @paths hash.
def respond_to_missing?(symbol, include_priv = false)
  Hash.new.respond_to?(symbol, include_priv)
end
|
|
288
|
+
|
|
289
|
+
# Proxy mapping/hash enumeration methods to @paths, loading it first if
# needed; any other unknown method takes the normal failure path.
def method_missing(method, *args, &block)
  return super unless respond_to_missing?(method)
  load_paths if @paths.nil?
  @paths.send(method, *args, &block)
end
|
|
298
|
+
|
|
299
|
+
# Object that answers the derivative path queries used by this class
# (derivative_path_for_reference, derivatives_for_reference).
def path_factory
  Hyrax::DerivativePath
end
|
|
302
|
+
|
|
303
|
+
# make shared path for derivatives to live, given
|
|
304
|
+
def mkdir_pairtree
  # Hyrax::DerivativePath has no public method returning the bare
  # pairtree directory of a fileset, so it is inferred: compute the path
  # for an empty destination name and drop its last segment.
  probe = path_factory.derivative_path_for_reference(fileset, '')
  segments = probe.split('/')
  segments.pop
  dir = File.join(segments)
  return if Dir.exist?(dir)
  FileUtils.mkdir_p(dir)
end
|
|
312
|
+
end
|
|
313
|
+
end
|
|
314
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# encoding=utf-8
|
|
2
|
+
|
|
3
|
+
require 'hyrax'
|
|
4
|
+
|
|
5
|
+
module NewspaperWorks
  module Data
    # WorkFile is a read-only convenience wrapper for just-in-time
    # file operations, and is the type of values returned by
    # NewspaperWorks::Data::WorkFiles (container) adapter.
    class WorkFile
      # accessors for adaptation relationships:
      attr_accessor :work, :parent, :fileset
      # delegate these metadata properties to @fileset.original_file:
      delegate :size, :date_created, :date_modified, :mime_type, to: :unwrapped

      # alternate constructor spelling:
      def self.of(work, fileset = nil, parent = nil)
        new(work, fileset, parent)
      end

      # @param work [Object] work the file belongs to
      # @param fileset [FileSet, NilClass] fileset wrapping the file
      # @param parent [NewspaperWorks::Data::WorkFiles, NilClass] container
      def initialize(work, fileset = nil, parent = nil)
        @work = work
        # Stored exactly as given. A nil fileset is not resolved to any
        # default here; the reader methods below return nil (or '' for
        # data) while it stays nil.
        @fileset = fileset
        # Parent is WorkFiles (container) object, if applicable:
        @parent = parent
      end

      # Get original repository object representing file (not fileset).
      # @return [ActiveFedora::File, NilClass] repository file persistence
      #   object, or nil when there is no fileset
      def unwrapped
        return nil if @fileset.nil?
        @fileset.original_file
      end

      # Two WorkFile objects are equal when their underlying repository
      # files have the same id.
      # @param other [Object] object to compare with
      # @return [Boolean] false whenever either side has no file, or other
      #   does not expose `unwrapped`
      def ==(other)
        return false if @fileset.nil?
        # comparing against nil or an unrelated object is "not equal",
        # not an error
        return false unless other.respond_to?(:unwrapped)
        mine = unwrapped
        theirs = other.unwrapped
        return false if mine.nil? || theirs.nil?
        mine.id == theirs.id
      end

      # Get path to working copy of file on local filesystem;
      # checkout file from repository/source as needed.
      # @return [String, NilClass] path to working copy of binary
      def path
        return nil if @fileset.nil?
        checkout
      end

      # Read data from working copy of file on local filesystem;
      # checkout file from repository/source as needed.
      # @return [String] byte data of binary/file payload ('' if no fileset)
      def data
        return '' if @fileset.nil?
        File.read(path, mode: 'rb')
      end

      # Run block/proc upon data of file;
      # checkout file from repository/source as needed.
      # The block is not run when there is no path to open.
      # @yield [io] read-only IO or File object to block/proc.
      def with_io(&block)
        filepath = path
        return if filepath.nil?
        File.open(filepath, 'rb', &block)
      end

      # Get filename from stored metadata
      # @return [String, NilClass] file name stored in repository metadata
      def name
        return nil if @fileset.nil?
        unwrapped.original_name
      end

      # Derivatives for fileset associated with this primary file object
      # @return [NewspaperWorks::Data::WorkDerivatives] derivatives adapter
      def derivatives
        NewspaperWorks::Data::WorkDerivatives.of(work, fileset, self)
      end

      private

      def checkout
        file = @fileset.original_file
        # find_or_retrieve returns path to working copy, but only
        # fetches from Fedora if no working copy exists on filesystem.
        # NOTE: there may be some benefit to memoizing to avoid
        #   call and File.exist? IO operation, but YAGNI for now.
        Hyrax::WorkingDirectory.find_or_retrieve(file.id, @fileset.id)
      end
    end
  end
end
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
module NewspaperWorks
  module Data
    # Container adapter over the primary files of a work. Reads behave
    # like a mapping of fileset id to WorkFile; changes are queued with
    # assign/unassign and applied by commit!.
    class WorkFiles
      include NewspaperWorks::Data::PathHelper

      attr_accessor :work, :assigned, :unassigned
      # include?(id) answers for fileset ids only (see #keys)
      delegate :include?, to: :keys

      # alternate constructor spelling:
      def self.of(work)
        new(work)
      end

      # @param work [Object] work whose files are adapted
      def initialize(work)
        @work = work
        # queue of source paths to attach
        @assigned = []
        # queue of fileset ids / file names to remove
        @unassigned = []
        # memoized WorkDerivatives, see #derivatives
        @derivatives = nil
      end

      # Derivatives for specified fileset or first fileset found.
      #   The `WorkDerivatives` adapter has assign/commit! semantics just
      #   like `WorkFiles`, and also acts like a hash/mapping of
      #   destination names (usually file extension) to path of saved
      #   derivative.  Once built through the fileset branch, the same
      #   instance is returned on later calls (memoized).
      # NOTE(review): a passed fileset only selects the branch; the
      #   adapter itself is built from the first WorkFile (values[0]).
      #   Confirm whether honoring the specific fileset is intended.
      # @return [NewspaperWorks::Data::WorkDerivatives] derivatives adapter
      def derivatives(fileset: nil)
        fileset ||= @fileset
        return @derivatives unless @derivatives.nil?
        if fileset.nil?
          # for the deferred assignment case, we have no fileset yet...
          work_file = NewspaperWorks::Data::WorkFile.of(work, nil, self)
          return work_file.derivatives
        end
        # Otherwise, delegate actual construction to WorkFile.derivatives:
        @derivatives = values[0].derivatives
      end

      # Assignment state
      # @return [String] A label describing the state of assignment queues
      def state
        return 'dirty' unless @assigned.empty? && @unassigned.empty?
        return 'empty' if keys.empty?
        # TODO: implement 'pending' as intermediate state between 'dirty'
        #   and saved, where we look for saved state that matches what was
        #   previously assigned in THIS instance.  We can only know that
        #   changes initiated by this instance in this thread are pending
        #   because there's no global storage for the assignment queue.
        'saved'
      end

      # List of fileset (not file) id keys, presumes system like Hyrax
      #   is only keeping a 1:1 between fileset and contained PCDM file,
      #   because derivatives are not stored in the FileSet.
      # @return [Array<String>] fileset ids
      def keys
        filesets.map(&:id)
      end

      # List of WorkFile for each primary file
      # @return [Array<NewspaperWorks::Data::WorkFile>] adapter for persisted
      #   primary file
      def values
        keys.map(&method(:get))
      end

      # Array of [id, WorkFile] for each primary file
      # @return [Array<Array>] key/value pairs for primary files of work
      def entries
        filesets.map { |fs| [fs.id, self[fs.id]] }
      end

      # List of local file names for attachments, based on original ingested
      #   or uploaded file name.
      # @return [Array<String>]
      def names
        filesets.map(&method(:original_name))
      end

      # Get a WorkFile adapter representing primary file, either by name or id
      # @param name_or_id [String] Fileset id or work-local file name
      # @return [NewspaperWorks::Data::WorkFile, NilClass] adapter for
      #   persisted primary file, or nil when nothing matches
      def get(name_or_id)
        return get_by_fileset_id(name_or_id) if keys.include?(name_or_id)
        get_by_filename(name_or_id)
      end

      # Assign a path to assigned queue for attachment
      # @param path [String] Path to source file
      def assign(path)
        path = normalize_path(path)
        validate_path(path)
        @assigned.push(path)
      end

      # Assign a name or id to unassigned queue for deletion -- OR -- remove a
      #   path from queue of assigned items
      # @param name_or_id [String] Fileset id, local file name, or source path
      def unassign(name_or_id)
        # if name_or_id is queued path, remove from @assigned queue:
        @assigned.delete(name_or_id) if @assigned.include?(name_or_id)
        # if name_or_id is known id or name, queue it for removal.
        # include? only knows fileset ids, so file names are checked
        # separately; commit_unassigned resolves either form through get.
        known = include?(name_or_id) || names.include?(name_or_id)
        @unassigned.push(name_or_id) if known
      end

      # commit pending changes to work files
      #   beginning with removals, then with new assignments
      def commit!
        commit_unassigned
        commit_assigned
      end

      alias [] :get

      private

      # Get one WorkFile object for a fileset id belonging to this work
      # @return [NewspaperWorks::Data::WorkFile, NilClass] nil for an id
      #   that is not among this work's keys
      def get_by_fileset_id(id)
        # explicit return: without it the guard was a no-op and unknown
        # ids were passed on to FileSet.find
        return nil unless keys.include?(id)
        fileset = FileSet.find(id)
        NewspaperWorks::Data::WorkFile.of(work, fileset, self)
      end

      # Get one WorkFile object based on filename in metadata
      def get_by_filename(name)
        r = filesets.select { |fs| original_name(fs) == name }
        # checkout first match
        r.empty? ? nil : NewspaperWorks::Data::WorkFile.of(work, r[0], self)
      end

      def original_name(fileset)
        fileset.original_file.original_name
      end

      def filesets
        # file sets with non-nil original file contained:
        work.members.select { |m| m.class == FileSet && m.original_file }
      end

      # User acting on the work: its depositor when set, otherwise
      # current_user when that is defined here, otherwise the batch user.
      def user
        return User.find_by(email: work.depositor) unless work.depositor.nil?
        defined?(current_user) ? current_user : User.batch_user
      end

      # Give the work a depositor (the acting user's key) if it has none.
      def ensure_depositor
        return unless @work.depositor.nil?
        @work.depositor = user.user_key
      end

      def commit_unassigned
        # for each (name or) id to be removed from work, use actor to destroy
        @unassigned.each do |id|
          # "actor" here is simply a multi-adapter of Fileset, User
          # Calling destroy will:
          #   1. unlink fileset from work, and save work
          #   2. Destroy fileset:
          #     - :before_destroy callback will delegate derivative cleanup
          #       to derivatives service component(s).
          #     - Remove fileset from storage/persistence layers
          #     - Invoke (logging or other) :after_destroy callback
          Hyrax::Actors::FileSetActor.new(get(id).fileset, user).destroy
          work.reload
        end
      end

      def commit_assigned
        return if @assigned.nil? || @assigned.empty?
        ensure_depositor
        remote_files = @assigned.map do |path|
          { url: path_to_uri(path), file_name: File.basename(path) }
        end
        attrs = { remote_files: remote_files }
        # Create an environment for actor stack:
        env = Hyrax::Actors::Environment.new(@work, Ability.new(user), attrs)
        # Invoke default Hyrax actor stack middleware:
        Hyrax::CurationConcern.actor.create(env)
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'newspaper_works/data/fileset_helper'
|
|
2
|
+
require 'newspaper_works/data/path_helper'
|
|
3
|
+
require 'newspaper_works/data/work_derivatives'
|
|
4
|
+
require 'newspaper_works/data/work_files'
|
|
5
|
+
require 'newspaper_works/data/work_file'
|
|
6
|
+
|
|
7
|
+
module NewspaperWorks
  # Namespace for data access helper / adapter classes that support and
  # enhance NewspaperWorks work models.
  module Data
    # Handler for after_create_fileset, to be called by block subscribing to
    #   and overriding default Hyrax `:after_create_fileset` handler, via
    #   app integrating newspaper_works.
    def self.handle_after_create_fileset(file_set, user)
      handle_queued_derivative_attachments(file_set)
      # Hyrax queues this job by default; because newspaper_works
      #   overrides the single subscriber Hyrax uses to do so, it has
      #   to be queued from here:
      FileSetAttachedEventJob.perform_later(file_set, user)
      parent_work = file_set.member_of[0]
      return if parent_work.nil?
      # Hyrax CreateWithRemoteFilesActor has glaring omission re: this job,
      #   so we call it here, once we have a fileset to copy permissions to.
      InheritPermissionsJob.perform_later(parent_work)
    end

    # Attach derivatives queued for the file set's source file; a file
    # set without an import_url is ignored.
    def self.handle_queued_derivative_attachments(file_set)
      return if file_set.import_url.nil?
      parent_work = file_set.member_of.select(&:work?)[0]
      # For now, because this is IO-bound operation, it makes sense to have
      #   this not be a job, but run inline:
      NewspaperWorks::Data::WorkDerivatives.of(parent_work).commit_queued!(file_set)
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require 'active_fedora'
|
|
2
|
+
require 'hyrax'
|
|
3
|
+
require 'blacklight_iiif_search'
|
|
4
|
+
require 'blacklight_advanced_search'
|
|
5
|
+
|
|
6
|
+
module NewspaperWorks
  # module constants:
  # installation directory of the newspaper_works gem
  GEM_PATH = Gem::Specification.find_by_name("newspaper_works").gem_dir

  # Engine Class
  class Engine < ::Rails::Engine
    isolate_namespace NewspaperWorks

    config.to_prepare do
      # Put PluggableDerivativeService at the front of Hyrax's list of
      #   derivative services. It wraps the Hyrax default, but allows
      #   multiple valid services to be configured, instead of just the
      #   _first_ valid service.
      #
      # To configure specific services, inject each service, in desired
      #   order, into NewspaperWorks::PluggableDerivativeService.plugins.
      Hyrax::DerivativeService.services.unshift(NewspaperWorks::PluggableDerivativeService)

      # Derivative services PluggableDerivativeService should consider,
      #   appended in this order:
      derivative_plugins = [
        NewspaperWorks::JP2DerivativeService,
        NewspaperWorks::PDFDerivativeService,
        NewspaperWorks::TextExtractionDerivativeService,
        NewspaperWorks::TIFFDerivativeService
      ]
      derivative_plugins.each do |service|
        NewspaperWorks::PluggableDerivativeService.plugins << service
      end

      # Register actor to handle any NewspaperWorks upload behaviors before
      #   CreateWithFilesActor gets to them:
      Hyrax::CurationConcern.actor_factory.insert_before(
        Hyrax::Actors::CreateWithFilesActor,
        NewspaperWorks::Actors::NewspaperWorksUploadActor
      )
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module NewspaperWorks
  # Root of the NewspaperWorks-specific exception hierarchy.
  class NewspaperWorksError < StandardError; end

  # Raised for data transformation or read errors.
  class DataError < NewspaperWorksError; end

  # Raised for the temporary state in which one or more PDF page source
  # files are not ready yet, so that retrying later is warranted.
  class PagesNotReady < DataError; end
end
|