iiif_print 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.env +5 -0
- data/.fcrepo_wrapper +4 -0
- data/.github/release.yml +20 -0
- data/.github/workflows/branches.yml +24 -0
- data/.github/workflows/build-lint-test-action.yaml +33 -0
- data/.github/workflows/release_labels.yml +25 -0
- data/.gitignore +52 -0
- data/.rubocop.yml +177 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +49 -0
- data/CONTRIBUTING.md +181 -0
- data/Dockerfile +15 -0
- data/Gemfile +52 -0
- data/LICENSE +203 -0
- data/README.md +203 -0
- data/Rakefile +38 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
- data/app/assets/config/iiif_print_manifest.js +2 -0
- data/app/assets/images/iiif_print/.keep +0 -0
- data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/iiif_print.js +3 -0
- data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
- data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
- data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
- data/app/helpers/hyrax/iiif_helper.rb +22 -0
- data/app/helpers/iiif_print/application_helper.rb +5 -0
- data/app/helpers/iiif_print_helper.rb +64 -0
- data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
- data/app/mailers/iiif_print/application_mailer.rb +8 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
- data/app/models/concerns/iiif_print/solr/document.rb +47 -0
- data/app/models/iiif_print/application_record.rb +6 -0
- data/app/models/iiif_print/derivative_attachment.rb +8 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
- data/app/models/iiif_print/ingest_file_relation.rb +14 -0
- data/app/models/iiif_print/pending_relationship.rb +7 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
- data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
- data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
- data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/hyrax/base/_representative_media.html.erb +9 -0
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/iiif_print.de.yml +148 -0
- data/config/locales/iiif_print.en.yml +119 -0
- data/config/locales/iiif_print.es.yml +148 -0
- data/config/locales/iiif_print.fr.yml +149 -0
- data/config/locales/iiif_print.it.yml +142 -0
- data/config/locales/iiif_print.pt-BR.yml +148 -0
- data/config/locales/iiif_print.zh.yml +142 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/fits.xml +55 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
- data/docker-compose.yml +129 -0
- data/iiif_print.gemspec +43 -0
- data/lib/generators/iiif_print/assets_generator.rb +29 -0
- data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
- data/lib/generators/iiif_print/install_generator.rb +52 -0
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
- data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
- data/lib/iiif_print/base_derivative_service.rb +113 -0
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
- data/lib/iiif_print/catalog_search_builder.rb +31 -0
- data/lib/iiif_print/configuration.rb +99 -0
- data/lib/iiif_print/data/fileset_helper.rb +25 -0
- data/lib/iiif_print/data/path_helper.rb +40 -0
- data/lib/iiif_print/data/work_derivatives.rb +323 -0
- data/lib/iiif_print/data/work_file.rb +92 -0
- data/lib/iiif_print/data/work_files.rb +199 -0
- data/lib/iiif_print/data.rb +35 -0
- data/lib/iiif_print/engine.rb +77 -0
- data/lib/iiif_print/errors.rb +9 -0
- data/lib/iiif_print/image_tool.rb +119 -0
- data/lib/iiif_print/jobs/application_job.rb +8 -0
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
- data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
- data/lib/iiif_print/jp2_derivative_service.rb +118 -0
- data/lib/iiif_print/jp2_image_metadata.rb +81 -0
- data/lib/iiif_print/lineage_service.rb +41 -0
- data/lib/iiif_print/metadata.rb +125 -0
- data/lib/iiif_print/pdf_derivative_service.rb +42 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
- data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
- data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
- data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
- data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
- data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
- data/lib/iiif_print/text_extraction.rb +11 -0
- data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
- data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
- data/lib/iiif_print/tiff_derivative_service.rb +50 -0
- data/lib/iiif_print/version.rb +3 -0
- data/lib/iiif_print/works_controller_behavior.rb +9 -0
- data/lib/iiif_print.rb +136 -0
- data/lib/tasks/set_child_works.rake +22 -0
- data/spec/.keep.txt +1 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/uploaded_txt_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
- data/spec/helpers/iiif_print_helper_spec.rb +43 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
- data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
- data/spec/iiif_print/configuration_spec.rb +67 -0
- data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
- data/spec/iiif_print/data/work_file_spec.rb +99 -0
- data/spec/iiif_print/data/work_files_spec.rb +237 -0
- data/spec/iiif_print/image_tool_spec.rb +109 -0
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
- data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
- data/spec/iiif_print/lineage_service_spec.rb +13 -0
- data/spec/iiif_print/metadata_spec.rb +115 -0
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
- data/spec/iiif_print_spec.rb +51 -0
- data/spec/misc_shared.rb +111 -0
- data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
- data/spec/spec_helper.rb +181 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/support/iiif_print_models.rb +127 -0
- data/spec/test_app_templates/blacklight.yml +9 -0
- data/spec/test_app_templates/fedora.yml +15 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
- data/spec/test_app_templates/redis.yml +9 -0
- data/spec/test_app_templates/solr/conf/schema.xml +362 -0
- data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
- data/spec/test_app_templates/solr.yml +7 -0
- data/tasks/iiif_print_dev.rake +34 -0
- data/tmp/.keep +0 -0
- metadata +605 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
require 'active_fedora'
|
|
2
|
+
require 'hyrax'
|
|
3
|
+
require 'blacklight_iiif_search'
|
|
4
|
+
|
|
5
|
+
module IiifPrint
  # module constants:
  GEM_PATH = Gem::Specification.find_by_name("iiif_print").gem_dir

  # Rails engine for IiifPrint. Each time application code is prepared it
  # prepends IiifPrint's decorators onto the Hyrax / BlacklightIiifSearch
  # classes named below, registers the PluggableDerivativeService, and sets
  # the :after_create_fileset callback.
  class Engine < ::Rails::Engine
    isolate_namespace IiifPrint

    # rubocop:disable Metrics/BlockLength
    config.to_prepare do
      # We don't have a hard requirement of Bulkrax but in our experience, lingering on earlier
      # versions can introduce bugs of both Bulkrax and some of the assumptions that we've resolved.
      # Very early versions of Bulkrax do not have VERSION defined.
      #
      # NOTE: the check is skipped whenever the ENV variable is set at all; its value is not inspected.
      if defined?(Bulkrax) && !ENV.fetch("SKIP_IIIF_PRINT_BULKRAX_VERSION_REQUIREMENT", false)
        if !defined?(Bulkrax::VERSION) || (Bulkrax::VERSION.to_i < 5)
          raise "IiifPrint does not have a hard dependency on Bulkrax, " \
                "but if you have Bulkrax installed we recommend at least version 5.0.0. " \
                "To ignore this recommendation please add SKIP_IIIF_PRINT_BULKRAX_VERSION_REQUIREMENT " \
                "to your ENV variables."
        end
      end

      # Inject PluggableDerivativeService ahead of Hyrax default.
      # This wraps Hyrax default, but allows multiple valid services
      # to be configured, instead of just the _first_ valid service.
      #
      # To configure specific services, inject each service, in desired order
      # to IiifPrint::PluggableDerivativeService.plugins array.
      #
      # This block can run more than once per process (it is re-run when code
      # is reloaded), so only register the service if it is not already there;
      # otherwise the services array accumulates duplicate entries.
      derivative_services = Hyrax::DerivativeService.services
      unless derivative_services.include?(IiifPrint::PluggableDerivativeService)
        derivative_services.unshift(IiifPrint::PluggableDerivativeService)
      end

      Hyrax::IiifManifestPresenter.prepend(IiifPrint::IiifManifestPresenterBehavior)
      Hyrax::IiifManifestPresenter::Factory.prepend(IiifPrint::IiifManifestPresenterFactoryBehavior)
      Hyrax::ManifestBuilderService.prepend(IiifPrint::ManifestBuilderServiceBehavior)
      Hyrax::Renderers::FacetedAttributeRenderer.prepend(Hyrax::Renderers::FacetedAttributeRendererDecorator)
      Hyrax::WorksControllerBehavior.prepend(IiifPrint::WorksControllerBehaviorDecorator)
      Hyrax::WorkShowPresenter.prepend(IiifPrint::WorkShowPresenterDecorator)

      IiifPrint::ChildIndexer.decorate_work_types!
      IiifPrint::FileSetIndexer.decorate(Hyrax::FileSetIndexer)

      ::BlacklightIiifSearch::IiifSearchResponse.prepend(IiifPrint::IiifSearchResponseDecorator)
      ::BlacklightIiifSearch::IiifSearchAnnotation.prepend(IiifPrint::BlacklightIiifSearch::AnnotationDecorator)
      Hyrax::Actors::FileSetActor.prepend(IiifPrint::Actors::FileSetActorDecorator)

      # Give both presenters a base_url accessor (the base url includes the protocol).
      # We need the base url to render the facet links and normalize the interface.
      Hyrax::IiifManifestPresenter.send(:attr_accessor, :base_url)
      Hyrax::IiifManifestPresenter::DisplayImagePresenter.send(:attr_accessor, :base_url)
      # Extending this class because there is an #ability= but not #ability and this definition
      # mirrors the Hyrax::IiifManifestPresenter#ability.
      module Hyrax::IiifManifestPresenter::DisplayImagePresenterDecorator
        # @return the assigned ability, or a memoized NullAbility when none was set
        def ability
          @ability ||= NullAbility.new
        end
      end
      Hyrax::IiifManifestPresenter::DisplayImagePresenter.prepend(Hyrax::IiifManifestPresenter::DisplayImagePresenterDecorator)

      Hyrax.config do |config|
        # Hand every newly created file set to the IiifPrint configuration hook.
        config.callback.set(:after_create_fileset) do |file_set, user|
          IiifPrint.config.handle_after_create_fileset(file_set, user)
        end
      end
    end

    config.after_initialize do
      IiifPrint::Solr::Document.decorate(SolrDocument)
    end
    # rubocop:enable Metrics/BlockLength
  end
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
require 'open3'
|
|
2
|
+
require 'tmpdir'
|
|
3
|
+
|
|
4
|
+
module IiifPrint
  # Wraps one image (or PDF) file on disk. Provides technical metadata and
  # format conversion by running the external `identify`, `convert`
  # (ImageMagick) and `opj_decompress` (OpenJPEG) commands.
  #
  # External commands are invoked with an argument list (no shell), so paths
  # containing spaces or shell metacharacters are passed through verbatim.
  class ImageTool
    attr_accessor :path, :ftype

    # @param path [String] path to the source file; its first bytes are read
    #   immediately and kept in #ftype for file-type sniffing.
    def initialize(path)
      @path = path
      @ftype = magic
      @metadata = nil
    end

    # Technical metadata for the file, computed once and then cached.
    #
    # @return [Hash] hash with following symbol keys, and respectively
    #   typed String and/or Integer values.
    #   :width, :height — both in Integer px units
    #   :color — (String enumerated from 'gray', 'monochrome', 'color')
    #   :num_components - Integer, number of channels
    #   :bits_per_component — Integer, bits per channel (e.g. 8 vs. 1)
    #   :content_type — RFC 2045 MIME type
    def metadata
      @metadata ||= jp2? ? jp2_metadata : identify_metadata
    end

    # Convert source image to image at destination path, inferring file type
    # from destination file extension. In case of JP2 files, create
    # intermediate file using OpenJPEG 2000 that ImageMagick can use.
    # Only outputs monochrome output if monochrome is true, destination
    # format is TIFF.
    #
    # @param destination [String] Path to output / destination file
    # @param monochrome [Boolean] true if monochrome output, otherwise false
    # @return [String] standard output of the `convert` command
    # @raise [RuntimeError] if the destination ends with 'jp2'
    def convert(destination, monochrome = false)
      raise 'JP2 output not yet supported' if destination.end_with?('jp2')
      source = jp2? ? jp2_to_tiff(@path) : @path
      convert_image(source, destination, monochrome)
    end

    private

    # Run ImageMagick `convert`; bilevel options are added only when
    # monochrome output was requested AND the destination looks like a TIFF.
    def convert_image(source, destination, monochrome)
      monochrome &&= destination.slice(-4, 4).index('tif')
      options = monochrome ? %w[-depth 1 -monochrome -compress Group4 -type bilevel] : []
      output, _status = Open3.capture2('convert', source.to_s, *options, destination)
      output
    end

    # Decode a JP2 into a TIFF that ImageMagick can read.
    # NOTE: the temporary directory created here is not removed by this class.
    #
    # @return [String] path of the intermediate TIFF
    def jp2_to_tiff(source)
      intermediate_path = File.join(Dir.mktmpdir, 'intermediate.tif')
      Open3.capture2('opj_decompress', '-i', source.to_s, '-o', intermediate_path)
      intermediate_path
    end

    # Metadata for JP2 sources, read by IiifPrint::JP2ImageMetadata.
    def jp2_metadata
      result = IiifPrint::JP2ImageMetadata.new(path).technical_metadata
      result[:content_type] = 'image/jp2'
      result
    end

    # Find the first line whose downcased, stripped text starts with `key`.
    # Given a "key: value" line, return the text after the last colon,
    # stripped of leading and trailing whitespace; nil when no line matches.
    # `key` must therefore be given in lower case.
    def im_line_select(lines, key)
      line = lines.find { |l| l.scrub.downcase.strip.start_with?(key) }
      return line if line.nil?
      line.strip.split(':')[-1].strip
    end

    # @return [Array(Integer, Integer)] width, height in Integer px units
    def im_identify_geometry(lines)
      img_geo = im_line_select(lines, 'geometry').split('+')[0]
      img_geo.split('x').map(&:to_i)
    end

    # @return [Array<String>] lines of output from imagemagick `identify`
    def im_identify
      output, _status = Open3.capture2('identify', '-verbose', path.to_s)
      output.lines
    end

    def im_mime(lines)
      return 'application/pdf' if pdf? # workaround older imagemagick bug
      im_line_select(lines, 'mime type')
    end

    # Fill :num_components, :color and :bits_per_component in `result`.
    def populate_im_color!(lines, result)
      bpc = im_line_select(lines, 'depth').split('-')[0].to_i # '1-bit' -> 1
      colorspace = im_line_select(lines, 'colorspace')
      color = colorspace == 'Gray' ? 'gray' : 'color'
      # Lines are matched after downcasing, so the key has to be lower case;
      # the trailing colon restricts the match to an "Alpha:" channel line.
      has_alpha = !im_line_select(lines, 'alpha:').nil?
      result[:num_components] = (color == 'gray' ? 1 : 3) + (has_alpha ? 1 : 0)
      result[:color] = bpc == 1 ? 'monochrome' : color
      result[:bits_per_component] = bpc
    end

    # Return metadata by means of imagemagick identify
    def identify_metadata
      result = {}
      lines = im_identify
      result[:width], result[:height] = im_identify_geometry(lines)
      result[:content_type] = im_mime(lines)
      populate_im_color!(lines, result)
      result
    end

    # First 23 bytes of the file (nil when the file is empty).
    def magic
      File.read(@path, 23, 0)
    end

    # True when the sniffed header ends with the JP2 'ftyp' brand.
    def jp2?
      @ftype.to_s.end_with?('ftypjp2')
    end

    # True when the file starts with the PDF signature.
    def pdf?
      magic.to_s.start_with?('%PDF-')
    end
  end
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
module IiifPrint
  module Jobs
    # Splits each PDF of a parent work into page images, queues a
    # BatchCreateJob that builds one child work per page, records a
    # PendingRelationship per page, and schedules the CreateRelationshipsJob
    # that later attaches the children to the parent.
    class ChildWorksFromPdfJob < IiifPrint::Jobs::ApplicationJob
      # Break a pdf into individual pages
      # @param parent_work
      # @param pdf_paths: [<Array => String>] paths to pdfs
      # @param user: [User]
      # @param admin_set_id: [<String>]
      # @param prior_pdfs: [<Integer>] count of pdfs already on parent work
      def perform(parent_work, pdf_paths, user, admin_set_id, prior_pdfs)
        @parent_work = parent_work
        @child_admin_set_id = admin_set_id
        child_model = @parent_work.iiif_print_config.pdf_split_child_model

        # handle each input pdf
        pdf_paths.each_with_index do |path, pdf_idx|
          split_pdf(path, pdf_idx, user, prior_pdfs, child_model)
        end

        # Link newly created child works to the parent
        # @param user: [User] user
        # @param parent_id: [<String>] parent work id
        # @param parent_model: [<String>] parent model
        # @param child_model: [<String>] child model
        IiifPrint::Jobs::CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
          user: user,
          parent_id: @parent_work.id,
          parent_model: @parent_work.class.to_s,
          child_model: child_model.to_s
        )

        # TODO: clean up image_files and pdf_paths
      end

      private

      # Split one PDF into images and enqueue creation of its child works.
      # Does nothing when the splitter yields no images.
      def split_pdf(path, pdf_idx, user, prior_pdfs_count, child_model)
        image_files = @parent_work.iiif_print_config.pdf_splitter_service.new(path).to_a
        return if image_files.blank?

        pdf_sequence = pdf_idx + prior_pdfs_count
        prepare_import_data(pdf_sequence, image_files, user)

        # submit the job to create all the child works for one PDF
        # @param [User] user
        # @param [Hash<String => String>] titles
        # @param [Hash<String => String>] resource_types (optional)
        # @param [Array<String>] uploaded_files Hyrax::UploadedFile IDs
        # @param [Hash] attributes attributes to apply to all works, including :model
        # @param [Hyrax::BatchCreateOperation] operation
        operation = Hyrax::BatchCreateOperation.create!(
          user: user,
          operation_type: "PDF Batch Create"
        )
        BatchCreateJob.perform_later(user,
                                     @child_work_titles,
                                     {},
                                     @uploaded_files,
                                     attributes.merge(model: child_model.to_s).with_indifferent_access,
                                     operation)
      end

      # Populate @uploaded_files (uploaded file ids) and @child_work_titles
      # (id => title) for one PDF, and persist a PendingRelationship per page.
      def prepare_import_data(pdf_sequence, image_files, user)
        @uploaded_files = []
        @child_work_titles = {}
        image_files.each_with_index do |image_path, idx|
          file_id = create_uploaded_file(user, image_path).to_s
          file_title = set_title(@parent_work.title.first, pdf_sequence, idx)
          @uploaded_files << file_id
          @child_work_titles[file_id] = file_title
          # save child work info to create the member relationships
          PendingRelationship.create!(child_title: file_title,
                                      parent_id: @parent_work.id,
                                      child_order: sort_order(pdf_sequence, idx))
        end
      end

      # Sort key for a page. Both numbers are zero-padded because the key is a
      # string that is later sorted with `order('child_order asc')`; unpadded,
      # "0 10" would sort ahead of "0 2".
      # NOTE(review): assumes child_order is a string column — confirm against the migration.
      #
      # @return [String] e.g. "00000 00002"
      def sort_order(pdf_sequence, idx)
        format('%<pdf>05d %<page>05d', pdf: pdf_sequence, page: idx)
      end

      # Persist a Hyrax::UploadedFile for the image at `path`, owned by `user`.
      # @return the id of the saved uploaded file
      def create_uploaded_file(user, path)
        uf = Hyrax::UploadedFile.new
        uf.user_id = user.id
        uf.file = CarrierWave::SanitizedFile.new(path)
        uf.save!
        uf.id
      end

      # Build a child title from the parent title plus 1-based PDF and page numbers.
      def set_title(title, pdf_sequence, idx)
        pdf_index = "Pdf Nbr #{pdf_sequence + 1}"
        page_number = "Page #{idx + 1}"
        "#{title}: #{pdf_index}, #{page_number}"
      end

      # Attributes copied onto every child work.
      # TODO: what attributes do we need to fill in from the parent work? What about AllinsonFlex?
      def attributes
        {
          admin_set_id: @child_admin_set_id.to_s,
          creator: @parent_work.creator.to_a,
          rights_statement: @parent_work.rights_statement.to_a,
          visibility: @parent_work.visibility.to_s
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
module IiifPrint
  module Jobs
    # Attaches child works to their parent once every child recorded in
    # IiifPrint::PendingRelationship for that parent can be found by title;
    # until then the job re-enqueues itself.
    class CreateRelationshipsJob < IiifPrint::Jobs::ApplicationJob
      # Link newly created child works to the parent
      # @param user: [User] user
      # @param parent_id: [<String>] parent work id
      # @param parent_model: [<String>] parent model
      # @param child_model: [<String>] child model
      def perform(user:, parent_id:, parent_model:, child_model:)
        unless completed_child_data_for(parent_id, child_model)
          # Not every child exists yet: reschedule the job and end this one normally.
          #
          # TODO: Depending on how things shake out, we could be infinitely rescheduling this job.
          # Consider a time to live parameter.
          return reschedule(user: user, parent_id: parent_id, parent_model: parent_model, child_model: child_model)
        end

        # add the members, then drop the bookkeeping rows
        parent_work = parent_model.constantize.find(parent_id)
        create_relationships(user: user, parent: parent_work, ordered_children: @child_works)
        @pending_children.each(&:destroy)
      end

      private

      # Loads @pending_children (ordered by child_order) and @child_works.
      # @return [Boolean] false as soon as one pending child has no matching work
      def completed_child_data_for(parent_id, child_model)
        @child_works = []

        # find and sequence all pending children
        @pending_children = IiifPrint::PendingRelationship.where(parent_id: parent_id).order('child_order asc')

        # look each child up by title; a miss means the child works are not yet ready
        @pending_children.each do |pending|
          matches = find_children_by_title_for(pending.child_title, child_model)
          return false if matches.empty?
          @child_works += matches
        end

        true
      end

      # We should only find one, but there is no guarantee of that and `:where` returns an array.
      def find_children_by_title_for(title, model)
        model.constantize.where(title: title)
      end

      # Enqueue this job again, with the same arguments, ten minutes from now.
      def reschedule(user:, parent_id:, parent_model:, child_model:)
        delayed_job = CreateRelationshipsJob.set(wait: 10.minutes)
        delayed_job.perform_later(
          user: user,
          parent_id: parent_id,
          parent_model: parent_model,
          child_model: child_model
        )
      end

      # Update the parent through the Hyrax actor stack so the children become
      # its members, then reindex the children's file sets.
      def create_relationships(user:, parent:, ordered_children:)
        # { "0" => { id: ... }, "1" => { id: ... }, ... } keyed by position
        member_attributes =
          ordered_children.map(&:id).each_with_index.each_with_object({}) do |(child_id, position), memo|
            memo[position.to_s] = { id: child_id }
          end
        actor_attributes = { work_members_attributes: member_attributes }
        parent.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
        actor_env = Hyrax::Actors::Environment.new(parent, Ability.new(user), actor_attributes)

        Hyrax::CurationConcern.actor.update(actor_env)
        # need to reindex all file_sets to make sure all ancestors are indexed
        ordered_children.each do |child_work|
          next unless child_work.respond_to?(:file_sets)
          child_work.file_sets.each(&:update_index)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
require 'open3'
|
|
2
|
+
|
|
3
|
+
module IiifPrint
|
|
4
|
+
class JP2DerivativeService < BaseDerivativeService
|
|
5
|
+
# OpenJPEG 2000 Command to make NDNP-compliant grayscale JP2:
|
|
6
|
+
CMD_GRAY = 'opj_compress -i %<source_file>s -o %<out_file>s ' \
|
|
7
|
+
'-d 0,0 -b 64,64 -n 6 -p RLCP -t 1024,1024 -I -M 1 ' \
|
|
8
|
+
'-r 64,53.821,45.249,40,32,26.911,22.630,20,16,14.286,' \
|
|
9
|
+
'11.364,10,8,6.667,5.556,4.762,4,3.333,2.857,2.500,2,' \
|
|
10
|
+
'1.667,1.429,1.190,1'.freeze
|
|
11
|
+
|
|
12
|
+
# OpenJPEG 2000 Command to make RGB JP2:
|
|
13
|
+
CMD_COLOR = 'opj_compress -i %<source_file>s -o %<out_file>s ' \
|
|
14
|
+
'-d 0,0 -b 64,64 -n 6 -p RPCL -t 1024,1024 -I -M 1 '\
|
|
15
|
+
'-r 2.4,1.48331273,.91673033,.56657224,.35016049,.21641118,' \
|
|
16
|
+
'.13374944,.0944,.08266171'.freeze
|
|
17
|
+
|
|
18
|
+
# OpenJPEG 1.x command replacement for 2.x opj_compress, takes same options;
|
|
19
|
+
# this is necessary on Ubuntu Trusty (e.g. Travis CI)
|
|
20
|
+
CMD_1X = 'image_to_j2k'.freeze
|
|
21
|
+
|
|
22
|
+
# Target file extension of this service plugin:
|
|
23
|
+
self.target_extension = 'jp2'.freeze
|
|
24
|
+
|
|
25
|
+
attr_reader :file_set
|
|
26
|
+
delegate :uri, :mime_type, to: :file_set
|
|
27
|
+
|
|
28
|
+
def initialize(file_set)
|
|
29
|
+
# cached result string for imagemagick `identify` command
|
|
30
|
+
@command = nil
|
|
31
|
+
@unlink_after_creation = []
|
|
32
|
+
super(file_set)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def create_derivatives(filename)
|
|
36
|
+
# Base class takes care of loading @source_path, @dest_path
|
|
37
|
+
super(filename)
|
|
38
|
+
|
|
39
|
+
# no creation if jp2 master => deemed unnecessary/duplicative
|
|
40
|
+
return if mime_type == 'image/jp2'
|
|
41
|
+
|
|
42
|
+
# if we have a non-TIFF source, or a 1-bit monochrome source, we need
|
|
43
|
+
# to make a NetPBM-based intermediate (temporary) file for OpenJPEG
|
|
44
|
+
# to consume.
|
|
45
|
+
needs_intermediate = !tiff_source? || one_bit?
|
|
46
|
+
|
|
47
|
+
# We use either intermediate temp file, or temp symlink (to work
|
|
48
|
+
# around OpenJPEG 2000 file naming quirk).
|
|
49
|
+
needs_intermediate ? make_intermediate_source : make_symlink
|
|
50
|
+
|
|
51
|
+
# Get OpenJPEG command, rendered with source, destination, appropriate
|
|
52
|
+
# to either color or grayscale source
|
|
53
|
+
render_cmd = opj_command
|
|
54
|
+
|
|
55
|
+
# Run the generated command to make derivative file at @dest_path
|
|
56
|
+
`#{render_cmd}`
|
|
57
|
+
|
|
58
|
+
# Clean up any intermediate files or symlinks used during creation
|
|
59
|
+
cleanup_intermediate
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
# source introspection:
|
|
65
|
+
|
|
66
|
+
def tiff_source?
|
|
67
|
+
identify[:content_type] == 'image/tiff'
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Points @source_path at a temporary `.tif` symlink to the real source.
#
# OpenJPEG binaries have annoying quirk of only using TIFF input
# files whose name ends in .TIF or .tif (three letter); for all
# non-monochrome TIFF files, we just assume we need to symlink
# to such a filename.
#
# The link is registered in @unlink_after_creation for later removal.
#
# @return [String] path of the temporary symlink (the new @source_path)
def make_symlink
  tmpname = File.join(Dir.tmpdir, "#{SecureRandom.uuid}.tif")
  # A symlink target is resolved relative to the directory holding the
  # link, so a relative source path would dangle once linked from the
  # temp dir; link to the absolute path instead.
  FileUtils.ln_s(File.expand_path(@source_path), tmpname)
  @unlink_after_creation.push(tmpname)
  # finally, point @source_path for command at intermediate link:
  @source_path = tmpname
end
|
|
81
|
+
|
|
82
|
+
# Converts the source into a temporary NetPBM bitmap (`.ppm` when
# `use_color?` is true, `.pgm` otherwise) using ImageMagick `convert`,
# then points @source_path at that bitmap.
#
# The temp file is registered in @unlink_after_creation for later removal.
#
# @return [String] path of the intermediate file (the new @source_path)
def make_intermediate_source
  # generate a random filename to be made, with appropriate extension,
  # inside the temp dir:
  extension = use_color? ? 'ppm' : 'pgm'
  tmpname = File.join(Dir.tmpdir, "#{SecureRandom.uuid}.#{extension}")

  # if pdf source, get only first page
  source = @source_path.end_with?('pdf') ? "#{@source_path}[0]" : @source_path

  # Use ImageMagick `convert` to create intermediate bitmap. The argv
  # (array) form bypasses the shell, so paths containing spaces or shell
  # metacharacters -- and the `[0]` page selector, which a shell would
  # treat as a glob pattern -- reach `convert` unmodified. Output is
  # read and discarded, as with the previous backtick invocation.
  IO.popen(['convert', source, tmpname], &:read)

  @unlink_after_creation.push(tmpname)
  # finally, point @source_path for command at intermediate file:
  @source_path = tmpname
end
|
|
101
|
+
|
|
102
|
+
# Builds the OpenJPEG command line for the current source/destination.
#
# Chooses the color or grayscale template, swaps in the OpenJPEG 1.x
# binary name when `opj_compress` is not found on the PATH, and injects
# the file names.
#
# @return [String] the rendered command
def opj_command
  # `which` prints nothing when the 2.x binary is absent
  opj_compress_missing = `which opj_compress`.empty?
  template =
    if use_color?
      CMD_COLOR
    else
      CMD_GRAY
    end
  template = template.sub('opj_compress', 'image_to_j2k') if opj_compress_missing
  # return command with source and destination file names injected
  format(template, source_file: @source_path, out_file: @dest_path)
end
|
|
110
|
+
|
|
111
|
+
# Removes every symlink or intermediate file registered in
# @unlink_after_creation, then empties the list.
#
# Paths that do not exist (for example an intermediate that was never
# written) are skipped, and the list is cleared so a later call does not
# try to delete the same paths again.
#
# @return [Array<String>] the (now empty) @unlink_after_creation list
def cleanup_intermediate
  @unlink_after_creation.each do |path|
    # File.symlink? also catches dangling links, which File.exist? misses
    next unless File.exist?(path) || File.symlink?(path)

    FileUtils.rm(path)
  end
  @unlink_after_creation.clear
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
module IiifPrint
  # Reads basic technical metadata (pixel dimensions, component count,
  # bit depth) directly from the bytes of a JP2 file.
  class JP2ImageMetadata
    TOKEN_MARKER_START = "\xFF".b.freeze
    TOKEN_MARKER_SIZ = "\x51".b.freeze
    TOKEN_IHDR = 'ihdr'.freeze

    attr_accessor :path

    # @param path [String] path to the JP2 file to inspect
    def initialize(path)
      @path = path
    end

    # Scans forward from byte 23 for the SIZ marker (0xFF 0x51) and reads
    # the image size from the 12 bytes that follow it.
    #
    # Informed by ISO/IEC 15444-1:2000, pp. 26-27
    # via:
    # http://hosting.astro.cornell.edu/~carcich/LRO/jp2/ISO_JPEG200_Standard/INCITS+ISO+IEC+15444-1-2000.pdf
    #
    # @param io [IO] IO stream opened in binary mode, for reading
    # @return [Array(Integer, Integer)] X size, Y size, in Integer-typed px
    # @raise [IOError] if io is not in binary mode, or the stream ends
    #   before a complete SIZ marker segment has been read
    def extract_jp2_dim(io)
      raise IOError, 'file not open in binary mode' unless io.binmode?

      # first 23 bytes are file-magic, we can skip
      io.seek(23, IO::SEEK_SET)
      byte = io.read(1)
      # io.read returns nil at end of stream, which ends the scan instead
      # of spinning forever on a file that has no SIZ marker.
      until byte.nil?
        if byte == TOKEN_MARKER_START
          # on 0xFF, look at the following byte; anything but 0x51 is
          # re-examined by the loop (it may itself be another 0xFF)
          byte = io.read(1)
          next unless byte == TOKEN_MARKER_SIZ

          # on 0x51, read next 12 bytes
          segment = io.read(12)
          raise IOError, 'truncated SIZ marker segment' if segment.nil? || segment.bytesize < 12

          # discard first 4 bytes; next 4 bytes are XSiz; last 4 bytes are YSiz
          return segment.unpack('x4NN')
        else
          byte = io.read(1)
        end
      end
      raise IOError, 'SIZ marker not found'
    end

    # Reads the component count and bit depth from the bytes following the
    # `ihdr` token in the first 64 bytes of the stream.
    #
    # @param io [IO] IO stream opened in binary mode, for reading
    # @return [Array(Integer, Integer)] number components, bits-per-component
    # @raise [IOError] if io is not in binary mode, or no complete `ihdr`
    #   payload is present in the first 64 bytes
    def extract_jp2_components(io)
      raise IOError, 'file not open in binary mode' unless io.binmode?

      io.seek(0, IO::SEEK_SET)
      # IHDR should be in first 64 bytes
      header = io.read(64)
      # rindex (not split) so a header without the token is detected
      # rather than being parsed as if it were IHDR data
      token_at = header&.rindex(TOKEN_IHDR)
      raise IOError, 'ihdr box not found' if token_at.nil?

      # skip the token itself plus 8 bytes; the next 2 bytes hold the
      # component count and the byte after that the bit depth
      fields = header.byteslice(token_at + TOKEN_IHDR.bytesize + 8, 3)
      raise IOError, 'truncated ihdr box' if fields.nil? || fields.bytesize < 3

      num_components, depth_minus_one = fields.unpack('nc')
      # stored as "bit depth of the components in the codestream, minus 1", so add 1
      [num_components, depth_minus_one + 1]
    end

    # Verifies that the 23 bytes read from the current position of io end
    # with the `ftypjp2` signature.
    #
    # @param io [IO] IO stream positioned at the start of the file
    # @raise [IOError] if the signature is absent (including an empty stream)
    def validate_jp2(io)
      # verify file is jp2
      magic = io.read(23)
      raise IOError, 'Not JP2 file' unless magic&.end_with?('ftypjp2')
    end

    # Opens the file at #path and gathers its technical metadata. The file
    # handle is closed even when validation or parsing raises.
    #
    # @return [Hash] with keys :color ('monochrome', 'gray' or 'color'),
    #   :num_components, :bits_per_component, :width and :height
    # @raise [IOError] if the file is not a readable JP2
    def technical_metadata
      x_siz, y_siz, nc, bpc = File.open(path, 'rb') do |io|
        validate_jp2(io)
        [*extract_jp2_dim(io), *extract_jp2_components(io)]
      end
      color = nc >= 3 ? 'color' : 'gray'
      {
        color: bpc == 1 ? 'monochrome' : color,
        num_components: nc,
        bits_per_component: bpc,
        width: x_siz,
        height: y_siz
      }
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
module IiifPrint
  # The purpose of this module is to encode lineage related services:
  #
  # - {.ancestor_ids_for}
  # - {.descendent_file_set_ids_for}
  #
  # The ancestor and descendent_file_sets are useful for ensuring we index together related items.
  # For example, when I have a work that is a book, and one file set per page of that book, when I
  # search the book I want to find the text within the given book's pages.
  #
  # The methods of this module should be considered as defining an interface.
  module LineageService
    ##
    # Collects the ids of the works the object is in, continuing upward
    # through every such work that is itself flagged as a child.
    #
    # @api public
    #
    # @param object [#in_works] An object that responds to #in_works
    # @return [Array<String>] unique, non-nil ancestor ids in discovery order
    def self.ancestor_ids_for(object)
      ancestor_ids = []
      object.in_works.each do |work|
        ancestor_ids << work.id
        # only works flagged as children have further ancestors to collect
        ancestor_ids.concat(ancestor_ids_for(work)) if work.is_child
      end
      ancestor_ids.flatten.compact.uniq
    end

    ##
    # Collects the object's file set ids, the file set ids of its ordered
    # works (recursively), and the object's own member ids.
    #
    # @param object [#ordered_works, #file_sets, #member_ids]
    # @return [Array<String>] unique, non-nil ids of the associated file
    #   sets together with the object's member ids
    def self.descendent_file_set_ids_for(object)
      # enables us to return parents when searching for child OCR
      file_set_ids = object.file_sets.map(&:id)
      # ordered_works may be nil, in which case there is nothing to descend into
      object.ordered_works&.each do |child|
        file_set_ids.concat(descendent_file_set_ids_for(child))
      end
      # enables us to return parents when searching for child metadata
      file_set_ids.concat(object.member_ids)
      file_set_ids.flatten.uniq.compact
    end
  end
end
|