iiif_print 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.env +5 -0
- data/.fcrepo_wrapper +4 -0
- data/.github/release.yml +20 -0
- data/.github/workflows/branches.yml +24 -0
- data/.github/workflows/build-lint-test-action.yaml +33 -0
- data/.github/workflows/release_labels.yml +25 -0
- data/.gitignore +52 -0
- data/.rubocop.yml +177 -0
- data/.solr_wrapper +8 -0
- data/.travis.yml +49 -0
- data/CONTRIBUTING.md +181 -0
- data/Dockerfile +15 -0
- data/Gemfile +52 -0
- data/LICENSE +203 -0
- data/README.md +203 -0
- data/Rakefile +38 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
- data/app/assets/config/iiif_print_manifest.js +2 -0
- data/app/assets/images/iiif_print/.keep +0 -0
- data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
- data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
- data/app/assets/javascripts/iiif_print.js +3 -0
- data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
- data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
- data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
- data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
- data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
- data/app/helpers/hyrax/iiif_helper.rb +22 -0
- data/app/helpers/iiif_print/application_helper.rb +5 -0
- data/app/helpers/iiif_print_helper.rb +64 -0
- data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
- data/app/mailers/iiif_print/application_mailer.rb +8 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
- data/app/models/concerns/iiif_print/solr/document.rb +47 -0
- data/app/models/iiif_print/application_record.rb +6 -0
- data/app/models/iiif_print/derivative_attachment.rb +8 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
- data/app/models/iiif_print/ingest_file_relation.rb +14 -0
- data/app/models/iiif_print/pending_relationship.rb +7 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
- data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
- data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
- data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
- data/app/views/catalog/_snippets_more.html.erb +16 -0
- data/app/views/hyrax/base/_representative_media.html.erb +9 -0
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
- data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
- data/bin/rails +13 -0
- data/config/fcrepo_wrapper_test.yml +5 -0
- data/config/initializers/assets.rb +2 -0
- data/config/locales/iiif_print.de.yml +148 -0
- data/config/locales/iiif_print.en.yml +119 -0
- data/config/locales/iiif_print.es.yml +148 -0
- data/config/locales/iiif_print.fr.yml +149 -0
- data/config/locales/iiif_print.it.yml +142 -0
- data/config/locales/iiif_print.pt-BR.yml +148 -0
- data/config/locales/iiif_print.zh.yml +142 -0
- data/config/solr_wrapper_test.yml +9 -0
- data/config/test-fixture/solr-config/_rest_managed.json +3 -0
- data/config/test-fixture/solr-config/admin-extra.html +31 -0
- data/config/test-fixture/solr-config/elevate.xml +36 -0
- data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
- data/config/test-fixture/solr-config/protwords.txt +21 -0
- data/config/test-fixture/solr-config/schema.xml +366 -0
- data/config/test-fixture/solr-config/scripts.conf +24 -0
- data/config/test-fixture/solr-config/solrconfig.xml +322 -0
- data/config/test-fixture/solr-config/spellings.txt +2 -0
- data/config/test-fixture/solr-config/stopwords.txt +58 -0
- data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
- data/config/test-fixture/solr-config/synonyms.txt +31 -0
- data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
- data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
- data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
- data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
- data/config/vendor/fits.xml +55 -0
- data/config/vendor/imagemagick-6-policy.xml +76 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
- data/docker-compose.yml +129 -0
- data/iiif_print.gemspec +43 -0
- data/lib/generators/iiif_print/assets_generator.rb +29 -0
- data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
- data/lib/generators/iiif_print/install_generator.rb +52 -0
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
- data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
- data/lib/iiif_print/base_derivative_service.rb +113 -0
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
- data/lib/iiif_print/catalog_search_builder.rb +31 -0
- data/lib/iiif_print/configuration.rb +99 -0
- data/lib/iiif_print/data/fileset_helper.rb +25 -0
- data/lib/iiif_print/data/path_helper.rb +40 -0
- data/lib/iiif_print/data/work_derivatives.rb +323 -0
- data/lib/iiif_print/data/work_file.rb +92 -0
- data/lib/iiif_print/data/work_files.rb +199 -0
- data/lib/iiif_print/data.rb +35 -0
- data/lib/iiif_print/engine.rb +77 -0
- data/lib/iiif_print/errors.rb +9 -0
- data/lib/iiif_print/image_tool.rb +119 -0
- data/lib/iiif_print/jobs/application_job.rb +8 -0
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
- data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
- data/lib/iiif_print/jp2_derivative_service.rb +118 -0
- data/lib/iiif_print/jp2_image_metadata.rb +81 -0
- data/lib/iiif_print/lineage_service.rb +41 -0
- data/lib/iiif_print/metadata.rb +125 -0
- data/lib/iiif_print/pdf_derivative_service.rb +42 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
- data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
- data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
- data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
- data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
- data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
- data/lib/iiif_print/text_extraction.rb +11 -0
- data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
- data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
- data/lib/iiif_print/tiff_derivative_service.rb +50 -0
- data/lib/iiif_print/version.rb +3 -0
- data/lib/iiif_print/works_controller_behavior.rb +9 -0
- data/lib/iiif_print.rb +136 -0
- data/lib/tasks/set_child_works.rake +22 -0
- data/spec/.keep.txt +1 -0
- data/spec/factories/ability.rb +6 -0
- data/spec/factories/newspaper_issue.rb +7 -0
- data/spec/factories/newspaper_page.rb +7 -0
- data/spec/factories/newspaper_page_solr_document.rb +12 -0
- data/spec/factories/newspaper_title.rb +8 -0
- data/spec/factories/uploaded_pdf_file.rb +9 -0
- data/spec/factories/uploaded_txt_file.rb +9 -0
- data/spec/factories/user.rb +13 -0
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +7 -0
- data/spec/fixtures/files/alto-2-0.xsd +714 -0
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +16 -0
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +31 -0
- data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
- data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
- data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +202 -0
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
- data/spec/helpers/iiif_print_helper_spec.rb +43 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
- data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
- data/spec/iiif_print/configuration_spec.rb +67 -0
- data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
- data/spec/iiif_print/data/work_file_spec.rb +99 -0
- data/spec/iiif_print/data/work_files_spec.rb +237 -0
- data/spec/iiif_print/image_tool_spec.rb +109 -0
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
- data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
- data/spec/iiif_print/lineage_service_spec.rb +13 -0
- data/spec/iiif_print/metadata_spec.rb +115 -0
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
- data/spec/iiif_print_spec.rb +51 -0
- data/spec/misc_shared.rb +111 -0
- data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
- data/spec/models/solr_document_spec.rb +14 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
- data/spec/spec_helper.rb +181 -0
- data/spec/support/controller_level_helpers.rb +28 -0
- data/spec/support/iiif_print_models.rb +127 -0
- data/spec/test_app_templates/blacklight.yml +9 -0
- data/spec/test_app_templates/fedora.yml +15 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
- data/spec/test_app_templates/redis.yml +9 -0
- data/spec/test_app_templates/solr/conf/schema.xml +362 -0
- data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
- data/spec/test_app_templates/solr.yml +7 -0
- data/tasks/iiif_print_dev.rake +34 -0
- data/tmp/.keep +0 -0
- metadata +605 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module IiifPrint
  module Data
    # Mixin for methods related to paths on filesystem.
    #
    # Provides helpers to tell URIs from plain paths, to convert between the
    # two, and to verify that a local path exists and lives inside one of the
    # directories listed in `IiifPrint.config.registered_ingest_dirs`.
    module PathHelper
      # Leading `file://` scheme, anchored at the very start of the string
      # (`\A`), so an embedded newline cannot move the anchor.
      FILE_SCHEME_PREFIX = %r{\Afile://}.freeze

      # Expand a local path to absolute form; URIs are returned untouched.
      # @param path [#to_s] local path or URI
      # @return [String] absolute path, or the URI as given
      def normalize_path(path)
        path = path.to_s
        isuri?(path) ? path : File.expand_path(path)
      end

      # Whether the given string contains something that parses as a URI.
      # Uses the parser's generic URI pattern instead of the obsolete
      # `URI.regexp` shortcut; the pattern itself is the same.
      # @param path [#to_s]
      # @return [Boolean]
      def isuri?(path)
        path.to_s.match?(URI::DEFAULT_PARSER.make_regexp)
      end

      # Spell a local path as a `file://` URI; URIs are returned untouched.
      # @param path [String]
      # @return [String]
      def path_to_uri(path)
        isuri?(path) ? path : "file://#{path}"
      end

      # Whether path is located *inside* one of the registered ingest
      # directories. The comparison is made against the directory name plus a
      # trailing separator, so `/data/ingest_other/x` does not pass for a
      # registered `/data/ingest`; the directory itself never matches.
      # @param path [#to_s] absolute path to test
      # @return [Boolean]
      def registered_ingest_path(path)
        path = path.to_s
        IiifPrint.config.registered_ingest_dirs.any? do |dir|
          prefix = "#{dir.to_s.chomp(File::SEPARATOR)}#{File::SEPARATOR}"
          path.start_with?(prefix) && path.length > prefix.length
        end
      end

      # Check that a path (or file URI) exists and is inside a registered
      # ingest directory.
      # @param path [#to_s] local path or `file://` URI
      # @return [nil] when the path is acceptable
      # @raise [IOError] if nothing exists at the path
      # @raise [SecurityError] if the path is outside the registered dirs
      def validate_path(path)
        # treat file URIs equivalent to local paths
        path = File.expand_path(path.to_s.sub(FILE_SCHEME_PREFIX, ''))
        # make sure file exists
        raise IOError, "Not found: #{path}" unless File.exist?(path)
        return if registered_ingest_path(path)
        # we cannot use path if it is not in the registered list for Hyrax ingest, we
        # would prefer to fail early vs. later+silently
        raise SecurityError,
              "Path specified is not configured in Hyrax ingest registered list: " \
              "#{path}"
      end
    end
  end
end
|
@@ -0,0 +1,323 @@
|
|
1
|
+
require 'hyrax'
|
2
|
+
|
3
|
+
module IiifPrint
|
4
|
+
module Data
|
5
|
+
# TODO: consider compositional refactoring (not mixins), but this
|
6
|
+
# may make readability/comprehendability higher, and yield
|
7
|
+
# higher applied/practical complexity.
|
8
|
+
class WorkDerivatives
|
9
|
+
include IiifPrint::Data::FilesetHelper
|
10
|
+
include IiifPrint::Data::PathHelper
|
11
|
+
|
12
|
+
# Work is primary adapted context
|
13
|
+
# @return [ActiveFedora::Base] Hyrax work-type object
|
14
|
+
attr_accessor :work
|
15
|
+
|
16
|
+
# FileSet is secondary adapted context
|
17
|
+
# @return [FileSet] fileset for work, with regard to these derivatives
|
18
|
+
attr_accessor :fileset
|
19
|
+
|
20
|
+
# Parent pointer to WorkFile object representing fileset
|
21
|
+
# @return [IiifPrint::Data::WorkFile] WorkFile for fileset, work pair
|
22
|
+
attr_accessor :parent
|
23
|
+
|
24
|
+
# Assigned attachment queue (of paths)
|
25
|
+
# @return [Array<String>] list of paths queued for attachment
|
26
|
+
attr_accessor :assigned
|
27
|
+
|
28
|
+
# Assigned deletion queue (of destination names)
|
29
|
+
# @return [Array<String>] list of destination names queued for deletion
|
30
|
+
attr_accessor :unassigned
|
31
|
+
|
32
|
+
# mapping of special names Hyrax uses for derivatives, not extension:
|
33
|
+
@remap_names = {
|
34
|
+
'jpeg' => 'thumbnail'
|
35
|
+
}
|
36
|
+
class << self
|
37
|
+
attr_accessor :remap_names
|
38
|
+
end
|
39
|
+
|
40
|
+
# @param from [Object] the work from which we'll extract the given type of data.
|
41
|
+
# @param of_type [String] the type of data we want extracted from the work (e.g. "txt", "json")
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
def self.data(from:, of_type:)
  # Build a throwaway adapter for the work, then read the named derivative.
  adapter = new(from)
  adapter.data(of_type)
end
|
47
|
+
|
48
|
+
# alternate constructor spelling:
|
49
|
+
# Alias for `new`, taking the same positional arguments.
# @param work [Object] adapted work
# @param fileset [Object, nil] fileset, passed through to `new`
# @param parent [Object, nil] parent pointer, passed through to `new`
# @return [WorkDerivatives] new adapter instance
def self.of(work, fileset = nil, parent = nil)
  new(work, fileset, parent)
end
|
52
|
+
|
53
|
+
# Adapt work and either specific or first fileset
|
54
|
+
# Set up adapter state for a work and one of its filesets.
# @param work [Object] adapted context, usually a work
# @param fileset [Object, nil] fileset to adapt; when nil, the value of
#   `first_fileset` is used instead
# @param parent [Object, nil] WorkFile object these derivatives belong to
def initialize(work, fileset = nil, parent = nil)
  # adapted context usually work, may be string id of FileSet
  @work = work
  # NOTE(review): first_fileset is not defined in this class; presumably it
  # comes from the included FilesetHelper and reads @work, which is why
  # @work is assigned first -- confirm before reordering.
  @fileset = fileset.nil? ? first_fileset : fileset
  # computed name-to-path mapping, initially nil as sentinel for JIT load
  @paths = nil
  # assignments for attachment
  @assigned = []
  # un-assignments for deletion
  @unassigned = []
  # parent is IiifPrint::Data::WorkFile object for derivatives
  @parent = parent
end
|
67
|
+
|
68
|
+
# Assignment state
|
69
|
+
# @return [String] A label describing the state of assignment queues
|
70
|
+
def state
  # Refresh @paths first so 'empty' vs 'saved' reflects the current listing.
  load_paths
  queues_idle = @unassigned.empty? && @assigned.empty?
  if !queues_idle
    'dirty'
  elsif @paths.keys.empty?
    'empty'
  else
    'saved'
  end
end
|
76
|
+
|
77
|
+
# Assign a path to assigned queue for attachment
|
78
|
+
# @param path [String] Path to source file
|
79
|
+
def assign(path)
  source = normalize_path(path)
  validate_path(source)
  @assigned << source
  # The queue entry lives in the transient @assigned list and is also
  # mirrored to the db to share context with other components.
  destination = path_destination_name(source)
  log_assignment(source, destination)
end
|
87
|
+
|
88
|
+
# Assign a destination name to unassigned queue for deletion -- OR --
|
89
|
+
# remove a path from queue of assigned items
|
90
|
+
# @param name [String] Destination name (file extension), or source path
|
91
|
+
def unassign(name)
  # A queued source path is dropped from the attachment queue and its
  # db-mirrored assignment is removed.
  queued = @assigned.include?(name)
  if queued
    @assigned.delete(name)
    unlog_assignment(name, path_destination_name(name))
  end
  # A destination name with an existing derivative is queued for deletion.
  return unless exist?(name)
  @unassigned << name
end
|
100
|
+
|
101
|
+
# commit pending changes to work files
|
102
|
+
# beginning with removals, then with new assignments
|
103
|
+
def commit!
  # Removals run before attachments.
  @unassigned.each do |name|
    delete(name)
  end
  @assigned.each { |source| attach(source, path_destination_name(source)) }
  # reset queues after work is complete
  @assigned = []
  @unassigned = []
end
|
112
|
+
|
113
|
+
# Given a fileset meeting both of the following conditions:
|
114
|
+
# 1. a non-nil import_url value;
|
115
|
+
# 2. is attached to a work (persisted in Fedora, if not yet in Solr)...
|
116
|
+
# ...this method gets associated derivative paths queued and attach all.
|
117
|
+
# @param file_set [FileSet] saved file set, attached to work,
|
118
|
+
# with identifier, and a non-nil import_url
|
119
|
+
# Attach every derivative that was queued (in the db) for the primary file
# behind the given fileset, then reload @paths.
# @param file_set [FileSet] saved file set, attached to work,
#   with identifier, and a non-nil import_url
# @raise [ArgumentError] if the fileset has no import_url or no parent work
def commit_queued!(file_set)
  raise ArgumentError, 'No FileSet import_url' if file_set.import_url.nil?
  import_path = file_url_to_path(file_set.import_url)
  work = file_set.member_of.select(&:work?)[0]
  raise ArgumentError, 'Work not found for fileset' if work.nil?
  derivatives = WorkDerivatives.of(work, file_set)
  IngestFileRelation.derivatives_for_file(import_path).each do |path|
    next unless File.exist?(path)
    attachment_record = DerivativeAttachment.where(path: path).first
    # unlog_assignment destroys the attachment record but keeps the primary
    # file relation, so a relation without a record means the derivative was
    # un-assigned: skip it rather than fail on nil.
    next if attachment_record.nil?
    derivatives.attach(path, attachment_record.destination_name)
    # update previously nil fileset id
    attachment_record.fileset_id = file_set.id
    attachment_record.save!
  end
  @fileset ||= file_set
  load_paths
end
|
136
|
+
|
137
|
+
# attach a single derivative file to work
|
138
|
+
# @param file [String, IO] path to file or IO object
|
139
|
+
# @param name [String] destination name, usually file extension
|
140
|
+
# Copy a single derivative file into the derivative location for the
# fileset, then reload @paths.
# @param file [String, IO] path to file or IO object
# @param name [String] destination name, usually file extension
# @raise [RuntimeError] if there is no fileset to attach to
def attach(file, name)
  raise 'Cannot save for nil fileset' if fileset.nil?
  mkdir_pairtree
  path = path_factory.derivative_path_for_reference(fileset, name)
  # if file argument is path, copy file
  if file.is_a?(String)
    FileUtils.copy(file, path)
  else
    # otherwise, presume file is an IO and stream it from its beginning.
    # note: does not close input file/IO, presume that is caller's
    #   responsibility; its read position is restored even if writing fails.
    orig_pos = file.tell
    file.seek(0)
    begin
      # binary mode: derivative payloads are written byte-for-byte, and
      # copy_stream avoids holding the whole payload in memory.
      File.open(path, 'wb') { |dstfile| IO.copy_stream(file, dstfile) }
    ensure
      file.seek(orig_pos)
    end
  end
  # finally, reload @paths after mutation
  load_paths
end
|
159
|
+
|
160
|
+
# Delete a derivative file from work, by destination name
|
161
|
+
# @param name [String] destination name, usually file extension
|
162
|
+
def delete(name, force: nil)
  raise 'Cannot save for nil fileset' if fileset.nil?
  target = path_factory.derivative_path_for_reference(fileset, name)
  # Only the file is removed; the pairtree directory stays, even if it
  # ends up empty.
  if File.exist?(target)
    FileUtils.rm(target, force: force)
  end
  # @paths is reloaded after the mutation
  load_paths
end
|
171
|
+
|
172
|
+
# (Re)load all destination-name => path pairs into @paths; called on first access and again after each mutation
|
173
|
+
def load_paths
  id = fileset_id
  unless id.nil?
    # derivative paths for the fileset, keyed by destination name
    found = path_factory.derivatives_for_reference(id)
    return @paths = found.each_with_object({}) do |derivative_path, by_name|
      by_name[path_destination_name(derivative_path)] = derivative_path
    end
  end
  # without a fileset id there is nothing to list
  @paths = {}
  nil
end
|
184
|
+
|
185
|
+
# path to existing derivative file for destination name
|
186
|
+
# @param name [String] destination name, usually file extension
|
187
|
+
# @return [String, NilClass] path (or nil)
|
188
|
+
def path(name)
  load_paths if @paths.nil?
  candidate = @paths[name]
  # unknown destination name
  return nil if candidate.nil?
  # known name, but the file is no longer on disk
  return nil unless File.exist?(candidate)
  candidate
end
|
194
|
+
|
195
|
+
# Run a block in context of the opened derivative file for reading
|
196
|
+
# @param name [String] destination name, usually file extension
|
197
|
+
# @param block [Proc] block/proc to run in context of file IO
|
198
|
+
def with_io(name, &block)
  filepath = path(name)
  return if filepath.nil?
  # text-like derivatives are read as UTF-8, everything else as raw bytes
  textual = %w[xml txt html].include?(name)
  File.open(filepath, textual ? 'rb:UTF-8' : 'rb', &block)
end
|
204
|
+
|
205
|
+
# Get number of derivatives or, if a destination name argument
|
206
|
+
# is provided, the size of derivative file
|
207
|
+
# @param name [String] optional destination name, usually file extension
|
208
|
+
# @return [Integer] size in bytes
|
209
|
+
def size(name = nil)
  load_paths if @paths.nil?
  # no name: count of derivatives; with a name: byte size of that file
  name.nil? ? @paths.size : File.size(@paths[name])
end
|
214
|
+
|
215
|
+
# Check if derivative file exists for destination name
|
216
|
+
# @param name [String] optional destination name, usually file extension
|
217
|
+
# @return [TrueClass, FalseClass] boolean
|
218
|
+
def exist?(name)
  # #path looks the name up in @paths (loading it when needed) and returns
  # nil unless the mapped file is present on disk.
  !path(name).nil?
end
|
222
|
+
|
223
|
+
# Get raw binary or encoded text data of file as a String
|
224
|
+
# @param name [String] destination name, usually file extension
|
225
|
+
# @return [String] Raw bytes, or if text file, a UTF-8 encoded String
|
226
|
+
def data(name)
  # with_io yields nothing (and returns nil) when no derivative exists
  contents = with_io(name) { |io| io.read }
  contents.nil? ? '' : '' + contents
end
|
233
|
+
|
234
|
+
private
|
235
|
+
|
236
|
+
def primary_file_path
  unless fileset.nil?
    # with a fileset, the primary file is whatever its import_url points to
    import_url = fileset.import_url
    return import_url.nil? ? nil : file_url_to_path(import_url)
  end
  # without a fileset, fall back to *intent*: the first path assigned on
  # the parent's own parent, for the single-file work case.
  work_file = parent
  return if work_file.nil?
  container = work_file.parent
  return if container.nil?
  container.assigned[0]
end
|
249
|
+
|
250
|
+
# Strip a leading `file://` scheme from a URL, leaving the local path.
# Only the prefix is removed; a later occurrence of `file://` is part of
# the path and is kept.
# @param url [String] file URL (or plain path, returned unchanged)
# @return [String] local path
def file_url_to_path(url)
  url.delete_prefix('file://')
end
|
253
|
+
|
254
|
+
def log_primary_file_relation(path)
  source = primary_file_path
  # nothing to relate when the primary file is unknown
  return if source.nil?
  relation = { file_path: source, derivative_path: path }
  IiifPrint::IngestFileRelation.create!(**relation)
end
|
262
|
+
|
263
|
+
def log_assignment(path, name)
  # record the queued derivative, then its link to the primary file
  attachment = { fileset_id: fileset_id, path: path, destination_name: name }
  IiifPrint::DerivativeAttachment.create!(**attachment)
  log_primary_file_relation(path)
end
|
271
|
+
|
272
|
+
def unlog_assignment(path, name)
  criteria = { path: path, destination_name: name }
  # narrow by fileset only when one is known
  fsid = fileset_id
  criteria = { fileset_id: fsid }.merge(criteria) unless fsid.nil?
  IiifPrint::DerivativeAttachment.where(**criteria).destroy_all
  # note: there is deliberately no attempt to "unlog" primary
  #   file relation, as leaving it should have no side-effect.
end
|
288
|
+
|
289
|
+
# Destination name for a path: its file extension, remapped through the
# class-level `remap_names` table (e.g. 'jpeg' => 'thumbnail').
#
# The extension is taken from the file name only, so dots in directory
# names are ignored; a file without an extension falls back to its base
# name instead of leaking directory separators into the name.
# @param path [#to_s] source or derivative path
# @return [String] destination name
def path_destination_name(path)
  path = path.to_s
  ext = File.extname(path).delete_prefix('.')
  ext = File.basename(path) if ext.empty?
  self.class.remap_names[ext] || ext
end
|
293
|
+
|
294
|
+
def respond_to_missing?(symbol, include_priv = false)
  # anything an (empty) Hash answers to is proxied by method_missing
  hash_like = {}
  hash_like.respond_to?(symbol, include_priv)
end
|
297
|
+
|
298
|
+
def method_missing(method, *args, &block)
  # anything that is not a mapping/hash enumeration method is not ours
  return super unless respond_to_missing?(method)
  # make sure @paths is loaded, then proxy the call to it
  load_paths if @paths.nil?
  @paths.send(method, *args, &block)
end
|
307
|
+
|
308
|
+
# Object used to compute derivative paths; the methods called on it in this
# class are derivative_path_for_reference and derivatives_for_reference.
# @return [Class] Hyrax::DerivativePath
def path_factory
  Hyrax::DerivativePath
end
|
311
|
+
|
312
|
+
# make shared path for derivatives to live, given
|
313
|
+
def mkdir_pairtree
  # Hyrax::DerivativePath has no public method to directly get the bare
  #   pairtree directory for a fileset, so it is inferred: compute a
  #   derivative path for an empty name and drop its last segment.
  probe = path_factory.derivative_path_for_reference(fileset, '')
  segments = probe.split('/')
  dir = File.join(segments[0...-1])
  return if Dir.exist?(dir)
  FileUtils.mkdir_p(dir)
end
|
321
|
+
end
|
322
|
+
end
|
323
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
|
3
|
+
require 'hyrax'
|
4
|
+
|
5
|
+
module IiifPrint
|
6
|
+
module Data
|
7
|
+
# WorkFile is a read-only convenience wrapper for just-in-time
|
8
|
+
# file operations, and is the type of values returned by
|
9
|
+
# IiifPrint::Data::WorkFiles (container) adapter.
|
10
|
+
class WorkFile
  # accessors for adaptation relationships:
  attr_accessor :work, :parent, :fileset
  # delegate these metadata properties to @fileset.original_file:
  delegate :size, :date_created, :date_modified, :mime_type, to: :unwrapped

  # alternate constructor spelling:
  def self.of(work, fileset = nil, parent = nil)
    new(work, fileset, parent)
  end

  # @param work [Object] work the file belongs to
  # @param fileset [Object, nil] fileset wrapping the file
  # @param parent [Object, nil] WorkFiles (container) object, if applicable
  def initialize(work, fileset = nil, parent = nil)
    @work = work
    # A nil fileset is stored as given (no lookup happens here); the
    # readers below then return nil, or '' in the case of #data.
    @fileset = fileset
    # Parent is WorkFiles (container) object, if applicable:
    @parent = parent
  end

  # Get original repository object representing file (not fileset).
  # @return [ActiveFedora::File, nil] repository file persistence object,
  #   or nil when there is no fileset
  def unwrapped
    return nil if @fileset.nil?
    @fileset.original_file
  end

  # Two WorkFile objects are equal when their original files share an id.
  # Anything that cannot be unwrapped to an original file (a foreign
  # object, or a WorkFile without fileset/file) compares as not equal
  # instead of raising.
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return false if @fileset.nil?
    return false unless other.respond_to?(:unwrapped)
    mine = unwrapped
    theirs = other.unwrapped
    return false if mine.nil? || theirs.nil?
    mine.id == theirs.id
  end

  # Get path to working copy of file on local filesystem;
  #   checkout file from repository/source as needed.
  # @return [String, nil] path to working copy of binary; nil without fileset
  def path
    return nil if @fileset.nil?
    checkout
  end

  # Read data from working copy of file on local filesystem;
  #   checkout file from repository/source as needed.
  # @return [String] byte data of binary/file payload; '' without fileset
  def data
    return '' if @fileset.nil?
    File.read(path, mode: 'rb')
  end

  # Run block/proc upon data of file;
  #   checkout file from repository/source as needed.
  # @yield [io] read-only IO or File object to block/proc.
  # @return [Object, nil] the block's result; nil when there is no path
  def with_io(&block)
    filepath = path
    return if filepath.nil?
    File.open(filepath, 'rb', &block)
  end

  # Get filename from stored metadata
  # @return [String, nil] file name stored in repository metadata for file
  def name
    return nil if @fileset.nil?
    unwrapped.original_name
  end

  # Derivatives for fileset associated with this primary file object
  # @return [IiifPrint::Data::WorkDerivatives] derivatives adapter
  def derivatives
    IiifPrint::Data::WorkDerivatives.of(work, fileset, self)
  end

  private

  # Working-copy path for the fileset's original file.
  def checkout
    file = @fileset.original_file
    # find_or_retrieve returns path to working copy, but only
    #   fetches from Fedora if no working copy exists on filesystem.
    # NOTE: there may be some benefit to memoizing to avoid
    #   call and File.exist? IO operation, but YAGNI for now.
    Hyrax::WorkingDirectory.find_or_retrieve(file.id, @fileset.id)
  end
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
module IiifPrint
|
2
|
+
module Data
|
3
|
+
class WorkFiles
|
4
|
+
include IiifPrint::Data::PathHelper
|
5
|
+
|
6
|
+
attr_accessor :work, :assigned, :unassigned
|
7
|
+
delegate :include?, to: :keys
|
8
|
+
|
9
|
+
# alternate constructor spelling:
|
10
|
+
# Alias for `new`.
# @param work [Object] work whose files are adapted
# @return [WorkFiles] new adapter instance
def self.of(work)
  new(work)
end
|
13
|
+
|
14
|
+
##
|
15
|
+
# A convenience method to associate files (original and derivatives) to the given work.
|
16
|
+
#
|
17
|
+
# @param to [Object] the work to which we're assigning the file(s) for the given paths.
|
18
|
+
# @param path [String] the path of the file we're assigning to the given work.
|
19
|
+
# @param derivative_paths [Array<String>] the path(s) to derivatives we'll assign to the given
|
20
|
+
# work.
|
21
|
+
# @param commit [Boolean] when true, commit the changes to the attachment.
|
22
|
+
# @return void
|
23
|
+
def self.assign!(to:, path:, derivative_paths: [], commit: true)
  attachment = new(to)
  # primary file first, then each derivative on the derivatives adapter
  attachment.assign(path)
  Array.wrap(derivative_paths).each { |extra| attachment.derivatives.assign(extra) }
  return unless commit
  attachment.commit!
end
|
31
|
+
|
32
|
+
def initialize(work)
  # both queues start empty; the derivatives adapter is built on demand
  @unassigned = []
  @assigned = []
  @work = work
  @derivatives = nil
end
|
38
|
+
|
39
|
+
# Derivatives for specified fileset or first fileset found.
|
40
|
+
# The `WorkDerivatives` adapter has assign/commit! semantics just
|
41
|
+
# like `WorkFiles`, and also acts like a hash/mapping of
|
42
|
+
# destination names (usually file extension) to path of saved
|
43
|
+
# derivative. Always returns same instance (memoized after first
|
44
|
+
# use) of `WorkDerivatives`.
|
45
|
+
# @return [IiifPrint::Data::WorkDerivatives] derivatives adapter
|
46
|
+
# Derivatives adapter for this work's files.
# @param fileset [Object, nil] optional fileset; falls back to @fileset
# @return [Object] result of WorkFile#derivatives
def derivatives(fileset: nil)
  # NOTE(review): @fileset is not assigned in the visible part of this
  # class, so this fallback presumably yields nil -- confirm.
  fileset ||= @fileset
  # a previously memoized adapter wins, whatever fileset was passed
  return @derivatives unless @derivatives.nil?
  if fileset.nil?
    # for the deferred assignment case, we have no fileset yet...
    # (this result is not memoized, so each call builds a new adapter)
    work_file = IiifPrint::Data::WorkFile.of(work, nil, self)
    return work_file.derivatives
  end
  # Otherwise, delegate actual construction to WorkFile.derivatives:
  # NOTE(review): this uses the first WorkFile in #values rather than the
  # fileset passed in -- confirm that is intended.
  @derivatives = values[0].derivatives
end
|
57
|
+
|
58
|
+
# Assignment state
|
59
|
+
# @return [String] A label describing the state of assignment queues
|
60
|
+
def state
  queues_idle = @assigned.empty? && @unassigned.empty?
  return 'dirty' unless queues_idle
  # TODO: implement 'pending' as intermediate state between 'dirty'
  #   and saved, where we look for saved state that matches what was
  #   previously assigned in THIS instance.  We can only know that
  #   changes initiated by this instance in this thread are pending
  #   because there's no global storage for the assignment queue.
  keys.empty? ? 'empty' : 'saved'
end
|
70
|
+
|
71
|
+
# List of fileset (not file) id keys, presumes system like Hyrax
|
72
|
+
# is only keeping a 1:1 between fileset and contained PCDM file,
|
73
|
+
# because derivatives are not stored in the FileSet.
|
74
|
+
# @return [Array<String>] fileset ids
|
75
|
+
def keys
  # One id per fileset returned by #filesets, in the same order.
  filesets.map { |member| member.id }
end
|
78
|
+
|
79
|
+
# List of WorkFile for each primary file
|
80
|
+
# @return [Array<IiifPrint::Data::WorkFile>] adapter for persisted
|
81
|
+
# primary file
|
82
|
+
def values
  # Resolve each fileset id through #get, preserving key order.
  keys.map { |fileset_id| get(fileset_id) }
end
|
85
|
+
|
86
|
+
# Array of [id, WorkFile] for each primary file
|
87
|
+
# @return [Array<Array>] key/value pairs for primary files of work
|
88
|
+
def entries
  # Pair every fileset id with whatever self[id] returns for it.
  filesets.map do |member|
    key = member.id
    [key, self[key]]
  end
end
|
91
|
+
|
92
|
+
# List of local file names for attachments, based on original ingested
|
93
|
+
# or uploaded file name.
|
94
|
+
# @return [Array<String>]
|
95
|
+
def names
  # Look up the original file name of each fileset, in fileset order.
  filesets.map { |member| original_name(member) }
end
|
98
|
+
|
99
|
+
# Get a WorkFile adapter representing primary file, either by name or id
|
100
|
+
# @param name_or_id [String] Fileset id or work-local file name
|
101
|
+
# @return [IiifPrint::Data::WorkFile] adapter for persisted
|
102
|
+
# primary file
|
103
|
+
def get(name_or_id)
  # A known fileset id takes precedence; anything else is treated as a
  # work-local file name.
  keys.include?(name_or_id) ? get_by_fileset_id(name_or_id) : get_by_filename(name_or_id)
end
|
107
|
+
|
108
|
+
# Assign a path to assigned queue for attachment
|
109
|
+
# @param path [String] Path to source file
|
110
|
+
def assign(path)
  normalized = normalize_path(path)
  # Validation runs before queueing; if validate_path raises, nothing
  # is added to the queue.
  validate_path(normalized)
  @assigned << normalized
end
|
115
|
+
|
116
|
+
# Assign a name or id to unassigned queue for deletion -- OR -- remove a
|
117
|
+
# path from queue of assigned items
|
118
|
+
# @param name_or_id [String] Fileset id, local file name, or source path
|
119
|
+
def unassign(name_or_id)
  # Drop it from the pending-attachment queue when it is a queued path
  # (Array#delete does nothing when the value is absent).
  @assigned.delete(name_or_id)
  # Queue it for removal when it is a known id or name.
  @unassigned << name_or_id if include?(name_or_id)
end
|
125
|
+
|
126
|
+
# commit pending changes to work files
|
127
|
+
# beginning with removals, then with new assignments
|
128
|
+
def commit!
  # Order matters to the caller-facing contract: removals are applied
  # first, then new assignments. Returns whatever commit_assigned returns.
  commit_unassigned
  commit_assigned
end
|
132
|
+
|
133
|
+
alias [] :get
|
134
|
+
|
135
|
+
private
|
136
|
+
|
137
|
+
# Get one WorkFile adapter for the fileset with the given id.
# @param id [String] fileset id
# @return [IiifPrint::Data::WorkFile, nil] adapter for the fileset, or
#   nil when the id is not among this work's fileset keys
def get_by_fileset_id(id)
  # The guard has to return: a bare `nil unless ...` is a discarded
  # expression, which let unknown ids fall through to FileSet.find.
  return unless keys.include?(id)
  fileset = FileSet.find(id)
  IiifPrint::Data::WorkFile.of(work, fileset, self)
end
|
142
|
+
|
143
|
+
# Get one WorkFile object based on filename in metadata
|
144
|
+
# @param name [String] work-local (original) file name
# @return [IiifPrint::Data::WorkFile, nil] adapter for the first fileset
#   whose original file name equals +name+, or nil when none matches
def get_by_filename(name)
  # Only the first match is used, so stop scanning as soon as it is found
  # instead of collecting every match with select.
  match = filesets.find { |fs| original_name(fs) == name }
  return if match.nil?
  IiifPrint::Data::WorkFile.of(work, match, self)
end
|
149
|
+
|
150
|
+
# @param fileset [#original_file] fileset whose original file is present
# @return the original_name reported by the fileset's original file
def original_name(fileset)
  primary_file = fileset.original_file
  primary_file.original_name
end
|
153
|
+
|
154
|
+
# @return [Array<FileSet>] members of the work that are FileSet
#   instances with a non-nil original file
def filesets
  work.members.select do |member|
    next false unless member.is_a?(FileSet)
    member.original_file
  end
end
|
158
|
+
|
159
|
+
# Resolve the user on whose behalf changes are committed.
# @return the User found by the work's depositor (looked up as email)
#   when a depositor is set; otherwise current_user when such a method
#   is defined in this context, else User.batch_user
def user
  depositor_key = work.depositor
  if depositor_key.nil?
    defined?(current_user) ? current_user : User.batch_user
  else
    User.find_by(email: depositor_key)
  end
end
|
163
|
+
|
164
|
+
# Give the work a depositor when it has none, using the user_key of
# the user resolved by #user. An existing depositor is left untouched.
def ensure_depositor
  @work.depositor = user.user_key if @work.depositor.nil?
end
|
168
|
+
|
169
|
+
# Destroy, via a Hyrax FileSetActor, every fileset queued for removal.
def commit_unassigned
  @unassigned.each do |name_or_id|
    doomed = get(name_or_id).fileset
    # The "actor" is simply a multi-adapter of fileset and user. Per the
    # original author's notes (Hyrax behavior, not visible here),
    # destroy will:
    #   1. unlink the fileset from the work, and save the work;
    #   2. destroy the fileset:
    #      - a :before_destroy callback delegates derivative cleanup
    #        to derivatives service component(s),
    #      - the fileset is removed from storage/persistence layers,
    #      - a (logging or other) :after_destroy callback is invoked.
    Hyrax::Actors::FileSetActor.new(doomed, user).destroy
    # Refresh the work after each removal.
    work.reload
  end
end
|
184
|
+
|
185
|
+
# Attach every queued source path to the work by passing them through
# the Hyrax actor stack as :remote_files. Does nothing when the queue
# is blank.
def commit_assigned
  return if @assigned.blank?
  ensure_depositor
  uploads = @assigned.map do |source_path|
    { url: path_to_uri(source_path), file_name: File.basename(source_path) }
  end
  attrs = { remote_files: uploads }
  # Environment for the actor stack: work, ability of the user, attributes.
  env = Hyrax::Actors::Environment.new(@work, Ability.new(user), attrs)
  # Run the default Hyrax actor stack middleware: create for a new
  # record, update for a persisted one.
  if @work.new_record?
    Hyrax::CurationConcern.actor.create(env)
  else
    Hyrax::CurationConcern.actor.update(env)
  end
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'iiif_print/data/fileset_helper'
|
2
|
+
require 'iiif_print/data/path_helper'
|
3
|
+
require 'iiif_print/data/work_derivatives'
|
4
|
+
require 'iiif_print/data/work_files'
|
5
|
+
require 'iiif_print/data/work_file'
|
6
|
+
|
7
|
+
module IiifPrint
  # Data access helpers and adapter classes that support and enhance
  # IiifPrint work models.
  module Data
    class << self
      # Handler for Hyrax's `:after_create_fileset` event. Meant to be
      # called from a block, in the app integrating iiif_print, that
      # subscribes to the event and overrides the default Hyrax handler.
      # @param file_set [FileSet] the fileset that was just created
      # @param user [User] user passed along to FileSetAttachedEventJob
      def handle_after_create_fileset(file_set, user)
        handle_queued_derivative_attachments(file_set)
        # Hyrax queues this job by default; iiif_print overrides the
        # single subscriber Hyrax uses to do so, hence it is queued here.
        FileSetAttachedEventJob.perform_later(file_set, user)
        parent = file_set.member_of[0]
        # The original author notes that Hyrax's
        # CreateWithRemoteFilesActor omits this job, so it is queued
        # here, once there is a fileset to copy permissions to.
        InheritPermissionsJob.perform_later(parent) unless parent.nil?
      end

      # Commit derivative attachments queued for the given fileset. Does
      # nothing when the fileset has no import_url.
      # @param file_set [FileSet] the fileset that was just created
      def handle_queued_derivative_attachments(file_set)
        return if file_set.import_url.nil?
        parent_work = file_set.member_of.find { |candidate| candidate.work? }
        # For now, because this is an IO-bound operation, it makes sense
        # to run it inline rather than as a job.
        IiifPrint::Data::WorkDerivatives.of(parent_work).commit_queued!(file_set)
      end
    end
  end
end
|