iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,40 @@
require 'uri'

module IiifPrint
  module Data
    # Mixin for methods related to paths on filesystem
    module PathHelper
      # Normalize a source location: URIs are passed through untouched,
      # anything else is expanded to an absolute filesystem path.
      # @param path [#to_s] local path or URI
      # @return [String] URI (unchanged) or absolute path
      def normalize_path(path)
        path = path.to_s
        isuri?(path) ? path : File.expand_path(path)
      end

      # Is the given value a URI, i.e. does it *begin* with "scheme://"?
      #
      # The check is anchored to the start of the string so that a local
      # path merely containing a colon (e.g. a timestamped file name such as
      # "/ingest/2020-01-01T10:00:00.tiff") is not mistaken for a URI.
      # @param path [#to_s] local path or URI
      # @return [Boolean]
      def isuri?(path)
        path.to_s.match?(%r{\A[a-z][a-z\d+.-]*://}i)
      end

      # @param path [String] local path or URI
      # @return [String] the URI unchanged, or a file:// URI for a path
      def path_to_uri(path)
        isuri?(path) ? path : "file://#{path}"
      end

      # Is the path located *inside* one of the directories listed in
      # IiifPrint.config.registered_ingest_dirs?
      #
      # The comparison is made on a directory boundary, so registering
      # "/data/ingest" does not also admit "/data/ingest_other/file".
      # The registered directory itself is not considered to be inside itself.
      # @param path [String] absolute path to check
      # @return [Boolean]
      def registered_ingest_path(path)
        IiifPrint.config.registered_ingest_dirs.any? do |dir|
          base = dir.to_s
          base += File::SEPARATOR unless base.end_with?(File::SEPARATOR)
          path.start_with?(base) && path.length > base.length
        end
      end

      # Ensure a path (or file:// URI) exists and is inside a registered
      # ingest directory.
      # @param path [String] local path or file:// URI
      # @return [nil] when the path is acceptable
      # @raise [IOError] if nothing exists at the path
      # @raise [SecurityError] if the path is outside the registered dirs
      def validate_path(path)
        # treat file URIs equivalent to local paths; \A anchors to the very
        # start of the string (not merely the start of any line)
        path = File.expand_path(path.to_s.sub(%r{\Afile://}, ''))
        # make sure file exists
        raise IOError, "Not found: #{path}" unless File.exist?(path)
        return if registered_ingest_path(path)
        # we cannot use path if it is not in the registered list for Hyrax
        # ingest, we would prefer to fail early vs. later+silently
        raise SecurityError,
              "Path specified is not configured in Hyrax ingest registered list: " \
              "#{path}"
      end
    end
  end
end
@@ -0,0 +1,323 @@
require 'hyrax'

module IiifPrint
  module Data
    # Adapter around the derivative files stored for one fileset of a work.
    #
    # Offers assign/unassign/commit! queue semantics for attaching and
    # removing derivative files, and otherwise behaves like a read-only
    # hash of destination name (usually a file extension) => path of the
    # stored derivative: Hash methods not defined here are proxied to that
    # mapping through method_missing.
    #
    # TODO: consider compositional refactoring (not mixins), but this
    #   may make readability/comprehensibility higher, and yield
    #   higher applied/practical complexity.
    class WorkDerivatives
      include IiifPrint::Data::FilesetHelper
      include IiifPrint::Data::PathHelper

      # Work is primary adapted context
      # @return [ActiveFedora::Base] Hyrax work-type object
      attr_accessor :work

      # FileSet is secondary adapted context
      # @return [FileSet] fileset for work, with regard to these derivatives
      attr_accessor :fileset

      # Parent pointer to WorkFile object representing fileset
      # @return [IiifPrint::Data::WorkFile] WorkFile for fileset, work pair
      attr_accessor :parent

      # Assigned attachment queue (of paths)
      # @return [Array<String>] list of paths queued for attachment
      attr_accessor :assigned

      # Assigned deletion queue (of destination names)
      # @return [Array<String>] list of destination names queued for deletion
      attr_accessor :unassigned

      # mapping of special names Hyrax uses for derivatives, not extension:
      @remap_names = {
        'jpeg' => 'thumbnail'
      }
      class << self
        attr_accessor :remap_names
      end

      # @param from [Object] the work from which we'll extract the given type of data.
      # @param of_type [String] the type of data we want extracted from the work (e.g. "txt", "json")
      #
      # @return [String]
      def self.data(from:, of_type:)
        new(from).data(of_type)
      end

      # alternate constructor spelling:
      def self.of(work, fileset = nil, parent = nil)
        new(work, fileset, parent)
      end

      # Adapt work and either specific or first fileset
      # @param work [Object] adapted context; usually a work
      # @param fileset [FileSet, nil] specific fileset; when nil the result
      #   of `first_fileset` (provided by the FilesetHelper mixin) is used
      # @param parent [IiifPrint::Data::WorkFile, nil] owning WorkFile
      def initialize(work, fileset = nil, parent = nil)
        # adapted context usually work, may be string id of FileSet
        @work = work
        @fileset = fileset.nil? ? first_fileset : fileset
        # computed name-to-path mapping, initially nil as sentinel for JIT load
        @paths = nil
        # assignments for attachment
        @assigned = []
        # un-assignments for deletion
        @unassigned = []
        # parent is IiifPrint::Data::WorkFile object for derivatives
        @parent = parent
      end

      # Assignment state
      # @return [String] 'dirty' when either queue has pending entries,
      #   'empty' when no derivatives are stored, otherwise 'saved'
      def state
        load_paths
        return 'dirty' unless @unassigned.empty? && @assigned.empty?
        return 'empty' if @paths.keys.empty?
        'saved'
      end

      # Assign a path to assigned queue for attachment
      # @param path [String] Path to source file
      # @raise [IOError, SecurityError] from validate_path
      def assign(path)
        path = normalize_path(path)
        validate_path(path)
        @assigned.push(path)
        # We are keeping assignment both in ephemeral, transient @assigned
        # and mirroring to db to share context with other components:
        log_assignment(path, path_destination_name(path))
      end

      # Assign a destination name to unassigned queue for deletion -- OR --
      # remove a path from queue of assigned items
      # @param name [String] Destination name (file extension), or source path
      def unassign(name)
        # if name is queued path, remove from @assigned queue:
        if @assigned.include?(name)
          @assigned.delete(name)
          unlog_assignment(name, path_destination_name(name))
        end
        # if name is known destination name, remove
        @unassigned.push(name) if exist?(name)
      end

      # commit pending changes to work files
      # beginning with removals, then with new assignments
      def commit!
        @unassigned.each { |name| delete(name) }
        @assigned.each do |path|
          attach(path, path_destination_name(path))
        end
        # reset queues after work is complete
        @assigned = []
        @unassigned = []
      end

      # Given a fileset meeting both of the following conditions:
      #   1. a non-nil import_url value;
      #   2. is attached to a work (persisted in Fedora, if not yet in Solr)...
      # ...this method gets associated derivative paths queued and attach all.
      # @param file_set [FileSet] saved file set, attached to work,
      #   with identifier, and a non-nil import_url
      # @raise [ArgumentError] when import_url is nil or no work is found
      def commit_queued!(file_set)
        raise ArgumentError, 'No FileSet import_url' if file_set.import_url.nil?
        import_path = file_url_to_path(file_set.import_url)
        work = file_set.member_of.select(&:work?)[0]
        raise ArgumentError, 'Work not found for fileset' if work.nil?
        derivatives = WorkDerivatives.of(work, file_set)
        IngestFileRelation.derivatives_for_file(import_path).each do |path|
          next unless File.exist?(path)
          attachment_record = DerivativeAttachment.where(path: path).first
          # unassign destroys the attachment record but deliberately leaves
          # the file relation behind, so a relation without an attachment
          # record means "no longer queued": skip it instead of failing.
          next if attachment_record.nil?
          derivatives.attach(path, attachment_record.destination_name)
          # update previously nil fileset id
          attachment_record.fileset_id = file_set.id
          attachment_record.save!
        end
        @fileset ||= file_set
        load_paths
      end

      # attach a single derivative file to work
      # @param file [String, IO] path to file or IO object
      # @param name [String] destination name, usually file extension
      # @raise [RuntimeError] when there is no fileset to attach to
      def attach(file, name)
        raise 'Cannot save for nil fileset' if fileset.nil?
        mkdir_pairtree
        path = path_factory.derivative_path_for_reference(fileset, name)
        # if file argument is path, copy file
        if file.is_a? String
          FileUtils.copy(file, path)
        else
          # otherwise, presume file is an IO, read, write it
          # note: does not close input file/IO, presume that is caller's
          #       responsibility.
          orig_pos = file.tell
          begin
            file.seek(0)
            # binary mode: derivative payloads (images, PDFs) must be
            # written byte-for-byte, without newline/encoding conversion
            File.open(path, 'wb') { |dstfile| dstfile.write(file.read) }
          ensure
            # hand the IO back positioned where the caller left it
            file.seek(orig_pos)
          end
        end
        # finally, reload @paths after mutation
        load_paths
      end

      # Delete a derivative file from work, by destination name
      # @param name [String] destination name, usually file extension
      # @param force [Boolean, nil] passed through to FileUtils.rm
      # @raise [RuntimeError] when there is no fileset
      def delete(name, force: nil)
        raise 'Cannot save for nil fileset' if fileset.nil?
        path = path_factory.derivative_path_for_reference(fileset, name)
        # will remove file, if it exists; won't remove pairtree, even
        #   if it becomes empty, as that is excess scope.
        FileUtils.rm(path, force: force) if File.exist?(path)
        # finally, reload @paths after mutation
        load_paths
      end

      # (Re)load the destination-name => path mapping into @paths.
      # Called lazily by readers when @paths is nil, and explicitly after
      # every mutation. Yields an empty mapping when there is no fileset id.
      def load_paths
        fsid = fileset_id
        if fsid.nil?
          @paths = {}
          return
        end
        # list of paths
        paths = path_factory.derivatives_for_reference(fsid)
        # names from paths
        @paths = paths.map { |e| [path_destination_name(e), e] }.to_h
      end

      # path to existing derivative file for destination name
      # @param name [String] destination name, usually file extension
      # @return [String, NilClass] path (or nil)
      def path(name)
        load_paths if @paths.nil?
        result = @paths[name]
        return if result.nil?
        File.exist?(result) ? result : nil
      end

      # Run a block in context of the opened derivative file for reading
      # @param name [String] destination name, usually file extension
      # @param block [Proc] block/proc to run in context of file IO
      # @return [Object, nil] the block's value, or nil when no file exists
      def with_io(name, &block)
        # text formats are read as UTF-8, everything else as raw bytes
        mode = ['xml', 'txt', 'html'].include?(name) ? 'rb:UTF-8' : 'rb'
        filepath = path(name)
        return if filepath.nil?
        File.open(filepath, mode, &block)
      end

      # Get number of derivatives or, if a destination name argument
      #   is provided, the size of derivative file
      # @param name [String] optional destination name, usually file extension
      # @return [Integer] count of derivatives, or size in bytes of the named
      #   derivative (0 when no such derivative file exists)
      def size(name = nil)
        load_paths if @paths.nil?
        return @paths.size if name.nil?
        file_path = path(name)
        file_path.nil? ? 0 : File.size(file_path)
      end

      # Check if derivative file exists for destination name
      # @param name [String] optional destination name, usually file extension
      # @return [TrueClass, FalseClass] boolean
      def exist?(name)
        # #path only answers non-nil for a known name whose file is on disk
        !path(name).nil?
      end

      # Get raw binary or encoded text data of file as a String
      # @param name [String] destination name, usually file extension
      # @return [String] Raw bytes, or if text file, a UTF-8 encoded String;
      #   empty String when there is no such derivative
      def data(name)
        with_io(name, &:read) || ''
      end

      private

      # Path of the primary (source) file these derivatives belong to.
      # @return [String, nil]
      def primary_file_path
        if fileset.nil?
          # if there is a nil fileset, we look for *intent* in the form
          #   of the first assigned file path for single-file work.
          work_file = parent
          return if work_file.nil?
          work_files = work_file.parent
          return if work_files.nil?
          work_files.assigned[0]
        else
          file_url_to_path(fileset.import_url) unless fileset.import_url.nil?
        end
      end

      # Strip a leading file:// scheme, leaving a local path.
      def file_url_to_path(url)
        url.sub(%r{\Afile://}, '')
      end

      # Record which primary file a derivative path belongs to.
      def log_primary_file_relation(path)
        file_path = primary_file_path
        return if file_path.nil?
        IiifPrint::IngestFileRelation.create!(
          file_path: file_path,
          derivative_path: path
        )
      end

      # Persist a queued assignment so other components can see it.
      def log_assignment(path, name)
        IiifPrint::DerivativeAttachment.create!(
          fileset_id: fileset_id,
          path: path,
          destination_name: name
        )
        log_primary_file_relation(path)
      end

      # Remove the persisted record(s) of a queued assignment.
      def unlog_assignment(path, name)
        criteria = { path: path, destination_name: name }
        # only narrow by fileset when one is known
        criteria = { fileset_id: fileset_id }.merge(criteria) unless fileset_id.nil?
        IiifPrint::DerivativeAttachment.where(criteria).destroy_all
        # note: there is deliberately no attempt to "unlog" primary
        #   file relation, as leaving it should have no side-effect.
      end

      # Destination name for a path: the text after its last '.', remapped
      # through remap_names where Hyrax uses a special name.
      def path_destination_name(path)
        ext = path.split('.')[-1]
        self.class.remap_names[ext] || ext
      end

      # Anything a Hash responds to is proxied to @paths by method_missing.
      def respond_to_missing?(symbol, include_priv = false)
        {}.respond_to?(symbol, include_priv)
      end

      def method_missing(method, *args, &block)
        # if we proxy mapping/hash enumeration methods,
        #   make sure @paths loaded, then proxy to it.
        if respond_to_missing?(method)
          load_paths if @paths.nil?
          return @paths.send(method, *args, &block)
        end
        super
      end

      def path_factory
        Hyrax::DerivativePath
      end

      # make shared (pairtree) directory for this fileset's derivatives
      def mkdir_pairtree
        # Hyrax::DerivativePath has no public method to directly get the
        #   bare pairtree path for derivatives for a fileset, but we
        #   can infer it...
        path = path_factory.derivative_path_for_reference(fileset, '')
        dir = File.join(path.split('/')[0..-2])
        FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
      end
    end
  end
end
@@ -0,0 +1,92 @@
# encoding=utf-8

require 'hyrax'

module IiifPrint
  module Data
    # WorkFile is a read-only convenience wrapper for just-in-time
    #   file operations, and is the type of values returned by
    #   IiifPrint::Data::WorkFiles (container) adapter.
    class WorkFile
      # accessors for adaptation relationships:
      attr_accessor :work, :parent, :fileset
      # delegate these metadata properties to @fileset.original_file;
      # allow_nil keeps them consistent with #path and #name, which answer
      # nil (rather than raising) when there is no fileset yet:
      delegate :size, :date_created, :date_modified, :mime_type, to: :unwrapped, allow_nil: true

      # alternate constructor spelling:
      def self.of(work, fileset = nil, parent = nil)
        new(work, fileset, parent)
      end

      # @param work [Object] the work this file belongs to
      # @param fileset [FileSet, nil] fileset wrapped by this object
      # @param parent [IiifPrint::Data::WorkFiles, nil] owning container
      def initialize(work, fileset = nil, parent = nil)
        @work = work
        # A nil fileset is stored as-is here (the deferred-assignment case);
        # the readers below then answer nil / '' instead of touching storage.
        @fileset = fileset
        # Parent is WorkFiles (container) object, if applicable:
        @parent = parent
      end

      # Get original repository object representing file (not fileset).
      # @return [ActiveFedora::File, nil] repository file persistence object,
      #   nil when there is no fileset
      def unwrapped
        return nil if @fileset.nil?
        @fileset.original_file
      end

      # Two WorkFiles are equal when they wrap the same repository file.
      # Never equal when either side has no file to compare, or when other
      # is not WorkFile-like.
      # @param other [Object]
      # @return [Boolean]
      def ==(other)
        return false if @fileset.nil?
        return false unless other.respond_to?(:unwrapped)
        own_file = unwrapped
        other_file = other.unwrapped
        return false if own_file.nil? || other_file.nil?
        own_file.id == other_file.id
      end

      # Get path to working copy of file on local filesystem;
      #   checkout file from repository/source as needed.
      # @return [String, nil] path to working copy of binary
      def path
        return nil if @fileset.nil?
        checkout
      end

      # Read data from working copy of file on local filesystem;
      #   checkout file from repository/source as needed.
      # @return [String] byte data of binary/file payload ('' without fileset)
      def data
        return '' if @fileset.nil?
        File.read(path, mode: 'rb')
      end

      # Run block/proc upon data of file;
      #   checkout file from repository/source as needed.
      # @yield [io] read-only IO or File object to block/proc.
      # @return [Object, nil] the block's value, or nil without a fileset
      def with_io(&block)
        filepath = path
        return if filepath.nil?
        File.open(filepath, 'rb', &block)
      end

      # Get filename from stored metadata
      # @return [String, nil] file name stored in repository metadata for file
      def name
        return nil if @fileset.nil?
        unwrapped.original_name
      end

      # Derivatives for fileset associated with this primary file object
      # @return [IiifPrint::Data::WorkDerivatives] derivatives adapter
      def derivatives
        IiifPrint::Data::WorkDerivatives.of(work, fileset, self)
      end

      private

      # @return [String] path to working copy of the original file
      def checkout
        file = @fileset.original_file
        # find_or_retrieve returns path to working copy, but only
        #   fetches from Fedora if no working copy exists on filesystem.
        # NOTE: there may be some benefit to memoizing to avoid
        #   call and File.exist? IO operation, but YAGNI for now.
        Hyrax::WorkingDirectory.find_or_retrieve(file.id, @fileset.id)
      end
    end
  end
end
@@ -0,0 +1,199 @@
module IiifPrint
  module Data
    # Container adapter for the primary files (filesets) of a work, with
    # assign/unassign/commit! queue semantics for adding and removing them.
    class WorkFiles
      include IiifPrint::Data::PathHelper

      attr_accessor :work, :assigned, :unassigned
      delegate :include?, to: :keys

      # alternate constructor spelling:
      def self.of(work)
        new(work)
      end

      ##
      # A convenience method to associate files (original and derivatives) to the given work.
      #
      # @param to [Object] the work to which we're assigning the file(s) for the given paths.
      # @param path [String] the path of the file we're assigning to the given work.
      # @param derivative_paths [Array<String>] the path(s) to derivatives we'll assign to the given
      #        work.
      # @param commit [Boolean] when true, commit the changes to the attachment.
      # @return void
      def self.assign!(to:, path:, derivative_paths: [], commit: true)
        attachment = new(to)
        attachment.assign(path)
        Array.wrap(derivative_paths).each do |derivative_path|
          attachment.derivatives.assign(derivative_path)
        end
        attachment.commit! if commit
      end

      # @param work [Object] the work whose files are adapted
      def initialize(work)
        @work = work
        @assigned = []
        @unassigned = []
        @derivatives = nil
      end

      # Derivatives for specified fileset or first fileset found.
      #   The `WorkDerivatives` adapter has assign/commit! semantics just
      #   like `WorkFiles`, and also acts like a hash/mapping of
      #   destination names (usually file extension) to path of saved
      #   derivative. Once an adapter has been built for a fileset it is
      #   memoized and returned by every later call; while no fileset is
      #   given (the deferred assignment case) a fresh, un-memoized adapter
      #   is returned each time.
      # @param fileset [FileSet, nil] fileset whose derivatives are wanted
      # @return [IiifPrint::Data::WorkDerivatives] derivatives adapter
      def derivatives(fileset: nil)
        return @derivatives unless @derivatives.nil?
        if fileset.nil?
          # for the deferred assignment case, we have no fileset yet...
          work_file = IiifPrint::Data::WorkFile.of(work, nil, self)
          return work_file.derivatives
        end
        # Otherwise, build the adapter for the fileset that was asked for,
        # delegating actual construction to WorkFile#derivatives:
        @derivatives = IiifPrint::Data::WorkFile.of(work, fileset, self).derivatives
      end

      # Assignment state
      # @return [String] A label describing the state of assignment queues
      def state
        return 'dirty' unless @assigned.empty? && @unassigned.empty?
        return 'empty' if keys.empty?
        # TODO: implement 'pending' as intermediate state between 'dirty'
        #   and saved, where we look for saved state that matches what was
        #   previously assigned in THIS instance.  We can only know that
        #   changes initiated by this instance in this thread are pending
        #   because there's no global storage for the assignment queue.
        'saved'
      end

      # List of fileset (not file) id keys, presumes system like Hyrax
      #   is only keeping a 1:1 between fileset and contained PCDM file,
      #   because derivatives are not stored in the FileSet.
      # @return [Array<String>] fileset ids
      def keys
        filesets.map(&:id)
      end

      # List of WorkFile for each primary file
      # @return [Array<IiifPrint::Data::WorkFile>] adapter for persisted
      #   primary file
      def values
        keys.map(&method(:get))
      end

      # Array of [id, WorkFile] for each primary file
      # @return [Array<Array>] key/value pairs for primary files of work
      def entries
        filesets.map { |fs| [fs.id, self[fs.id]] }
      end

      # List of local file names for attachments, based on original ingested
      #   or uploaded file name.
      # @return [Array<String>]
      def names
        filesets.map(&method(:original_name))
      end

      # Get a WorkFile adapter representing primary file, either by name or id
      # @param name_or_id [String] Fileset id or work-local file name
      # @return [IiifPrint::Data::WorkFile, nil] adapter for persisted
      #   primary file, nil when nothing matches
      def get(name_or_id)
        return get_by_fileset_id(name_or_id) if keys.include?(name_or_id)
        get_by_filename(name_or_id)
      end

      # Assign a path to assigned queue for attachment
      # @param path [String] Path to source file
      # @raise [IOError, SecurityError] from validate_path
      def assign(path)
        path = normalize_path(path)
        validate_path(path)
        @assigned.push(path)
      end

      # Assign a name or id to unassigned queue for deletion -- OR -- remove a
      #   path from queue of assigned items
      # @param name_or_id [String] Fileset id, local file name, or source path
      def unassign(name_or_id)
        # if name_or_id is queued path, remove from @assigned queue:
        @assigned.delete(name_or_id)
        # if name_or_id is known id or name, queue it for removal
        # (include? only knows fileset ids, so file names are checked too)
        @unassigned.push(name_or_id) if include?(name_or_id) || names.include?(name_or_id)
      end

      # commit pending changes to work files
      #   beginning with removals, then with new assignments
      def commit!
        commit_unassigned
        commit_assigned
      end

      alias [] :get

      private

      # @return [IiifPrint::Data::WorkFile, nil] nil for an id not in work
      def get_by_fileset_id(id)
        return nil unless keys.include?(id)
        fileset = FileSet.find(id)
        IiifPrint::Data::WorkFile.of(work, fileset, self)
      end

      # Get one WorkFile object based on filename in metadata
      def get_by_filename(name)
        r = filesets.select { |fs| original_name(fs) == name }
        # checkout first match
        r.empty? ? nil : IiifPrint::Data::WorkFile.of(work, r[0], self)
      end

      def original_name(fileset)
        fileset.original_file.original_name
      end

      def filesets
        # file sets with non-nil original file contained:
        work.members.select { |m| m.is_a?(FileSet) && m.original_file }
      end

      # User on whose behalf changes are committed: the work's depositor
      # when set, otherwise current_user if defined, else the batch user.
      def user
        return User.find_by(email: work.depositor) unless work.depositor.nil?
        defined?(current_user) ? current_user : User.batch_user
      end

      def ensure_depositor
        return unless @work.depositor.nil?
        @work.depositor = user.user_key
      end

      def commit_unassigned
        # for each (name or) id to be removed from work, use actor to destroy
        @unassigned.each do |id|
          work_file = get(id)
          # entry may have gone away since it was queued; nothing to destroy
          next if work_file.nil?
          # "actor" here is simply a multi-adapter of Fileset, User
          # Calling destroy will:
          #   1. unlink fileset from work, and save work
          #   2. Destroy fileset:
          #     - :before_destroy callback will delegate derivative cleanup
          #       to derivatives service component(s).
          #     - Remove fileset from storage/persistence layers
          #     - Invoke (logging or other) :after_destroy callback
          Hyrax::Actors::FileSetActor.new(work_file.fileset, user).destroy
          work.reload
        end
      end

      def commit_assigned
        return if @assigned.blank?
        ensure_depositor
        remote_files = @assigned.map do |path|
          { url: path_to_uri(path), file_name: File.basename(path) }
        end
        attrs = { remote_files: remote_files }
        # Create an environment for actor stack:
        env = Hyrax::Actors::Environment.new(@work, Ability.new(user), attrs)
        # Invoke default Hyrax actor stack middleware:
        @work.new_record? ? Hyrax::CurationConcern.actor.create(env) : Hyrax::CurationConcern.actor.update(env)
      end
    end
  end
end
@@ -0,0 +1,35 @@
require 'iiif_print/data/fileset_helper'
require 'iiif_print/data/path_helper'
require 'iiif_print/data/work_derivatives'
require 'iiif_print/data/work_files'
require 'iiif_print/data/work_file'

module IiifPrint
  # Module for data access helper / adapter classes supporting, enhancing
  #   IiifPrint work models
  module Data
    # Handler for after_create_fileset, to be called by block subscribing to
    #   and overriding default Hyrax `:after_create_fileset` handler, via
    #   app integrating iiif_print.
    # @param file_set [FileSet] the fileset that was just created
    # @param user [User] user passed on to FileSetAttachedEventJob
    def self.handle_after_create_fileset(file_set, user)
      handle_queued_derivative_attachments(file_set)
      # Hyrax queues this job by default, and since iiif_print
      #   overrides the single subscriber Hyrax uses to do so, we
      #   must call this here:
      FileSetAttachedEventJob.perform_later(file_set, user)
      # same parent-work lookup as handle_queued_derivative_attachments
      work = parent_work_of(file_set)
      # Hyrax CreateWithRemoteFilesActor has glaring omission re: this job,
      #   so we call it here, once we have a fileset to copy permissions to.
      InheritPermissionsJob.perform_later(work) unless work.nil?
    end

    # Attach any derivatives that were queued for the file this fileset
    # was imported from. Does nothing for filesets without an import_url.
    # @param file_set [FileSet]
    def self.handle_queued_derivative_attachments(file_set)
      return if file_set.import_url.nil?
      work = parent_work_of(file_set)
      derivatives = IiifPrint::Data::WorkDerivatives.of(work)
      # For now, because this is IO-bound operation, it makes sense to have
      #   this not be a job, but run inline:
      derivatives.commit_queued!(file_set)
    end

    # First parent of the fileset that identifies itself as a work.
    # @param file_set [FileSet]
    # @return [Object, nil] the parent work, or nil when there is none
    def self.parent_work_of(file_set)
      file_set.member_of.find(&:work?)
    end
    private_class_method :parent_work_of
  end
end
+ end