iiif_print 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,40 @@
1
+ require 'uri'
2
+
3
+ module IiifPrint
4
+ module Data
5
# Mixin for methods related to paths on filesystem.
#
# Relies on `IiifPrint.config.registered_ingest_dirs` for the list of
# directories that files may be ingested from.
module PathHelper
  # Matches a URI (as defined by the stdlib URI parser) only at the very
  # start of a string, so a colon later in a local path is not mistaken
  # for a URI scheme separator.
  URI_AT_START = /\A#{URI::DEFAULT_PARSER.make_regexp}/.freeze

  # Normalize a path-like value to either an unchanged URI string or an
  # absolute local path.
  # @param path [#to_s] URI or (possibly relative) filesystem path
  # @return [String] the URI as given, or the expanded absolute path
  def normalize_path(path)
    path = path.to_s
    isuri?(path) ? path : File.expand_path(path)
  end

  # Check whether the given string begins with a URI.
  # @param path [String] value to test
  # @return [Boolean] true when the value starts with a URI
  def isuri?(path)
    URI_AT_START.match?(path.to_s)
  end

  # Convert a local path to a file URI; URIs pass through untouched.
  # @param path [String] URI or local path
  # @return [String] URI string
  def path_to_uri(path)
    isuri?(path) ? path : "file://#{path}"
  end

  # Check that a path lies strictly inside one of the registered ingest
  # directories. The directory prefix must end at a path separator, so
  # "/data/ingest_other/x" does not pass for registered "/data/ingest".
  # @param path [String] absolute local path
  # @return [Boolean] true when path is inside a registered directory
  def registered_ingest_path(path)
    IiifPrint.config.registered_ingest_dirs.any? do |dir|
      prefix = dir.to_s
      prefix += File::SEPARATOR unless prefix.end_with?(File::SEPARATOR)
      path.start_with?(prefix) && path.length > prefix.length
    end
  end

  # Ensure a path (or file URI) exists and is inside a registered ingest
  # directory.
  # @param path [String] local path or `file://` URI
  # @return [nil] when the path is acceptable
  # @raise [IOError] when nothing exists at the path
  # @raise [SecurityError] when the path is outside every registered
  #   ingest directory
  def validate_path(path)
    # treat file URIs equivalent to local paths; only a leading scheme
    # is stripped
    path = File.expand_path(path.sub(%r{\Afile://}, ''))
    # make sure file exists
    raise IOError, "Not found: #{path}" unless File.exist?(path)
    return if registered_ingest_path(path)
    # we cannot use path if it is not in the registered list for Hyrax
    # ingest, we would prefer to fail early vs. later+silently
    raise SecurityError,
          "Path specified is not configured in Hyrax ingest registered list: " \
          "#{path}"
  end
end
39
+ end
40
+ end
@@ -0,0 +1,323 @@
1
+ require 'hyrax'
2
+
3
+ module IiifPrint
4
+ module Data
5
# Adapter over the derivative files of a single fileset of a work.
#
# Keeps two in-memory queues -- source paths to attach (`assigned`) and
# destination names to delete (`unassigned`) -- which `commit!` applies.
# Hash-like calls this class does not define itself (e.g. `keys`, `[]`)
# are forwarded by `method_missing` to the name => path mapping built by
# `load_paths`.
#
# NOTE(review): `first_fileset` and `fileset_id` are not defined here;
# presumably they come from IiifPrint::Data::FilesetHelper -- confirm.
#
# TODO: consider compositional refactoring (not mixins), but this
#   may make readability/comprehendability higher, and yield
#   higher applied/practical complexity.
class WorkDerivatives
  include IiifPrint::Data::FilesetHelper
  include IiifPrint::Data::PathHelper

  # Work is primary adapted context
  # @return [ActiveFedora::Base] Hyrax work-type object
  attr_accessor :work

  # FileSet is secondary adapted context
  # @return [FileSet] fileset for work, with regard to these derivatives
  attr_accessor :fileset

  # Parent pointer to WorkFile object representing fileset
  # @return [IiifPrint::Data::WorkFile] WorkFile for fileset, work pair
  attr_accessor :parent

  # Assigned attachment queue (of paths)
  # @return [Array<String>] list of paths queued for attachment
  attr_accessor :assigned

  # Assigned deletion queue (of destination names)
  # @return [Array<String>] list of destination names queued for deletion
  attr_accessor :unassigned

  # mapping of special names Hyrax uses for derivatives, not extension:
  @remap_names = {
    'jpeg' => 'thumbnail'
  }
  class << self
    attr_accessor :remap_names
  end

  # @param from [Object] the work from which we'll extract the given type of data.
  # @param of_type [String] the type of data we want extracted from the work (e.g. "txt", "json")
  #
  # @return [String]
  def self.data(from:, of_type:)
    new(from).data(of_type)
  end

  # alternate constructor spelling:
  def self.of(work, fileset = nil, parent = nil)
    new(work, fileset, parent)
  end

  # Adapt work and either specific or first fileset
  # @param work [Object] adapted context, usually a work
  # @param fileset [FileSet, nil] fileset to adapt; when nil, the result
  #   of `first_fileset` is used
  # @param parent [IiifPrint::Data::WorkFile, nil] WorkFile these
  #   derivatives belong to
  def initialize(work, fileset = nil, parent = nil)
    # adapted context usually work, may be string id of FileSet
    @work = work
    @fileset = fileset.nil? ? first_fileset : fileset
    # computed name-to-path mapping, initially nil as sentinel for JIT load
    @paths = nil
    # assignments for attachment
    @assigned = []
    # un-assignments for deletion
    @unassigned = []
    # parent is IiifPrint::Data::WorkFile object for derivatives
    @parent = parent
  end

  # Assignment state; always refreshes the path mapping first.
  # @return [String] 'dirty' when either queue is non-empty, 'empty' when
  #   no derivative paths are known, otherwise 'saved'
  def state
    load_paths
    return 'dirty' unless @unassigned.empty? && @assigned.empty?
    return 'empty' if @paths.empty?
    'saved'
  end

  # Assign a path to assigned queue for attachment
  # @param path [String] Path to source file
  def assign(path)
    path = normalize_path(path)
    validate_path(path)
    @assigned.push(path)
    # We are keeping assignment both in ephemeral, transient @assigned
    # and mirroring to db to share context with other components:
    log_assignment(path, path_destination_name(path))
  end

  # Assign a destination name to unassigned queue for deletion -- OR --
  # remove a path from queue of assigned items
  # @param name [String] Destination name (file extension), or source path
  def unassign(name)
    # if name is queued path, remove from @assigned queue:
    if @assigned.include?(name)
      @assigned.delete(name)
      unlog_assignment(name, path_destination_name(name))
    end
    # if name is known destination name, remove
    @unassigned.push(name) if exist?(name)
  end

  # commit pending changes to work files
  # beginning with removals, then with new assignments
  def commit!
    @unassigned.each { |name| delete(name) }
    @assigned.each do |path|
      attach(path, path_destination_name(path))
    end
    # reset queues after work is complete
    @assigned = []
    @unassigned = []
  end

  # Given a fileset meeting both of the following conditions:
  #   1. a non-nil import_url value;
  #   2. is attached to a work (persisted in Fedora, if not yet in Solr)...
  # ...this method gets associated derivative paths queued and attach all.
  # @param file_set [FileSet] saved file set, attached to work,
  #   with identifier, and a non-nil import_url
  # @raise [ArgumentError] when the fileset has no import_url, or no work
  #   is found among its `member_of`
  def commit_queued!(file_set)
    raise ArgumentError, 'No FileSet import_url' if file_set.import_url.nil?
    import_path = file_url_to_path(file_set.import_url)
    parent_work = file_set.member_of.select(&:work?).first
    raise ArgumentError, 'Work not found for fileset' if parent_work.nil?
    derivatives = WorkDerivatives.of(parent_work, file_set)
    IngestFileRelation.derivatives_for_file(import_path).each do |path|
      next unless File.exist?(path)
      attachment_record = DerivativeAttachment.where(path: path).first
      # An unassigned derivative keeps its IngestFileRelation row (see
      # unlog_assignment) but has no attachment record; nothing to attach.
      next if attachment_record.nil?
      derivatives.attach(path, attachment_record.destination_name)
      # update previously nil fileset id
      attachment_record.fileset_id = file_set.id
      attachment_record.save!
    end
    @fileset ||= file_set
    load_paths
  end

  # attach a single derivative file to work
  # @param file [String, IO] path to file or IO object
  # @param name [String] destination name, usually file extension
  # @raise [RuntimeError] when there is no fileset to attach to
  def attach(file, name)
    raise 'Cannot save for nil fileset' if fileset.nil?
    mkdir_pairtree
    path = path_factory.derivative_path_for_reference(fileset, name)
    # if file argument is path, copy file
    if file.is_a? String
      FileUtils.copy(file, path)
    else
      # otherwise, presume file is an IO and stream it from its start
      # into the destination, written in binary mode so payload bytes
      # are never translated.
      # note: does not close input file/IO, presume that is caller's
      #   responsibility; the original read position is restored.
      orig_pos = file.tell
      file.seek(0)
      File.open(path, 'wb') { |dstfile| IO.copy_stream(file, dstfile) }
      file.seek(orig_pos)
    end
    # finally, reload @paths after mutation
    load_paths
  end

  # Delete a derivative file from work, by destination name
  # @param name [String] destination name, usually file extension
  # @param force [Boolean, nil] passed through to FileUtils.rm
  # @raise [RuntimeError] when there is no fileset
  def delete(name, force: nil)
    raise 'Cannot save for nil fileset' if fileset.nil?
    path = path_factory.derivative_path_for_reference(fileset, name)
    # will remove file, if it exists; won't remove pairtree, even
    #   if it becomes empty, as that is excess scope.
    FileUtils.rm(path, force: force) if File.exist?(path)
    # finally, reload @paths after mutation
    load_paths
  end

  # (Re)build the destination-name => path mapping in @paths. Runs on
  # every call; readers such as #path invoke it lazily only when @paths
  # is still nil. With no fileset id the mapping is empty.
  def load_paths
    fsid = fileset_id
    if fsid.nil?
      @paths = {}
      return
    end
    # list of paths
    paths = path_factory.derivatives_for_reference(fsid)
    # names from paths
    @paths = paths.map { |e| [path_destination_name(e), e] }.to_h
  end

  # path to existing derivative file for destination name
  # @param name [String] destination name, usually file extension
  # @return [String, NilClass] path (or nil)
  def path(name)
    load_paths if @paths.nil?
    result = @paths[name]
    return if result.nil?
    File.exist?(result) ? result : nil
  end

  # Run a block in context of the opened derivative file for reading
  # @param name [String] destination name, usually file extension
  # @param block [Proc] block/proc to run in context of file IO
  # @return [Object, nil] the block's result, or nil when no file exists
  def with_io(name, &block)
    # xml/txt/html derivatives are read as UTF-8 text, the rest as bytes
    mode = ['xml', 'txt', 'html'].include?(name) ? 'rb:UTF-8' : 'rb'
    filepath = path(name)
    return if filepath.nil?
    File.open(filepath, mode, &block)
  end

  # Get number of derivatives or, if a destination name argument
  #   is provided, the size of derivative file
  # @param name [String] optional destination name, usually file extension
  # @return [Integer] size in bytes
  # NOTE(review): a name with no recorded path reaches File.size(nil),
  #   which raises TypeError -- confirm whether callers rely on that.
  def size(name = nil)
    load_paths if @paths.nil?
    return @paths.size if name.nil?
    File.size(@paths[name])
  end

  # Check if derivative file exists for destination name
  # @param name [String] optional destination name, usually file extension
  # @return [TrueClass, FalseClass] boolean
  def exist?(name)
    # #path already covers both "name is known" and "file is on disk"
    !path(name).nil?
  end

  # Get raw binary or encoded text data of file as a String
  # @param name [String] destination name, usually file extension
  # @return [String] Raw bytes, or if text file, a UTF-8 encoded String;
  #   empty string when no derivative exists for name
  def data(name)
    with_io(name, &:read) || ''
  end

  private

  # Path of the primary (source) file these derivatives relate to.
  # @return [String, nil]
  def primary_file_path
    if fileset.nil?
      # if there is a nil fileset, we look for *intent* in the form
      #   of the first assigned file path for single-file work.
      work_file = parent
      return if work_file.nil?
      work_files = work_file.parent
      return if work_files.nil?
      work_files.assigned[0]
    else
      file_url_to_path(fileset.import_url) unless fileset.import_url.nil?
    end
  end

  # Strip a leading `file://` scheme from a URL, leaving the local path.
  def file_url_to_path(url)
    url.sub(%r{\Afile://}, '')
  end

  # Record which primary file a derivative path belongs to; skipped when
  # the primary file path is unknown.
  def log_primary_file_relation(path)
    file_path = primary_file_path
    return if file_path.nil?
    IiifPrint::IngestFileRelation.create!(
      file_path: file_path,
      derivative_path: path
    )
  end

  # Mirror a queued assignment to the database.
  def log_assignment(path, name)
    IiifPrint::DerivativeAttachment.create!(
      fileset_id: fileset_id,
      path: path,
      destination_name: name
    )
    log_primary_file_relation(path)
  end

  # Remove the database record(s) for a previously queued assignment;
  # scoped by fileset id only when one is known.
  def unlog_assignment(path, name)
    criteria = { path: path, destination_name: name }
    fsid = fileset_id
    criteria[:fileset_id] = fsid unless fsid.nil?
    IiifPrint::DerivativeAttachment.where(criteria).destroy_all
    # note: there is deliberately no attempt to "unlog" primary
    #   file relation, as leaving it should have no side-effect.
  end

  # Destination name for a path: text after the last '.', remapped
  # through the class-level `remap_names` table when listed there.
  def path_destination_name(path)
    ext = path.split('.')[-1]
    self.class.remap_names[ext] || ext
  end

  # Anything a Hash responds to is treated as proxied to @paths.
  def respond_to_missing?(symbol, include_priv = false)
    {}.respond_to?(symbol, include_priv)
  end

  def method_missing(method, *args, &block)
    # if we proxy mapping/hash enumeration methods,
    #   make sure @paths loaded, then proxy to it.
    if respond_to_missing?(method)
      load_paths if @paths.nil?
      return @paths.send(method, *args, &block)
    end
    super
  end

  def path_factory
    Hyrax::DerivativePath
  end

  # make shared directory for this fileset's derivatives to live in
  def mkdir_pairtree
    # Hyrax::DerivativePath has no public method to directly get the
    #   bare pairtree path for derivatives for a fileset, but we
    #   can infer it from the directory of a derivative path...
    path = path_factory.derivative_path_for_reference(fileset, '')
    FileUtils.mkdir_p(File.dirname(path))
  end
end
322
+ end
323
+ end
@@ -0,0 +1,92 @@
1
+ # encoding=utf-8
2
+
3
+ require 'hyrax'
4
+
5
+ module IiifPrint
6
+ module Data
7
# WorkFile is a read-only convenience wrapper for just-in-time
#   file operations, and is the type of values returned by
#   IiifPrint::Data::WorkFiles (container) adapter.
class WorkFile
  # accessors for adaptation relationships:
  attr_accessor :work, :parent, :fileset
  # delegate these metadata properties to @fileset.original_file:
  delegate :size, :date_created, :date_modified, :mime_type, to: :unwrapped

  # alternate constructor spelling:
  def self.of(work, fileset = nil, parent = nil)
    new(work, fileset, parent)
  end

  # @param work [Object] the work this file belongs to
  # @param fileset [FileSet, nil] fileset wrapping the file; stored as
  #   given. When nil, the readers below return nil / empty values.
  # @param parent [IiifPrint::Data::WorkFiles, nil] container adapter,
  #   if applicable
  def initialize(work, fileset = nil, parent = nil)
    @work = work
    @fileset = fileset
    @parent = parent
  end

  # Get original repository object representing file (not fileset).
  # @return [ActiveFedora::File, nil] repository file persistence object,
  #   nil when there is no fileset
  def unwrapped
    return nil if @fileset.nil?
    @fileset.original_file
  end

  # Two WorkFiles are equal when their original files share an id.
  # Anything that cannot supply an original file compares as unequal
  # instead of raising.
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return false if @fileset.nil?
    return false unless other.respond_to?(:unwrapped)
    mine = unwrapped
    theirs = other.unwrapped
    return false if mine.nil? || theirs.nil?
    mine.id == theirs.id
  end

  # Get path to working copy of file on local filesystem;
  #   checkout file from repository/source as needed.
  # @return [String, nil] path to working copy of binary, nil when there
  #   is no fileset
  def path
    return nil if @fileset.nil?
    checkout
  end

  # Read data from working copy of file on local filesystem;
  #   checkout file from repository/source as needed.
  # @return [String] byte data of binary/file payload, '' when there is
  #   no fileset
  def data
    return '' if @fileset.nil?
    File.read(path, mode: 'rb')
  end

  # Run block/proc upon data of file;
  #   checkout file from repository/source as needed.
  # @yield [io] read-only IO or File object to block/proc.
  # @return [Object, nil] the block's result, nil when there is no path
  def with_io(&block)
    filepath = path
    return if filepath.nil?
    File.open(filepath, 'rb', &block)
  end

  # Get filename from stored metadata
  # @return [String, nil] file name stored in repository metadata for
  #   file, nil when there is no fileset
  def name
    return nil if @fileset.nil?
    unwrapped.original_name
  end

  # Derivatives for fileset associated with this primary file object
  # @return [IiifPrint::Data::WorkDerivatives] derivatives adapter
  def derivatives
    IiifPrint::Data::WorkDerivatives.of(work, fileset, self)
  end

  private

  # Path to a working copy of the fileset's original file.
  def checkout
    file = @fileset.original_file
    # find_or_retrieve returns path to working copy, but only
    #   fetches from Fedora if no working copy exists on filesystem.
    # NOTE: there may be some benefit to memoizing to avoid
    #   call and File.exist? IO operation, but YAGNI for now.
    Hyrax::WorkingDirectory.find_or_retrieve(file.id, @fileset.id)
  end
end
91
+ end
92
+ end
@@ -0,0 +1,199 @@
1
+ module IiifPrint
2
+ module Data
3
# Container adapter over the primary files (filesets with an original
# file) of a work. Acts like a read-only mapping of fileset id =>
# IiifPrint::Data::WorkFile and keeps queues of paths to attach
# (`assigned`) and ids/names to remove (`unassigned`), applied by `commit!`.
class WorkFiles
  include IiifPrint::Data::PathHelper

  attr_accessor :work, :assigned, :unassigned
  # `include?` answers for fileset ids only (not file names)
  delegate :include?, to: :keys

  # alternate constructor spelling:
  def self.of(work)
    new(work)
  end

  ##
  # A convenience method to associate files (original and derivatives) to the given work.
  #
  # @param to [Object] the work to which we're assigning the file(s) for the given paths.
  # @param path [String] the path of the file we're assigning to the given work.
  # @param derivative_paths [Array<String>] the path(s) to derivatives we'll assign to the given
  #        work.
  # @param commit [Boolean] when true, commit the changes to the attachment.
  # @return void
  def self.assign!(to:, path:, derivative_paths: [], commit: true)
    attachment = new(to)
    attachment.assign(path)
    Array.wrap(derivative_paths).each do |derivative_path|
      attachment.derivatives.assign(derivative_path)
    end
    attachment.commit! if commit
  end

  # @param work [Object] the work whose files are adapted
  def initialize(work)
    @work = work
    @assigned = []
    @unassigned = []
    @derivatives = nil
  end

  # Derivatives for specified fileset or first fileset found.
  #   The `WorkDerivatives` adapter has assign/commit! semantics just
  #   like `WorkFiles`, and also acts like a hash/mapping of
  #   destination names (usually file extension) to path of saved
  #   derivative.
  #
  # Once an adapter has been built for a given fileset it is memoized and
  # returned by every later call, whatever fileset is passed then. When no
  # fileset is given (the deferred-assignment case) a fresh, un-memoized
  # adapter is returned each time.
  # @param fileset [FileSet, nil] fileset whose derivatives are wanted
  # @return [IiifPrint::Data::WorkDerivatives] derivatives adapter
  def derivatives(fileset: nil)
    fileset ||= @fileset
    return @derivatives unless @derivatives.nil?
    # WorkFile does the actual construction; with a nil fileset the
    # resulting WorkDerivatives chooses its own fileset.
    adapter = IiifPrint::Data::WorkFile.of(work, fileset, self).derivatives
    # for the deferred assignment case, we have no fileset yet...
    return adapter if fileset.nil?
    @derivatives = adapter
  end

  # Assignment state
  # @return [String] A label describing the state of assignment queues
  def state
    return 'dirty' unless @assigned.empty? && @unassigned.empty?
    return 'empty' if keys.empty?
    # TODO: implement 'pending' as intermediate state between 'dirty'
    #   and saved, where we look for saved state that matches what was
    #   previously assigned in THIS instance.  We can only know that
    #   changes initiated by this instance in this thread are pending
    #   because there's no global storage for the assignment queue.
    'saved'
  end

  # List of fileset (not file) id keys, presumes system like Hyrax
  #   is only keeping a 1:1 between fileset and contained PCDM file,
  #   because derivatives are not stored in the FileSet.
  # @return [Array<String>] fileset ids
  def keys
    filesets.map(&:id)
  end

  # List of WorkFile for each primary file
  # @return [Array<IiifPrint::Data::WorkFile>] adapter for persisted
  #   primary file
  def values
    keys.map(&method(:get))
  end

  # Array of [id, WorkFile] for each primary file
  # @return [Array<Array>] key/value pairs for primary files of work
  def entries
    filesets.map { |fs| [fs.id, self[fs.id]] }
  end

  # List of local file names for attachments, based on original ingested
  #   or uploaded file name.
  # @return [Array<String>]
  def names
    filesets.map(&method(:original_name))
  end

  # Get a WorkFile adapter representing primary file, either by name or id
  # @param name_or_id [String] Fileset id or work-local file name
  # @return [IiifPrint::Data::WorkFile, nil] adapter for persisted
  #   primary file, nil when neither an id nor a name matches
  def get(name_or_id)
    return get_by_fileset_id(name_or_id) if keys.include?(name_or_id)
    get_by_filename(name_or_id)
  end

  # Assign a path to assigned queue for attachment
  # @param path [String] Path to source file
  def assign(path)
    path = normalize_path(path)
    validate_path(path)
    @assigned.push(path)
  end

  # Assign a name or id to unassigned queue for deletion -- OR -- remove a
  #   path from queue of assigned items
  # @param name_or_id [String] Fileset id, local file name, or source path
  def unassign(name_or_id)
    # if name_or_id is queued path, remove from @assigned queue:
    @assigned.delete(name_or_id) if @assigned.include?(name_or_id)
    # if name_or_id is known id or name, queue it for removal; commit!
    # resolves either form through #get
    known = include?(name_or_id) || names.include?(name_or_id)
    @unassigned.push(name_or_id) if known
  end

  # commit pending changes to work files
  #   beginning with removals, then with new assignments
  # NOTE(review): unlike WorkDerivatives#commit!, the queues are not reset
  #   here; WorkDerivatives reads `assigned[0]` of this object to find the
  #   primary file path -- confirm before changing.
  def commit!
    commit_unassigned
    commit_assigned
  end

  alias [] :get

  private

  # WorkFile for a fileset id known to this work, nil otherwise.
  def get_by_fileset_id(id)
    return nil unless keys.include?(id)
    fileset = FileSet.find(id)
    IiifPrint::Data::WorkFile.of(work, fileset, self)
  end

  # Get one WorkFile object based on filename in metadata
  def get_by_filename(name)
    # checkout first match
    match = filesets.find { |fs| original_name(fs) == name }
    match.nil? ? nil : IiifPrint::Data::WorkFile.of(work, match, self)
  end

  def original_name(fileset)
    fileset.original_file.original_name
  end

  def filesets
    # file sets with non-nil original file contained:
    work.members.select { |m| m.is_a?(FileSet) && m.original_file }
  end

  # User to act as: the work's depositor when set, else `current_user`
  # if that is defined in this context, else the batch user.
  def user
    return User.find_by(email: work.depositor) unless work.depositor.nil?
    defined?(current_user) ? current_user : User.batch_user
  end

  # Give the work a depositor when it has none.
  def ensure_depositor
    return unless @work.depositor.nil?
    @work.depositor = user.user_key
  end

  def commit_unassigned
    # for each (name or) id to be removed from work, use actor to destroy
    @unassigned.each do |id|
      # "actor" here is simply a multi-adapter of Fileset, User
      # Calling destroy will:
      #   1. unlink fileset from work, and save work
      #   2. Destroy fileset:
      #     - :before_destroy callback will delegate derivative cleanup
      #       to derivatives service component(s).
      #     - Remove fileset from storage/persistence layers
      #     - Invoke (logging or other) :after_destroy callback
      Hyrax::Actors::FileSetActor.new(get(id).fileset, user).destroy
      work.reload
    end
  end

  def commit_assigned
    return if @assigned.blank?
    ensure_depositor
    remote_files = @assigned.map do |path|
      { url: path_to_uri(path), file_name: File.basename(path) }
    end
    attrs = { remote_files: remote_files }
    # Create an environment for actor stack:
    env = Hyrax::Actors::Environment.new(@work, Ability.new(user), attrs)
    # Invoke default Hyrax actor stack middleware; create for a new
    # record, update otherwise:
    actor = Hyrax::CurationConcern.actor
    @work.new_record? ? actor.create(env) : actor.update(env)
  end
end
198
+ end
199
+ end
@@ -0,0 +1,35 @@
1
+ require 'iiif_print/data/fileset_helper'
2
+ require 'iiif_print/data/path_helper'
3
+ require 'iiif_print/data/work_derivatives'
4
+ require 'iiif_print/data/work_files'
5
+ require 'iiif_print/data/work_file'
6
+
7
module IiifPrint
  # Module for data access helper / adapter classes supporting, enhancing
  #   IiifPrint work models
  module Data
    # Handler for after_create_fileset, to be called by block subscribing to
    #   and overriding default Hyrax `:after_create_fileset` handler, via
    #   app integrating iiif_print.
    # @param file_set [FileSet] the fileset that was just created
    # @param user [Object] user passed on to FileSetAttachedEventJob
    def self.handle_after_create_fileset(file_set, user)
      handle_queued_derivative_attachments(file_set)
      # Hyrax queues this job by default, and since iiif_print
      #   overrides the single subscriber Hyrax uses to do so, we
      #   must call this here:
      FileSetAttachedEventJob.perform_later(file_set, user)
      # pick the parent that is a work, the same way
      #   handle_queued_derivative_attachments does
      work = file_set.member_of.find(&:work?)
      # Hyrax CreateWithRemoteFilesActor has glaring omission re: this job,
      #   so we call it here, once we have a fileset to copy permissions to.
      InheritPermissionsJob.perform_later(work) unless work.nil?
    end

    # Attach any derivatives that were queued for the file this fileset
    # was imported from. Does nothing when the fileset has no import_url.
    # @param file_set [FileSet] the fileset that was just created
    def self.handle_queued_derivative_attachments(file_set)
      return if file_set.import_url.nil?
      work = file_set.member_of.find(&:work?)
      derivatives = IiifPrint::Data::WorkDerivatives.of(work)
      # For now, because this is IO-bound operation, it makes sense to have
      #   this not be a job, but run inline:
      derivatives.commit_queued!(file_set)
    end
  end
end