iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
data/LICENSE ADDED
@@ -0,0 +1,203 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "{}"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2019 Boston Public Library
190
+ Copyright 2019 J. Willard Marriott Library, The University of Utah
191
+ Additional copyright may be held by others, as reflected in the commit history.
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,203 @@
1
+ IiifPrint
2
+ ===================================================
3
+ Docs:
4
+ [![Apache 2.0 License](http://img.shields.io/badge/APACHE2-license-blue.svg)](./LICENSE)
5
+ [![Contribution Guidelines](http://img.shields.io/badge/CONTRIBUTING-Guidelines-blue.svg)](./CONTRIBUTING.md)
6
+
7
+ Jump in the Samvera Slack: <a href="http://slack.samvera.org/"><img src="https://status.slack.com/fonts/icons/icon_slack_hash_colored.svg" width="15" /></a>
8
+
9
+ <!-- TOC -->
10
+
11
+ - [Overview](#overview)
12
+ - [Documentation](#documentation)
13
+ - [Requirements](#requirements)
14
+ - [Dependencies](#dependencies)
15
+ - [Installation](#installation)
16
+ - [Changes made by the installer:](#changes-made-by-the-installer)
17
+ - [Configuration to enable IiifPrint features](#configuration-to-enable-iiifprint-features)
18
+ - [Model level configurations](#model-level-configurations)
19
+ - [Application level configurations](#application-level-configurations)
20
+ - [Ingesting Content](#ingesting-content)
21
+ - [Developing, Testing, and Contributing](#developing-testing-and-contributing)
22
+ - [Contributing](#contributing)
23
+ - [Acknowledgements](#acknowledgements)
24
+
25
+ <!-- /TOC -->
26
+
27
+ # Overview
28
+ IiifPrint is a gem (Rails "engine") for [Hyrax](https://hyrax.samvera.org/)-based digital repository applications. It supports displaying parent and child works in the same viewer (Universal Viewer) and searching OCR text from the parent work through to its child work(s).
29
+
30
+ IiifPrint is not a stand-alone application. It is designed to be integrated into a new or existing Hyku (v4.0-v5.0) application. Future development will include integrating it into a Hyrax-based application without Hyku and support for [IIIF Presentation Manifest version 3](https://iiif.io/api/presentation/3.0/) along with [AllinsonFlex](https://github.com/samvera-labs/allinson_flex) metadata profiles.
31
+
32
+ IiifPrint supports:
33
+ * OCR and ALTO creation
34
+ * full-text search
35
+ * OCR keyword match highlighting
36
+ * viewer with page navigation and deep zooming
37
+ * splitting of PDFs to LZW compressed TIFFs for viewing
38
+ * configuring how the manifest canvases are sorted in the viewer
39
+ * adding metadata fields to the manifest with faceted search links and external links
40
+ * excluding specified work types from the catalog search results
41
+
42
+ A complete list of features can be found [here](https://github.com/scientist-softserv/iiif_print/wiki/Features-List).
43
+
44
+ ## Documentation
45
+ Documents to help you learn more about and deploy IiifPrint can be found on the [Project Wiki](https://github.com/scientist-softserv/iiif_print/wiki).
46
+
47
+ IiifPrint was developed against [Hyku](https://github.com/samvera/hyku) v4.0-v5.0. If your application uses [Bulkrax](https://github.com/samvera-labs/bulkrax), please ensure that its version is 5.0.1 or greater.
48
+
49
+ ## Requirements
50
+
51
+ * [Ruby](https://www.ruby-lang.org/) >=2.4
52
+ * [Rails](https://rubyonrails.org/) ~>5.0
53
+ * [Bundler](http://bundler.io/)
54
+ * [Hyrax](https://github.com/samvera/hyrax) v2.5-v3.5.0
55
+ - ..._and the various [Samvera dependencies](https://github.com/samvera/hyrax#how-to-run-the-code) that entails_.
56
+ * A Hyrax-based Rails application
57
+
58
+ ## Dependencies
59
+
60
+ * [FITS](https://projects.iq.harvard.edu/fits/home)
61
+ * [Tesseract-ocr](https://github.com/tesseract-ocr/)
62
+ * [LibreOffice](https://www.libreoffice.org/)
63
+ * [ghostscript](https://www.ghostscript.com/)
64
+ * [poppler-utils](https://poppler.freedesktop.org/)
65
+ * [ImageMagick](https://github.com/ImageMagick/ImageMagick6)
66
+ - _ImageMagick policy XML may need to be more permissive in both resources and source media types allowed. See template [policy.xml](config/vendor/imagemagick-6-policy.xml)._
67
+ * [libcurl3](https://packages.ubuntu.com/search?keywords=libcurl3)
68
+ * [libgbm1](https://packages.debian.org/sid/libgbm1)
69
+
70
+ # Installation
71
+ IiifPrint easily integrates with your Hyrax (v2.5-v3.5.0) applications.
72
+
73
+ * Add `gem 'iiif_print'` to your Gemfile.
74
+ * Run `bundle install`
75
+ * Run `rails generate iiif_print:install`
76
+ * Set config options as indicated below...
77
+
78
+
79
+ ## Changes made by the installer:
80
+ * In `app/assets/javascripts/application.js`, it adds `//= require iiif_print`
81
+ * Adds `app/assets/stylesheets/iiif_print.scss`
82
+ * In `app/controllers/catalog_controller.rb`, it adds `include BlacklightIiifSearch::Controller`
83
+ * In `app/controllers/catalog_controller.rb`, it adds `add_index_field` and `iiif_search` config in the `configure_blacklight` block
84
+ * Adds `app/models/iiif_search_builder.rb`
85
+ * In `config/routes.rb`, it adds `concern :iiif_search, BlacklightIiifSearch::Routes.new`
86
+ * In `config/routes.rb`, it adds `concerns :iiif_search` in the `resources :solr_documents` block
87
+ * Adds `config/initializers/iiif_print.rb`
88
+ * Adds three migrations, `CreateIiifPrintDerivativeAttachments`, `CreateIiifPrintIngestFileRelations`, and `CreateIiifPrintPendingRelationships`
89
+ * In `solr/conf/schema.xml`, it adds Blacklight IIIF Search autocomplete config
90
+ * In `solr/conf/solrconfig.xml`, it adds Blacklight IIIF Search autocomplete config
91
+ * Adds `solr/lib/solr-tokenizing_suggester-7.x.jar`
92
+
93
+ (It may be helpful to run `git diff` after installation to see all the changes made by the installer.)
94
+
95
+ ## Configuration to enable IiifPrint features
96
+ **NOTE: WorkTypes and models are used synonymously here.**
97
+
98
+ ### Model level configurations
99
+
100
+ In `app/models/{work_type}.rb` add `include IiifPrint.model_configuration` to any work types which require IiifPrint processing features (such as PDF splitting or OCR derivatives). See [lib/iiif_print.rb](./lib/iiif_print.rb) for details on configuration options.
101
+
102
+ ```rb
103
+ # Example model Book which splits PDFs into child works of
104
+ # model Page, and runs only one derivative service (TIFFs)
105
+
106
+ class Book < ActiveFedora::Base
107
+ include IiifPrint.model_configuration(
108
+ pdf_split_child_model: Page,
109
+ derivative_service_plugins: [
110
+ IiifPrint::TIFFDerivativeService
111
+ ]
112
+ )
113
+ end
114
+ ```
115
+
116
+ ### Application level configurations
117
+
118
+ In `config/initializers/iiif_print.rb` specify application level configuration options.
119
+
120
+ ```rb
121
+ IiifPrint.config do |config|
122
+ # Add models to be excluded from search so the user would not see them in the search results.
123
+ # By default, use the human readable versions like:
124
+ config.excluded_model_name_solr_field_values = ['Generic Work', 'Image']
125
+
126
+ # Add configurable solr field key for searching; the default key is 'human_readable_type_sim'.
127
+ # If another key is used, make sure to adjust config.excluded_model_name_solr_field_values to match.
128
+ config.excluded_model_name_solr_field_key = 'some_solr_field_key'
129
+
130
+ # Configure how the manifest sorts the canvases, by default it sorts by `:title`, but a different
131
+ # model property may be desired such as :date_published
132
+ config.sort_iiif_manifest_canvases_by = :date_published
133
+ end
134
+ ```
135
+
136
+ To enable OCR search (from the UV and the catalog search):
137
+ ### catalog_controller.rb
138
+ * In the CatalogController, find the add_search_field config block for 'all_fields'. Add `advanced_parse: false` as seen in the following example:
139
+ ```rb
140
+ config.add_search_field('all_fields', label: 'All Fields', include_in_advanced_search: false, advanced_parse: false) do |field|
141
+ all_names = config.show_fields.values.map(&:field).join(" ")
142
+ title_name = 'title_tesim'
143
+ field.solr_parameters = {
144
+ qf: "#{all_names} file_format_tesim all_text_timv",
145
+ pf: title_name.to_s
146
+ }
147
+ end
148
+ ```
149
+ * Set `config.search_builder_class = IiifPrint::CatalogSearchBuilder` to remove works from the catalog search results if `is_child_bsi: true`
150
+ * Ensure that all text search is configured in default_solr_params config block:
151
+ ```rb
152
+ config.default_solr_params = {
153
+ qt: "search",
154
+ rows: 10,
155
+ qf: "title_tesim description_tesim creator_tesim keyword_tesim all_text_timv"
156
+ }
157
+ ```
158
+
159
+ # Ingesting Content
160
+
161
+ IiifPrint supports a range of different ingest workflows:
162
+ * single-item ingest via the UI
163
+ * batch ingest of works from local files or remote files via Bulkrax
164
+
165
+ The ingest process is configurable at the model level, granting the option to:
166
+ * split a PDF into TIFFs and create child works
167
+ * create a full complement of derivatives, including TIFF, JP2, PDF, OCR text, and word-coordinate JSON
168
+
169
+ # Developing, Testing, and Contributing
170
+
171
+ We develop the IIIF Print gem using Docker and Docker Compose. You'll want to clone this repository and run the following commands:
172
+
173
+ ```shell
174
+ $ docker compose build
175
+ $ docker compose up
176
+ $ docker compose exec web bash
177
+ ```
178
+
179
+ You'll now be inside the web container:
180
+
181
+ ```shell
182
+ $ bundle exec rake
183
+ ```
184
+
185
+ The above will build the test application (if it doesn't already exist). During the build you might get a notice about conflicting files, asking whether to overwrite them. We recommend that you select the "accept all" option (i.e. typing <kbd>a</kbd>).
186
+
187
+ To rebuild the test application, delete the `.internal_test_app` directory.
188
+
189
+ ## Contributing
190
+
191
+ If you're working on a PR for this project, create a feature branch off of `main`.
192
+
193
+ This repository follows the [Samvera Community Code of Conduct](https://samvera.atlassian.net/wiki/spaces/samvera/pages/405212316/Code+of+Conduct) and [language recommendations](https://github.com/samvera/maintenance/blob/master/templates/CONTRIBUTING.md#language). Please ***do not*** create a branch called `master` for this repository or as part of your pull request; the branch will either need to be removed or renamed before it can be considered for inclusion in the code base and history of this repository.
194
+
195
+ We encourage anyone who is interested in newspapers and Samvera to contribute to this project. [How can I contribute?](https://github.com/samvera/hyrax/blob/master/.github/CONTRIBUTING.md)
196
+
197
+ # Acknowledgements
198
+
199
+ IIIF Print is a gem that was forked from [Newspaper Works](https://github.com/samvera-labs/newspaper_works), a powerful and versatile library for working with digitized newspapers. We would like to thank the team and maintainers of Newspaper Works for creating such a useful and well-designed gem. Our work on IIIF Print would not have been possible without their hard work and dedication.
200
+
201
+ In particular, we would like to express our gratitude to [brianmcbride](https://github.com/brianmcbride), [seanupton](https://github.com/seanupton), [ebenenglish](https://github.com/ebenenglish), and [JacobR](https://github.com/JacobR) for their pioneering efforts on Newspaper Works. Their foundation and expertise were invaluable in the development of this gem.
202
+
203
+ Thank you to the entire Newspaper Works team for creating and maintaining such a valuable resource for the Samvera community.
data/Rakefile ADDED
@@ -0,0 +1,38 @@
1
# Rakefile: wires up Bundler's gem tasks, the engine_cart rake tasks, and any
# project-local *.rake files.

# Activate the bundle before anything else is required, so the LoadError
# rescue below can actually report a missing Bundler instead of an earlier
# `require` failing first.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rspec/core/rake_task'
# Requiring bundler/gem_tasks already calls Bundler::GemHelper.install_tasks.
# Do NOT call it again here: Rake appends actions when a task is defined
# twice, so build/install/release would each run their action twice.
require 'bundler/gem_tasks'
require 'engine_cart/rake_task'

# Bundler.require(*Rails.groups)

# Rails.application.load_tasks

# task :ci => ['engine_cart:generate'] do
# run the tests
# end

# RSpec::Core::RakeTask.new(:spec)

# task :default => :spec
# RSpec::Core::RakeTask.new

# Set up the test application prior to running jasmine tasks.
task :setup_test_server do
  require 'engine_cart'
  EngineCart.load_application!
end

# Pull in project-local task definitions.
Dir.glob('tasks/*.rake').each { |r| import r }
Dir.glob('lib/tasks/*.rake').each { |r| import r }

# NOTE(review): no :ci task is defined in this file (the definition above is
# commented out); presumably one of the imported *.rake files provides it.
# Confirm, otherwise a bare `rake` fails with an unknown-task error.
task default: :ci
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ # override to add PDF splitting for file sets
4
module IiifPrint
  module Actors
    # Adds PDF splitting to a file set actor.
    #
    # Both public overrides call +super+, so this module must sit ahead of a
    # class that already defines #create_content and #attach_to_work. Each
    # override returns whatever +super+ returned, so callers that check the
    # result for success keep working.
    module FileSetActorDecorator
      # Runs the decorated #create_content and then arranges PDF splitting.
      #
      # When +from_url+ is true the parent work is looked up immediately and a
      # split job is queued if the work is configured for splitting and the
      # file set's import URL is a PDF. Otherwise the candidate PDF paths are
      # remembered in +@pdf_paths+ for #attach_to_work.
      #
      # @param file [#path, #id] the file being attached
      # @param relation [Symbol] passed through to +super+
      # @param from_url [Boolean] whether the file was ingested from a URL
      # @return [Object] the value returned by the decorated #create_content
      def create_content(file, relation = :original_file, from_url: false)
        # Spawns asynchronous IngestJob unless ingesting from URL
        result = super
        # A falsy result means the decorated actor did not ingest the file;
        # there is nothing to split, and the file set may not be usable.
        return result unless result

        if from_url
          # we have everything we need... queue the job
          parent = parent_for(file_set: @file_set)

          if parent && service.iiif_print_split?(work: parent) && service.pdfs?(paths: [file_set.import_url])
            service.queue_job(
              work: parent,
              file_locations: [file.path],
              user: @user,
              admin_set_id: parent.admin_set_id
            )
          end
        else
          # we don't have the parent yet... save the paths for later use
          @pdf_paths = service.pdf_paths(files: [file.id.to_s])
        end

        # Previously the value of the conditional above leaked out (nil when
        # no job was queued), hiding the result of the decorated method.
        result
      end

      # Prior to Hyrax v3.1.0, this method did not exist
      # @param file_set [FileSet]
      # @return [ActiveFedora::Base]
      def parent_for(file_set:)
        file_set.parent
      end

      # Override to add PDF splitting: after the decorated #attach_to_work has
      # run, queues a split job for the paths remembered by #create_content.
      #
      # @param work [#admin_set_id] the work the file set is attached to
      # @param file_set_params [Hash] passed through to +super+
      # @return [Object] the value returned by the decorated #attach_to_work
      def attach_to_work(work, file_set_params = {})
        # Locks to ensure that only one process is operating on the list at a time.
        result = super

        return result if @pdf_paths.blank?
        return result unless service.iiif_print_split?(work: work)

        service.queue_job(
          work: work,
          file_locations: @pdf_paths,
          user: @user,
          admin_set_id: work.admin_set_id
        )
        result
      end

      # @return [Module] the service that decides on and queues PDF splitting
      def service
        IiifPrint::SplitPdfs::ChildWorkCreationFromPdfService
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ //= link_directory ../javascripts/iiif_print .js
2
+ //= link_directory ../stylesheets/iiif_print .css
File without changes
@@ -0,0 +1,33 @@
1
+ /*jshint browser: true, nomen: false, eqnull: true, es5:true, trailing:true */
2
+
3
(function () {
  "use strict";
  var work_form = $("form[data-behavior='work-form']"),
    Autocomplete = require('hyrax/autocomplete'),
    LinkedData = require('hyrax/autocomplete/linked_data'),
    // attr('id') is undefined when the form carries no id attribute; fall
    // back to '' so the indexOf check below cannot throw.
    form_id = work_form.attr('id') || '';

  /**
   * Replaces Autocomplete.prototype.setup (at most once per window) with a
   * wrapper that hands selected fields to LinkedData and defers to the
   * original setup for everything else.
   */
  function patch_autocomplete() {
    var method = Autocomplete.prototype.setup;
    if (window._autocomplete_patched) {
      return;
    }
    Autocomplete.prototype.setup = function (element, fieldName, url) {
      // defer to original method for relevant BasicMetadata fields:
      var defer = ['based_near', 'language', 'work'],
        autourl = element.attr('data-autocomplete-url'),
        // the element declares itself as the autocomplete for this field
        declared = (element.attr('data-autocomplete') === fieldName),
        // declared field whose autocomplete URL mentions 'authorities'
        ld = (declared && autourl && autourl.indexOf('authorities') !== -1),
        // NOTE(review): presumably marks fields added after page load,
        // whose jQuery object has no `context` -- confirm.
        additionalField = element.context === undefined;
      if (additionalField || (ld && defer.indexOf(fieldName) === -1)) {
        new LinkedData(element, url);
        console.log('Using patched autocomplete for field', fieldName);
      } else {
        // Invoke the original as a method of this same instance; a bare
        // method(...) call would not pass the Autocomplete instance as `this`.
        method.call(this, element, fieldName, url);
      }
    };
    window._autocomplete_patched = true;
  }

  // Only patch on work forms whose id mentions 'newspaper_'.
  if (work_form.length && form_id.indexOf('newspaper_') !== -1) {
    patch_autocomplete();
  }
}());
@@ -0,0 +1,6 @@
1
+ /* toggle the ocr snippets collapse link text */
2
+ $(document).ready(function(){
3
+ $('.ocr_snippets_expand').click(function() {
4
+ $(this).text($(this).text() == '<%= I18n.t('blacklight.search.results.snippets.more') %>' ? '<%= I18n.t('blacklight.search.results.snippets.less') %>' : '<%= I18n.t('blacklight.search.results.snippets.more') %>');
5
+ });
6
+ });
@@ -0,0 +1,3 @@
1
+ // this file gets included in downstream application.js by installer
2
+
3
+ //= require iiif_print/ocr_search
@@ -0,0 +1,4 @@
1
+ @import 'issues_calendar';
2
+ @import 'issue_search';
3
+ @import 'newspapers_search';
4
+ @import 'search_results';
@@ -0,0 +1,13 @@
1
// Sizing for #q_issues inside the issue search wrapper.
#issue_search_form_wrapper #q_issues {
  width: 250px;
}

// Spacing below each .input-group inside the issue search wrapper.
#issue_search_form_wrapper .input-group {
  margin-bottom: 5px;
}

// Top spacing for the front page search form.
#front_page_search_form {
  margin-top: 20px;
}
@@ -0,0 +1,18 @@
1
// A single day cell: fixed-size inline box with centered text.
.calendar_day {
  display: inline-block;
  text-align: center;
  height: 10px;
  width: 17px;
}

// Day cells rendered as inactive are shown in gray.
.calendar_day_inactive { color: gray; }

// Spacing below the calendar.
.issues_calendar { margin-bottom: 25px; }

// Shrinks a <select> placed inside the calendar title heading.
#issues_calendar_title {
  h2 select {
    font-size: 0.6em;
  }
}
@@ -0,0 +1,38 @@
1
// Layout of the newspapers search form inside its wrapper.
#newspapers_search_form_wrapper #newspapers_search_form {
  margin-top: 25px;

  #keyword_input { padding-bottom: 20px; }

  #date_range_limit_wrapper { padding-bottom: 10px; }

  // Date inputs sit side by side, floated left.
  #date_range_limit_wrapper .date_input {
    margin-right: 20px;
    float: left;
  }

  .facet_scrollbox_wrapper { margin-left: 15px; }
}

// Help list shown inside the search help wrapper.
#newspapers_search_help_wrapper ul.newspapers_help {
  padding-left: 24px;
  font-size: 12px;

  li { padding-bottom: 10px; }
}
@@ -0,0 +1,6 @@
1
// Background color applied to <em> elements inside .metadata.
$highlight-background-color: rgba(5, 166, 86, 0.36);

.metadata {
  em {
    font-weight: bold;
    background-color: $highlight-background-color;
  }
}
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Hyrax
  # View helpers for picking and rendering a work's IIIF viewer partial and
  # for building the Universal Viewer URLs.
  module IiifHelper
    # Renders the viewer partial chosen for +work_presenter+.
    #
    # @param work_presenter [#iiif_viewer] presenter for the work being shown
    # @param locals [Hash] extra values handed to the partial; +:presenter+
    #   is always set to +work_presenter+
    # @return whatever +render+ returns
    def iiif_viewer_display(work_presenter, locals = {})
      partial = iiif_viewer_display_partial(work_presenter)
      partial_locals = locals.merge(presenter: work_presenter)
      render(partial, partial_locals)
    end

    # @param work_presenter [#iiif_viewer]
    # @return [String] partial path under hyrax/base/iiif_viewers/ named
    #   after the presenter's viewer
    def iiif_viewer_display_partial(work_presenter)
      viewer_name = work_presenter.iiif_viewer.to_s
      'hyrax/base/iiif_viewers/' + viewer_name
    end

    # @return [String] URL of the Universal Viewer page; the host part is
    #   empty when there is no request
    def universal_viewer_base_url
      origin = request&.base_url
      "#{origin}/uv/uv.html"
    end

    # @return [String] URL of the Universal Viewer config file; the host
    #   part is empty when there is no request
    def universal_viewer_config_url
      origin = request&.base_url
      "#{origin}/uv/uv-config.json"
    end
  end
end
@@ -0,0 +1,5 @@
1
module IiifPrint
  # Helper module for the IiifPrint namespace; it currently defines no
  # helper methods.
  module ApplicationHelper; end
end