iiif_print 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,129 @@
1
+ version: '3.8'
2
+
3
+ x-app: &app
4
+ build:
5
+ context: .
6
+ target: hyku-base
7
+ args:
8
+ - EXTRA_APK_PACKAGES=less vim bash openjdk11-jre ffmpeg rsync exiftool
9
+ # image: ghcr.io/scientist-softserv/palni-palci:${TAG:-latest}
10
+ env_file:
11
+ - .env
12
+ # NOTE: all common env variables moved to .env
13
+ volumes:
14
+ - node_modules:/app/samvera/hyrax-webapp/node_modules:cached
15
+ - uploads:/app/samvera/hyrax-webapp/public/uploads:cached
16
+ - assets:/app/samvera/hyrax-webapp/public/assets:cached
17
+ - cache:/app/samvera/hyrax-webapp/tmp/cache:cached
18
+ - .:/app/samvera/hyrax-webapp
19
+ networks:
20
+ internal:
21
+
22
+ volumes:
23
+ assets:
24
+ cache:
25
+ db:
26
+ fcrepo:
27
+ node_modules:
28
+ redis:
29
+ solr:
30
+ uploads:
31
+ zk:
32
+ zoo:
33
+
34
+ networks:
35
+ internal:
36
+
37
+ services:
38
+ solr:
39
+ image: solr:7.7.2
40
+ environment:
41
+ - VIRTUAL_PORT=8983
42
+ - VIRTUAL_HOST=solr.iiif_print.test
43
+ command:
44
+ - sh
45
+ - "-c"
46
+ - "precreate-core hyrax_test /opt/solr/server/configsets/hyraxconf; solr-precreate hyrax /opt/solr/server/configsets/hyraxconf"
47
+ volumes:
48
+ - solr:/var/solr/data:cached
49
+ - ./spec/test_app_templates/solr/conf/:/opt/solr/server/configsets/hyraxconf
50
+ networks:
51
+ - internal
52
+
53
+ fcrepo:
54
+ image: ghcr.io/samvera/fcrepo4:4.7.5
55
+ volumes:
56
+ - fcrepo:/data:cached
57
+ env_file:
58
+ - .env
59
+ environment:
60
+ - VIRTUAL_PORT=8080
61
+ - VIRTUAL_HOST=fcrepo.hyku.test
62
+ - JAVA_OPTS=${JAVA_OPTS} -Dfcrepo.modeshape.configuration="classpath:/config/file-simple/repository.json" -Dfcrepo.object.directory="/data/objects" -Dfcrepo.binary.directory="/data/binaries"
63
+ expose:
64
+ - 8080
65
+ networks:
66
+ internal:
67
+
68
+ db:
69
+ image: postgres:11.1
70
+ env_file:
71
+ - .env
72
+ environment:
73
+ - POSTGRES_DB=${DB_NAME}
74
+ - POSTGRES_PASSWORD=${DB_PASSWORD}
75
+ - POSTGRES_USER=${DB_USER}
76
+ - VIRTUAL_PORT=5432
77
+ - VIRTUAL_HOST=db.hyku.test
78
+ volumes:
79
+ - db:/var/lib/postgresql/data
80
+ networks:
81
+ internal:
82
+
83
+ web:
84
+ <<: *app
85
+ environment:
86
+ - VIRTUAL_PORT=3000
87
+ - VIRTUAL_HOST=.hyku.test
88
+ command: tail -f /dev/null
89
+ ##
90
+ ## Similar to the above, except we will bundle and then tell the container
91
+ ## to wait. You'll then need to bash into the web container to do much of
92
+ ## anything.
93
+ # command: sh -l -c "bundle && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
94
+ depends_on:
95
+ db:
96
+ condition: service_started
97
+ solr:
98
+ condition: service_started
99
+ fcrepo:
100
+ condition: service_started
101
+ redis:
102
+ condition: service_started
103
+ chrome:
104
+ condition: service_started
105
+ expose:
106
+ - 3000
107
+
108
+ redis:
109
+ image: redis:5
110
+ command: redis-server
111
+ volumes:
112
+ - redis:/data
113
+ networks:
114
+ internal:
115
+
116
+ chrome:
117
+ # password is 'secret'
118
+ image: selenium/standalone-chrome-debug:3.141.59-20201010
119
+ logging:
120
+ driver: none
121
+ volumes:
122
+ - /dev/shm:/dev/shm
123
+ shm_size: 3G
124
+ networks:
125
+ internal:
126
+ environment:
127
+ - JAVA_OPTS=-Dwebdriver.chrome.whitelistedIps=
128
+ - VIRTUAL_PORT=7900
129
+ - VIRTUAL_HOST=chrome.hyku.test
@@ -0,0 +1,43 @@
1
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
2
+
3
+ # version updated in one place:
4
+ require 'iiif_print/version'
5
+
6
+ # Gem description:
7
Gem::Specification.new do |spec|
  spec.name = 'iiif_print'
  spec.version = IiifPrint::VERSION
  spec.authors = ['Sean Upton', 'Jacob Reed', 'Brian McBride',
                  'Eben English', 'Kirk Wang', 'LaRita Robinson', 'Jeremy Friesen']
  spec.email = ['sean.upton@utah.edu', 'jacob.reed@utah.edu',
                'brian.mcbride@utah.edu', 'eenglish@bpl.org', 'kirk.wang@scientist.com',
                'larita@scientist.com', 'jeremy.n.friesen@gmail.com']
  spec.homepage = 'https://github.com/samvera-labs/iiif_print'
  spec.description = 'Gem/Engine for IIIF Print works in Hyrax-based Samvera Application.'
  spec.summary = <<~SUMMARY
    iiif_print is a Rails Engine gem providing model and administrative
    functions to Hyrax-based Samvera applications, for management of
    (primarily scanned) content.
  SUMMARY
  spec.license = 'Apache-2.0'
  # Split on NUL (`git ls-files -z`) instead of $OUTPUT_RECORD_SEPARATOR: that
  # global is only defined after `require 'English'`, which this file never
  # does, so it evaluated to nil and `split` fell back to whitespace splitting
  # (breaking on any tracked path containing a space).
  spec.files = `git ls-files -z`.split("\x0")
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Runtime dependencies
  spec.add_dependency 'blacklight_iiif_search', '~> 1.0'
  spec.add_dependency 'dry-monads', '~> 1.4.0'
  spec.add_dependency 'hyrax', '>= 2.5', '< 4.0'
  spec.add_dependency 'nokogiri', '>=1.13.2'
  spec.add_dependency 'rails', '~> 5.0'
  spec.add_dependency 'rdf-vocab', '~> 3.0'

  # Development / test dependencies
  spec.add_development_dependency 'bixby'
  spec.add_development_dependency 'database_cleaner', '~> 1.3'
  spec.add_development_dependency 'engine_cart', '~> 2.2'
  spec.add_development_dependency "factory_bot", '~> 4.4'
  spec.add_development_dependency 'fcrepo_wrapper', '~> 0.5', '>= 0.5.1'
  spec.add_development_dependency 'newspaper_works_fixtures', '~> 0.3', '>=0.3.1'
  spec.add_development_dependency 'rails-controller-testing', '~> 1'
  spec.add_development_dependency 'rspec-rails', '~> 3.1'
  spec.add_development_dependency 'rspec-activemodel-mocks'
  spec.add_development_dependency 'shoulda-matchers', '~> 3.1'
  spec.add_development_dependency 'solr_wrapper', '>= 1.1', '< 3.0'
end
@@ -0,0 +1,29 @@
1
+ require 'rails/generators'
2
+
3
module IiifPrint
  # Generator that installs the iiif_print stylesheet and registers the
  # iiif_print JavaScript in the host application's Sprockets manifest.
  #
  # Public instance methods are the generator's steps; helpers must stay private.
  class AssetsGenerator < Rails::Generators::Base
    desc "This generator installs the iiif_print CSS assets into your application"

    source_root File.expand_path('../templates', __FILE__)

    # Sprockets JavaScript manifest of the host application.
    APPLICATION_JS = 'app/assets/javascripts/application.js'.freeze

    # Copy the stylesheet template into the host application.
    def inject_css
      copy_file "iiif_print.scss", "app/assets/stylesheets/iiif_print.scss"
    end

    # Add `//= require iiif_print` after the `//= require hyrax` line of the
    # JavaScript manifest, unless iiif_print is already mentioned there.
    #
    # When the manifest does not exist the step is skipped with a status
    # message instead of aborting the generator with Errno::ENOENT.
    def inject_js
      unless File.exist?(APPLICATION_JS)
        say_status('skip', "#{APPLICATION_JS} not found; add `//= require iiif_print` manually", :yellow)
        return
      end
      return if iiif_print_js_installed?
      insert_into_file APPLICATION_JS, after: '//= require hyrax' do
        "\n//= require iiif_print\n"
      end
    end

    private

    # @return [Boolean] true when the manifest already mentions iiif_print
    def iiif_print_js_installed?
      File.read(APPLICATION_JS).include?('iiif_print')
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # adds controller-scope behavior to the implementing application
2
+ require 'rails/generators'
3
+
4
module IiifPrint
  # Generator that patches the host application's CatalogController so that
  # OCR text is indexed/highlighted and Blacklight IIIF Search points at the
  # fields used by iiif_print.
  #
  # NOTE(review): the whitespace inside the string literals below is copied
  # verbatim from the reviewed source; confirm it matches the indentation of
  # the generated catalog_controller.rb, since gsub_file matches literally.
  class CatalogControllerGenerator < Rails::Generators::Base
    desc "\nThis generator makes the following changes to your app:\n" \
         "1. Adds index fields in CatalogController\n" \
         "2. Adjusts Blacklight IIIF Search configuration settings in CatalogController\n"

    # Insert the `all_text_tsimv` index field right after the opening of the
    # `configure_blacklight` block.
    def add_index_fields_to_catalog_controller
      index_field_lines = [
        "\n\n # IiifPrint index fields\n",
        " config.add_index_field 'all_text_tsimv', highlight: true, helper_method: :render_ocr_snippets\n"
      ]
      inject_into_file catalog_controller_path, after: 'configure_blacklight do |config|' do
        index_field_lines.join
      end
    end

    # Point the IIIF search full-text field at `all_text_tsimv`.
    def adjust_catalog_controller_all_text_config
      original_setting = " full_text_field: 'text',"
      updated_setting = " full_text_field: 'all_text_tsimv',"
      gsub_file(catalog_controller_path, original_setting, updated_setting)
    end

    # Point the IIIF search object relation field at `is_page_of_ssim`.
    def adjust_catalog_controller_is_page_of_config
      original_setting = " object_relation_field: 'is_page_of_s',"
      updated_setting = " object_relation_field: 'is_page_of_ssim',"
      gsub_file(catalog_controller_path, original_setting, updated_setting)
    end

    private

    # @return [String] path of the controller every step of this generator edits
    def catalog_controller_path
      'app/controllers/catalog_controller.rb'
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'rails/generators'
2
+
3
module IiifPrint
  # Install generator: copies migrations, makes sure Blacklight IIIF Search is
  # installed, then delegates to the catalog controller and assets generators
  # and tidies up the generated files.
  #
  # Public instance methods run as generator steps in definition order.
  class InstallGenerator < Rails::Generators::Base
    source_root File.expand_path('../templates', __FILE__)

    # Copy the engine's migrations into the host application.
    def copy_migrations
      rake "iiif_print:install:migrations"
    end

    # Run the Blacklight IIIF Search installer unless the CatalogController
    # already includes its controller module.
    def verify_biiif_installed
      return if File.read('app/controllers/catalog_controller.rb').include?('include BlacklightIiifSearch::Controller')
      say_status('info',
                 'BLACKLIGHT IIIF SEARCH NOT INSTALLED; INSTALLING BLACKLIGHT IIIF SEARCH',
                 :blue)
      generate 'blacklight_iiif_search:install'
    end

    def catalog_controller_configuration
      generate 'iiif_print:catalog_controller'
    end

    def inject_configuration
      copy_file 'config/initializers/iiif_print.rb'
    end

    def inject_assets
      generate 'iiif_print:assets'
    end

    # Blacklight IIIF Search generator has some linting that does not agree with CircleCI on Hyku
    # ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/controller_generator.rb
    # The following two methods clean up the generated code to appease Rubocop.
    #
    # Collapses the blank lines in front of the two generated comment headers.
    def lint_catalog_controller
      file = "app/controllers/catalog_controller.rb"
      contents = File.read(file)
      contents.gsub!(/\n\s*\n\s*# IiifPrint index fields/, "\n # IiifPrint index fields")
      contents.gsub!(/\n\s*\n\s*# configuration for Blacklight IIIF Content Search/, "\n\n # configuration for Blacklight IIIF Content Search")
      File.write(file, contents)
    end

    # ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/templates/iiif_search_builder.rb
    #
    # Prepend the frozen_string_literal magic comment to the generated search
    # builder. The step is idempotent: re-running the installer no longer
    # stacks a second magic comment, and a missing file is reported rather
    # than raising Errno::ENOENT.
    def lint_iiif_search_builder
      file = "app/models/iiif_search_builder.rb"
      magic_comment = "# frozen_string_literal: true"
      unless File.exist?(file)
        say_status('skip', "#{file} not found; nothing to lint", :yellow)
        return
      end
      contents = File.read(file)
      return if contents.start_with?(magic_comment)
      File.write(file, "#{magic_comment}\n\n#{contents}")
    end
  end
end
@@ -0,0 +1,22 @@
1
+ IiifPrint.config do |config|
2
+ # NOTE: WorkTypes and models are used synonymously here.
3
+ # Add models to be excluded from search so the user
4
+ # would not see them in the search results.
5
+ # By default, use the human-readable versions, like:
6
+ # @example
7
+ # # config.excluded_model_name_solr_field_values = ['Generic Work', 'Image']
8
+ #
9
+ # config.excluded_model_name_solr_field_values = []
10
+
11
+ # Add configurable solr field key for searching,
12
+ # default key is: 'human_readable_type_sim'
13
+ # if another key is used, make sure to adjust the
14
+ # config.excluded_model_name_solr_field_values to match
15
+ # @example
16
+ # config.excluded_model_name_solr_field_key = 'some_solr_field_key'
17
+
18
+ # Configure how the manifest sorts the canvases, by default it sorts by :title,
19
+ # but a different model property may be desired such as :date_published
20
+ # @example
21
+ # config.sort_iiif_manifest_canvases_by = :date_published
22
+ end
@@ -0,0 +1 @@
1
+ @import 'iiif_print/iiif_print';
@@ -0,0 +1,113 @@
1
require 'shellwords'

module IiifPrint
  # Base type for IiifPrint derivative services.
  #
  # Subclasses set `target_extension` and implement {#convert_cmd}; this class
  # provides destination-path handling, source image inspection and the shell
  # invocations shared by the concrete services.
  class BaseDerivativeService
    attr_reader :file_set, :master_format
    delegate :uri, to: :file_set

    class_attribute :target_extension, default: nil

    # @param file_set [FileSet] the file set derivatives are created for
    def initialize(file_set)
      @file_set = file_set
      @dest_path = nil
      @source_path = nil
      @source_meta = nil
    end

    ##
    # We assume that for the file set's parent that this is an acceptable plugin.  Now, we ask for
    # this specific file_set is it valid.  For example, we would not attempt to extract text from a
    # movie even though the parent work says to attempt to extract text on any attached file sets.
    # Put another way, we can upload a PDF or a Movie to the parent.
    #
    # In subclass, you'll want to consider the attributes of the file_set and whether that subclass
    # should process the given file_set.
    #
    # @see IiifPrint::PluggableDerivativeService#plugins_for
    # @return [Boolean]
    def valid?
      true
    end

    # @return [Class] the object asked for derivative paths
    def derivative_path_factory
      Hyrax::DerivativePath
    end

    # Prepare the full path for the passed extension/destination name, making
    # sure its parent directory exists.
    #
    # @param extension [String]
    # @return [String] the destination path
    def prepare_path(extension)
      dest_path = derivative_path_factory.derivative_path_for_reference(
        @file_set,
        extension
      )
      # mkdir_p is a no-op for an existing directory, so no existence check is needed.
      FileUtils.mkdir_p(File.dirname(dest_path))
      dest_path
    end

    # calculate and ensure directory components for singular @dest_path
    #   should only be used by subclasses producing a single derivative
    def load_destpath
      @dest_path = prepare_path(target_extension)
    end

    # Metadata of the source file as reported by IiifPrint::ImageTool,
    # computed once and cached.
    def identify
      return @source_meta unless @source_meta.nil?
      @source_meta = IiifPrint::ImageTool.new(@source_path).metadata
    end

    def mime_type
      identify[:content_type]
    end

    def use_color?
      identify[:color] == 'color'
    end

    # is source one-bit monochrome?
    def one_bit?
      identify[:color] == 'monochrome'
    end

    # @param filename [String] presumed to be the full path to the source file
    def create_derivatives(filename)
      @source_path = filename

      # Get destination path from Hyrax for file extension defined in
      # self.target_extension constant on respective derivative service subclass.
      load_destpath
    end

    # Remove the file set's derivatives whose path ends with +extension+.
    def cleanup_derivatives(extension = target_extension, *_args)
      derivative_path_factory.derivatives_for_reference(file_set).each do |path|
        FileUtils.rm_f(path) if path.end_with?(extension)
      end
    end

    # Decompress the JP2 source into an intermediate TIFF in a fresh temporary
    # directory.
    #
    # Both paths are shell-escaped: the command goes through a shell, so a
    # source path containing spaces or metacharacters would otherwise break
    # (or alter) the command.
    #
    # NOTE(review): the temporary directory is never removed here; callers
    # appear to be responsible for cleaning up the intermediate file.
    #
    # @return [String] path of the intermediate TIFF
    def jp2_to_intermediate
      intermediate_path = File.join(Dir.mktmpdir, 'intermediate.tif')
      jp2_cmd = "opj_decompress -i #{Shellwords.escape(@source_path)} " \
                "-o #{Shellwords.escape(intermediate_path)}"
      # make intermediate, then...
      `#{jp2_cmd}`
      intermediate_path
    end

    # @abstract subclasses return the shell command performing the conversion
    # @raise [NotImplementedError] when the subclass does not override it
    def convert_cmd
      raise NotImplementedError, 'Calling subclass missing convert_cmd method'
    end

    # convert non-JP2 source/primary file to PDF derivative with ImageMagick6
    #   calls convert_cmd on calling subclasses
    def im_convert
      `#{convert_cmd}`
    end

    # convert JP2 source/primary file to PDF derivative, via
    #   opj_decompress to intermediate TIFF, then ImageMagick6 convert
    def jp2_convert
      # jp2 source -> intermediate
      intermediate_path = jp2_to_intermediate
      @source_path = intermediate_path
      # intermediate -> PDF
      im_convert
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # customize behavior for IiifSearch
2
module IiifPrint
  module BlacklightIiifSearch
    # Customizes the annotation and canvas URLs produced for IIIF search
    # results, adding the on-image coordinates of the matched word when the
    # word-coordinates JSON derivative is available.
    #
    # The including object is expected to provide `query`, `hl_index`,
    # `document`, `parent_document` and `controller`.
    module AnnotationDecorator
      ##
      # Create a URL for the annotation
      # use a Hyrax-y URL syntax:
      # protocol://host:port/concern/model_type/work_id/manifest/canvas/file_set_id/annotation/index
      # @return [String]
      def annotation_id
        "#{base_url}/manifest/canvas/#{file_set_id}/annotation/#{hl_index}"
      end

      ##
      # Create a URL for the canvas that the annotation refers to
      # match the Hyrax default canvas URL syntax:
      # protocol://host:port/concern/model_type/work_id/manifest/canvas/file_set_id
      # @return [String]
      def canvas_uri_for_annotation
        "#{base_url}/manifest/canvas/#{file_set_id}#{coordinates}"
      end

      private

      ##
      # return a string like "#xywh=100,100,250,20"
      # corresponding to coordinates of query term on image
      #
      # Falls back to {#default_coords} when there is no query, no coordinates
      # data, no matching word, or no entry at `hl_index`.
      # @return [String]
      def coordinates
        return default_coords if query.blank?
        coords_json = fetch_and_parse_coords
        return default_coords unless coords_json && coords_json['coords']
        # Regexp.union escapes each term, so characters such as "(" or "+" in
        # the user's query are matched literally instead of raising RegexpError
        # or changing the meaning of the pattern.
        term_pattern = Regexp.union(query.split(' ').map(&:downcase))
        matches = coords_json['coords'].select do |word, _boxes|
          word.downcase.match?(term_pattern)
        end
        return default_coords if matches.blank?
        coords_array = matches.values.flatten(1)[hl_index]
        # Previously `return default ...`, an undefined name (NameError).
        return default_coords unless coords_array
        "#xywh=#{coords_array.join(',')}"
      end

      ##
      # return the JSON word-coordinates file contents
      # @return [Hash, nil] parsed JSON, or nil when missing or unparseable
      def fetch_and_parse_coords
        coords = IiifPrint::Data::WorkDerivatives.data(from: file_set_id, of_type: 'json')
        return nil if coords.blank?
        begin
          JSON.parse(coords)
        rescue JSON::ParserError
          nil
        end
      end

      ##
      # a default set of coordinates
      # @return [String]
      def default_coords
        '#xywh=0,0,0,0'
      end

      ##
      # the base URL for the Newspaper object
      # use polymorphic_url, since we deal with multiple object types
      # @return [String]
      def base_url
        host = controller.request.base_url
        controller.polymorphic_url(parent_document, host: host, locale: nil)
      end

      ##
      # return the first file set id
      # @return [String]
      # @raise [RuntimeError] when the document lists no file set ids
      def file_set_id
        return document['id'] if document.file_set?

        file_set_ids = document['file_set_ids_ssim']
        raise "#{self.class}: NO FILE SET ID" if file_set_ids.blank?
        file_set_ids.first
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'hyrax/catalog_search_builder'
2
+
3
module IiifPrint
  # This class extends the base Hyrax::CatalogSearchBuilder by:
  #
  # - supporting highlighting of snippets in results
  # - excluding models from search result; with complex works you might want to skip some of those
  #   works.
  # - filtering results on the `is_child_bsi` flag (see {#show_parents_only})
  class CatalogSearchBuilder < Hyrax::CatalogSearchBuilder
    # TODO: Do we need the following as a module?  It hides the behavior
    include IiifPrint::HighlightSearchParams
    # TODO: Do we need the following as a module?  It hides the behavior
    include IiifPrint::ExcludeModels

    # NOTE: If you are using advanced_search, the :exclude_models and :highlight_search_params must
    # be added after the advanced_search methods (which are not part of this gem).  In other tests,
    # we found that having the advanced search processing after the two aforementioned processors
    # resulted in improper evaluation of keyword querying.
    self.default_processor_chain += [:exclude_models, :highlight_search_params, :show_parents_only]

    # Append a filter query on `is_child_bsi` to the Solr parameters.
    #
    # When the `include_child_works` request parameter is the string 'true'
    # the filter is built for `is_child_bsi: 'true'`; otherwise it is built
    # for `is_child_bsi: nil`.
    #
    # NOTE(review): with `include_child_works=true` the filter as written
    # selects on the child flag rather than dropping the filter; confirm this
    # is the intended meaning of "include".
    #
    # @param solr_parameters [Hash] mutated in place
    # @return [Array<String>] the updated list of filter queries
    def show_parents_only(solr_parameters)
      child_flag = blacklight_params["include_child_works"] == 'true' ? 'true' : nil
      query = ActiveFedora::SolrQueryBuilder.construct_query(is_child_bsi: child_flag)
      # Array() tolerates a missing (nil) or single-string :fq, which would
      # make a bare `+=` raise.
      solr_parameters[:fq] = Array(solr_parameters[:fq]) + [query]
    end
  end
end
@@ -0,0 +1,99 @@
1
module IiifPrint
  # Holds the configurable settings of the IiifPrint engine.  Every setting
  # has a writer and a reader that supplies a default.
  class Configuration
    attr_writer :after_create_fileset_handler

    # Invoke the configured after-create handler, or the default
    # IiifPrint::Data handler when none is configured.  A handler explicitly
    # set to nil counts as "not configured" instead of raising on `nil.call`.
    #
    # @param file_set [FileSet]
    # @param user [User]
    def handle_after_create_fileset(file_set, user)
      handler = defined?(@after_create_fileset_handler) && @after_create_fileset_handler
      if handler
        handler.call(file_set, user)
      else
        IiifPrint::Data.handle_after_create_fileset(file_set, user)
      end
    end

    attr_writer :excluded_model_name_solr_field_values
    # By default, this uses an array of human readable types
    #   ex: ['Generic Work', 'Image']
    # @return [Array<String>]
    def excluded_model_name_solr_field_values
      return @excluded_model_name_solr_field_values unless @excluded_model_name_solr_field_values.nil?
      @excluded_model_name_solr_field_values = []
    end

    # This method wraps Hyrax's configuration so we can sniff out the correct method to use.  The
    # {Hyrax::Configuration#whitelisted_ingest_dirs} is deprecated in favor of
    # {Hyrax::Configuration#registered_ingest_dirs}.
    #
    # @return [Array<String>]
    def registered_ingest_dirs
      if Hyrax.config.respond_to?(:registered_ingest_dirs)
        Hyrax.config.registered_ingest_dirs
      else
        Hyrax.config.whitelisted_ingest_dirs
      end
    end

    attr_writer :excluded_model_name_solr_field_key
    # A string of a solr field key
    # @return [String]
    def excluded_model_name_solr_field_key
      return "human_readable_type_sim" unless defined?(@excluded_model_name_solr_field_key)
      @excluded_model_name_solr_field_key
    end

    attr_writer :default_iiif_manifest_version
    # @return [Integer] the configured manifest version, 2 when unset
    def default_iiif_manifest_version
      @default_iiif_manifest_version || 2
    end

    attr_writer :metadata_fields
    # rubocop:disable Metrics/MethodLength
    # @api private
    # @note These fields will appear in rendering order.
    # @todo To move this to an `@api public` state, we need to consider what a proper configuration looks like.
    def metadata_fields
      @metadata_fields ||= {
        title: {},
        description: {},
        collection: {},
        abstract: {},
        date_modified: {},
        creator: { render_as: :faceted },
        contributor: { render_as: :faceted },
        subject: { render_as: :faceted },
        publisher: { render_as: :faceted },
        language: { render_as: :faceted },
        identifier: { render_as: :linked },
        keyword: { render_as: :faceted },
        date_created: { render_as: :linked },
        based_near_label: {},
        related_url: { render_as: :external_link },
        resource_type: { render_as: :faceted },
        source: {},
        extent: {},
        rights_statement: { render_as: :rights_statement },
        rights_notes: {},
        access_right: {},
        license: { render_as: :license },
        searchable_text: {}
      }
    end
    # rubocop:enable Metrics/MethodLength

    attr_writer :sort_iiif_manifest_canvases_by
    # @return [Symbol] the property canvases are sorted by, :title when unset
    def sort_iiif_manifest_canvases_by
      @sort_iiif_manifest_canvases_by || :title
    end

    attr_writer :additional_tessearct_options
    ##
    # The additional options to pass to the Tesseract configuration
    #
    # The accessor name is misspelled ("tessearct"); it is kept so existing
    # initializers keep working, and correctly spelled aliases are defined
    # right after it.
    #
    # @see https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html
    # @return [String]
    def additional_tessearct_options
      @additional_tessearct_options || ""
    end

    # Correctly spelled aliases for the Tesseract options accessors.
    alias_method :additional_tesseract_options, :additional_tessearct_options
    alias_method :additional_tesseract_options=, :additional_tessearct_options=
  end
end
@@ -0,0 +1,25 @@
1
module IiifPrint
  module Data
    # Mixin module for fileset methods for work, presumes an @work
    #   instance attribute referring to a work object.  `fileset_id` also
    #   calls a `fileset` method that this module does not define, so the
    #   including class must provide it.
    module FilesetHelper
      # @return [String, nil] the file set id for the current context, or nil
      #   when no file set is available
      def fileset_id
        # if context is itself a string, presume it is a file set id
        return @work if @work.is_a?(String)
        # if context is not a String, presume a work or fileset context:
        fileset&.id
      end

      # @return [FileSet, nil] the file set for the current context, or nil
      #   when there is no context or the work has no FileSet members
      def first_fileset
        # no context at all: mirror fileset_id and answer nil instead of
        # raising NoMethodError on nil.members
        return nil if @work.nil?
        # if context is fileset id (e.g. caller is view partial) string,
        #   get the fileset from that id
        return FileSet.find(@work) if @work.is_a?(String)
        # if "work" context is a FileSet, not actual work, return it
        return @work if @work.is_a?(FileSet)
        # in most cases, get from work's members:
        @work.members.select { |member| member.is_a?(FileSet) }.first
      end
    end
  end
end