iiif_print 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.env +5 -0
  4. data/.fcrepo_wrapper +4 -0
  5. data/.github/release.yml +20 -0
  6. data/.github/workflows/branches.yml +24 -0
  7. data/.github/workflows/build-lint-test-action.yaml +33 -0
  8. data/.github/workflows/release_labels.yml +25 -0
  9. data/.gitignore +52 -0
  10. data/.rubocop.yml +177 -0
  11. data/.solr_wrapper +8 -0
  12. data/.travis.yml +49 -0
  13. data/CONTRIBUTING.md +181 -0
  14. data/Dockerfile +15 -0
  15. data/Gemfile +52 -0
  16. data/LICENSE +203 -0
  17. data/README.md +203 -0
  18. data/Rakefile +38 -0
  19. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +56 -0
  20. data/app/assets/config/iiif_print_manifest.js +2 -0
  21. data/app/assets/images/iiif_print/.keep +0 -0
  22. data/app/assets/javascripts/iiif_print/autocomplete_fix.js +33 -0
  23. data/app/assets/javascripts/iiif_print/ocr_search.js.erb +6 -0
  24. data/app/assets/javascripts/iiif_print.js +3 -0
  25. data/app/assets/stylesheets/iiif_print/_iiif_print.scss +4 -0
  26. data/app/assets/stylesheets/iiif_print/_issue_search.scss +13 -0
  27. data/app/assets/stylesheets/iiif_print/_issues_calendar.scss +18 -0
  28. data/app/assets/stylesheets/iiif_print/_newspapers_search.scss +38 -0
  29. data/app/assets/stylesheets/iiif_print/_search_results.scss +6 -0
  30. data/app/helpers/hyrax/iiif_helper.rb +22 -0
  31. data/app/helpers/iiif_print/application_helper.rb +5 -0
  32. data/app/helpers/iiif_print_helper.rb +64 -0
  33. data/app/indexers/concerns/iiif_print/child_indexer.rb +34 -0
  34. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +29 -0
  35. data/app/mailers/iiif_print/application_mailer.rb +8 -0
  36. data/app/models/concerns/iiif_print/set_child_flag.rb +29 -0
  37. data/app/models/concerns/iiif_print/solr/document.rb +47 -0
  38. data/app/models/iiif_print/application_record.rb +6 -0
  39. data/app/models/iiif_print/derivative_attachment.rb +8 -0
  40. data/app/models/iiif_print/iiif_search_response_decorator.rb +17 -0
  41. data/app/models/iiif_print/ingest_file_relation.rb +14 -0
  42. data/app/models/iiif_print/pending_relationship.rb +7 -0
  43. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +10 -0
  44. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +33 -0
  45. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +29 -0
  46. data/app/renderers/hyrax/renderers/faceted_attribute_renderer_decorator.rb +18 -0
  47. data/app/search_builders/concerns/iiif_print/exclude_models.rb +17 -0
  48. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +14 -0
  49. data/app/services/iiif_print/manifest_builder_service_behavior.rb +97 -0
  50. data/app/services/iiif_print/pluggable_derivative_service.rb +120 -0
  51. data/app/views/catalog/_snippets_more.html.erb +16 -0
  52. data/app/views/hyrax/base/_representative_media.html.erb +9 -0
  53. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +8 -0
  54. data/app/views/hyrax/file_sets/_actions.html.erb +45 -0
  55. data/bin/rails +13 -0
  56. data/config/fcrepo_wrapper_test.yml +5 -0
  57. data/config/initializers/assets.rb +2 -0
  58. data/config/locales/iiif_print.de.yml +148 -0
  59. data/config/locales/iiif_print.en.yml +119 -0
  60. data/config/locales/iiif_print.es.yml +148 -0
  61. data/config/locales/iiif_print.fr.yml +149 -0
  62. data/config/locales/iiif_print.it.yml +142 -0
  63. data/config/locales/iiif_print.pt-BR.yml +148 -0
  64. data/config/locales/iiif_print.zh.yml +142 -0
  65. data/config/solr_wrapper_test.yml +9 -0
  66. data/config/test-fixture/solr-config/_rest_managed.json +3 -0
  67. data/config/test-fixture/solr-config/admin-extra.html +31 -0
  68. data/config/test-fixture/solr-config/elevate.xml +36 -0
  69. data/config/test-fixture/solr-config/mapping-ISOLatin1Accent.txt +246 -0
  70. data/config/test-fixture/solr-config/protwords.txt +21 -0
  71. data/config/test-fixture/solr-config/schema.xml +366 -0
  72. data/config/test-fixture/solr-config/scripts.conf +24 -0
  73. data/config/test-fixture/solr-config/solrconfig.xml +322 -0
  74. data/config/test-fixture/solr-config/spellings.txt +2 -0
  75. data/config/test-fixture/solr-config/stopwords.txt +58 -0
  76. data/config/test-fixture/solr-config/stopwords_en.txt +58 -0
  77. data/config/test-fixture/solr-config/synonyms.txt +31 -0
  78. data/config/test-fixture/solr-config/xslt/example.xsl +132 -0
  79. data/config/test-fixture/solr-config/xslt/example_atom.xsl +67 -0
  80. data/config/test-fixture/solr-config/xslt/example_rss.xsl +66 -0
  81. data/config/test-fixture/solr-config/xslt/luke.xsl +337 -0
  82. data/config/vendor/fits.xml +55 -0
  83. data/config/vendor/imagemagick-6-policy.xml +76 -0
  84. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +12 -0
  85. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +11 -0
  86. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +11 -0
  87. data/docker-compose.yml +129 -0
  88. data/iiif_print.gemspec +43 -0
  89. data/lib/generators/iiif_print/assets_generator.rb +29 -0
  90. data/lib/generators/iiif_print/catalog_controller_generator.rb +32 -0
  91. data/lib/generators/iiif_print/install_generator.rb +52 -0
  92. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +22 -0
  93. data/lib/generators/iiif_print/templates/iiif_print.scss +1 -0
  94. data/lib/iiif_print/base_derivative_service.rb +113 -0
  95. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +84 -0
  96. data/lib/iiif_print/catalog_search_builder.rb +31 -0
  97. data/lib/iiif_print/configuration.rb +99 -0
  98. data/lib/iiif_print/data/fileset_helper.rb +25 -0
  99. data/lib/iiif_print/data/path_helper.rb +40 -0
  100. data/lib/iiif_print/data/work_derivatives.rb +323 -0
  101. data/lib/iiif_print/data/work_file.rb +92 -0
  102. data/lib/iiif_print/data/work_files.rb +199 -0
  103. data/lib/iiif_print/data.rb +35 -0
  104. data/lib/iiif_print/engine.rb +77 -0
  105. data/lib/iiif_print/errors.rb +9 -0
  106. data/lib/iiif_print/image_tool.rb +119 -0
  107. data/lib/iiif_print/jobs/application_job.rb +8 -0
  108. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +107 -0
  109. data/lib/iiif_print/jobs/create_relationships_job.rb +78 -0
  110. data/lib/iiif_print/jp2_derivative_service.rb +118 -0
  111. data/lib/iiif_print/jp2_image_metadata.rb +81 -0
  112. data/lib/iiif_print/lineage_service.rb +41 -0
  113. data/lib/iiif_print/metadata.rb +125 -0
  114. data/lib/iiif_print/pdf_derivative_service.rb +42 -0
  115. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +75 -0
  116. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +130 -0
  117. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +85 -0
  118. data/lib/iiif_print/text_extraction/alto_reader.rb +123 -0
  119. data/lib/iiif_print/text_extraction/hocr_reader.rb +172 -0
  120. data/lib/iiif_print/text_extraction/page_ocr.rb +87 -0
  121. data/lib/iiif_print/text_extraction/render_alto.rb +84 -0
  122. data/lib/iiif_print/text_extraction/word_coords_builder.rb +38 -0
  123. data/lib/iiif_print/text_extraction.rb +11 -0
  124. data/lib/iiif_print/text_extraction_derivative_service.rb +47 -0
  125. data/lib/iiif_print/text_formats_from_alto_service.rb +77 -0
  126. data/lib/iiif_print/tiff_derivative_service.rb +50 -0
  127. data/lib/iiif_print/version.rb +3 -0
  128. data/lib/iiif_print/works_controller_behavior.rb +9 -0
  129. data/lib/iiif_print.rb +136 -0
  130. data/lib/tasks/set_child_works.rake +22 -0
  131. data/spec/.keep.txt +1 -0
  132. data/spec/factories/ability.rb +6 -0
  133. data/spec/factories/newspaper_issue.rb +7 -0
  134. data/spec/factories/newspaper_page.rb +7 -0
  135. data/spec/factories/newspaper_page_solr_document.rb +12 -0
  136. data/spec/factories/newspaper_title.rb +8 -0
  137. data/spec/factories/uploaded_pdf_file.rb +9 -0
  138. data/spec/factories/uploaded_txt_file.rb +9 -0
  139. data/spec/factories/user.rb +13 -0
  140. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  141. data/spec/fixtures/files/4.1.07.tiff +0 -0
  142. data/spec/fixtures/files/README.md +7 -0
  143. data/spec/fixtures/files/alto-2-0.xsd +714 -0
  144. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  145. data/spec/fixtures/files/credits.md +16 -0
  146. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  147. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  148. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  149. data/spec/fixtures/files/minimal-alto.xml +31 -0
  150. data/spec/fixtures/files/ndnp-alto-sample.xml +24 -0
  151. data/spec/fixtures/files/ndnp-sample1-json.json +1 -0
  152. data/spec/fixtures/files/ndnp-sample1-txt.txt +1 -0
  153. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  154. data/spec/fixtures/files/ocr_alto.xml +202 -0
  155. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +202 -0
  156. data/spec/fixtures/files/ocr_color.tiff +0 -0
  157. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  158. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  159. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  160. data/spec/fixtures/files/ocr_mono_text_hocr.html +78 -0
  161. data/spec/fixtures/files/page1.tiff +0 -0
  162. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  163. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  164. data/spec/fixtures/files/thumbnail.jpg +0 -0
  165. data/spec/helpers/hyrax/iiif_helper_spec.rb +65 -0
  166. data/spec/helpers/iiif_print_helper_spec.rb +43 -0
  167. data/spec/iiif_print/base_derivative_service_spec.rb +11 -0
  168. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +51 -0
  169. data/spec/iiif_print/catalog_search_builder_spec.rb +60 -0
  170. data/spec/iiif_print/configuration_spec.rb +67 -0
  171. data/spec/iiif_print/data/work_derivatives_spec.rb +245 -0
  172. data/spec/iiif_print/data/work_file_spec.rb +99 -0
  173. data/spec/iiif_print/data/work_files_spec.rb +237 -0
  174. data/spec/iiif_print/image_tool_spec.rb +109 -0
  175. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +30 -0
  176. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +17 -0
  177. data/spec/iiif_print/jp2_image_metadata_spec.rb +37 -0
  178. data/spec/iiif_print/lineage_service_spec.rb +13 -0
  179. data/spec/iiif_print/metadata_spec.rb +115 -0
  180. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +6 -0
  181. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +49 -0
  182. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +45 -0
  183. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +84 -0
  184. data/spec/iiif_print/text_extraction/render_alto_spec.rb +54 -0
  185. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +44 -0
  186. data/spec/iiif_print_spec.rb +51 -0
  187. data/spec/misc_shared.rb +111 -0
  188. data/spec/models/iiif_print/derivative_attachment_spec.rb +37 -0
  189. data/spec/models/iiif_print/ingest_file_relation_spec.rb +56 -0
  190. data/spec/models/solr_document_spec.rb +14 -0
  191. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +19 -0
  192. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +49 -0
  193. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +59 -0
  194. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +66 -0
  195. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +178 -0
  196. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +82 -0
  197. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +127 -0
  198. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +65 -0
  199. data/spec/spec_helper.rb +181 -0
  200. data/spec/support/controller_level_helpers.rb +28 -0
  201. data/spec/support/iiif_print_models.rb +127 -0
  202. data/spec/test_app_templates/blacklight.yml +9 -0
  203. data/spec/test_app_templates/fedora.yml +15 -0
  204. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  205. data/spec/test_app_templates/redis.yml +9 -0
  206. data/spec/test_app_templates/solr/conf/schema.xml +362 -0
  207. data/spec/test_app_templates/solr/conf/solrconfig.xml +322 -0
  208. data/spec/test_app_templates/solr.yml +7 -0
  209. data/tasks/iiif_print_dev.rake +34 -0
  210. data/tmp/.keep +0 -0
  211. metadata +605 -0
@@ -0,0 +1,129 @@
1
+ version: '3.8'
2
+
3
+ x-app: &app
4
+ build:
5
+ context: .
6
+ target: hyku-base
7
+ args:
8
+ - EXTRA_APK_PACKAGES=less vim bash openjdk11-jre ffmpeg rsync exiftool
9
+ # image: ghcr.io/scientist-softserv/palni-palci:${TAG:-latest}
10
+ env_file:
11
+ - .env
12
+ # NOTE: all common env variables moved to .env
13
+ volumes:
14
+ - node_modules:/app/samvera/hyrax-webapp/node_modules:cached
15
+ - uploads:/app/samvera/hyrax-webapp/public/uploads:cached
16
+ - assets:/app/samvera/hyrax-webapp/public/assets:cached
17
+ - cache:/app/samvera/hyrax-webapp/tmp/cache:cached
18
+ - .:/app/samvera/hyrax-webapp
19
+ networks:
20
+ internal:
21
+
22
+ volumes:
23
+ assets:
24
+ cache:
25
+ db:
26
+ fcrepo:
27
+ node_modules:
28
+ redis:
29
+ solr:
30
+ uploads:
31
+ zk:
32
+ zoo:
33
+
34
+ networks:
35
+ internal:
36
+
37
+ services:
38
+ solr:
39
+ image: solr:7.7.2
40
+ environment:
41
+ - VIRTUAL_PORT=8983
42
+ - VIRTUAL_HOST=solr.iiif_print.test
43
+ command:
44
+ - sh
45
+ - "-c"
46
+ - "precreate-core hyrax_test /opt/solr/server/configsets/hyraxconf; solr-precreate hyrax /opt/solr/server/configsets/hyraxconf"
47
+ volumes:
48
+ - solr:/var/solr/data:cached
49
+ - ./spec/test_app_templates/solr/conf/:/opt/solr/server/configsets/hyraxconf
50
+ networks:
51
+ - internal
52
+
53
+ fcrepo:
54
+ image: ghcr.io/samvera/fcrepo4:4.7.5
55
+ volumes:
56
+ - fcrepo:/data:cached
57
+ env_file:
58
+ - .env
59
+ environment:
60
+ - VIRTUAL_PORT=8080
61
+ - VIRTUAL_HOST=fcrepo.hyku.test
62
+ - JAVA_OPTS=${JAVA_OPTS} -Dfcrepo.modeshape.configuration="classpath:/config/file-simple/repository.json" -Dfcrepo.object.directory="/data/objects" -Dfcrepo.binary.directory="/data/binaries"
63
+ expose:
64
+ - 8080
65
+ networks:
66
+ internal:
67
+
68
+ db:
69
+ image: postgres:11.1
70
+ env_file:
71
+ - .env
72
+ environment:
73
+ - POSTGRES_DB=${DB_NAME}
74
+ - POSTGRES_PASSWORD=${DB_PASSWORD}
75
+ - POSTGRES_USER=${DB_USER}
76
+ - VIRTUAL_PORT=5432
77
+ - VIRTUAL_HOST=db.hyku.test
78
+ volumes:
79
+ - db:/var/lib/postgresql/data
80
+ networks:
81
+ internal:
82
+
83
+ web:
84
+ <<: *app
85
+ environment:
86
+ - VIRTUAL_PORT=3000
87
+ - VIRTUAL_HOST=.hyku.test
88
+ command: tail -f /dev/null
89
+ ##
90
+ ## Similar to the above, except we will bundle and then tell the container
91
+ ## to wait. You'll then need to bash into the web container to do much of
92
+ ## anything.
93
+ # command: sh -l -c "bundle && echo \"Finished bundling now waiting...\" && tail -f /dev/null"
94
+ depends_on:
95
+ db:
96
+ condition: service_started
97
+ solr:
98
+ condition: service_started
99
+ fcrepo:
100
+ condition: service_started
101
+ redis:
102
+ condition: service_started
103
+ chrome:
104
+ condition: service_started
105
+ expose:
106
+ - 3000
107
+
108
+ redis:
109
+ image: redis:5
110
+ command: redis-server
111
+ volumes:
112
+ - redis:/data
113
+ networks:
114
+ internal:
115
+
116
+ chrome:
117
+ # password is 'secret'
118
+ image: selenium/standalone-chrome-debug:3.141.59-20201010
119
+ logging:
120
+ driver: none
121
+ volumes:
122
+ - /dev/shm:/dev/shm
123
+ shm_size: 3G
124
+ networks:
125
+ internal:
126
+ environment:
127
+ - JAVA_OPTS=-Dwebdriver.chrome.whitelistedIps=
128
+ - VIRTUAL_PORT=7900
129
+ - VIRTUAL_HOST=chrome.hyku.test
@@ -0,0 +1,43 @@
1
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
2
+
3
+ # version updated in one place:
4
+ require 'iiif_print/version'
5
+
6
+ # Gem description:
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'iiif_print'
9
+ spec.version = IiifPrint::VERSION
10
+ spec.authors = ['Sean Upton', 'Jacob Reed', 'Brian McBride',
11
+ 'Eben English', 'Kirk Wang', 'LaRita Robinson', 'Jeremy Friesen']
12
+ spec.email = ['sean.upton@utah.edu', 'jacob.reed@utah.edu',
13
+ 'brian.mcbride@utah.edu', 'eenglish@bpl.org', 'kirk.wang@scientist.com',
14
+ 'larita@scientist.com', 'jeremy.n.friesen@gmail.com']
15
+ spec.homepage = 'https://github.com/samvera-labs/iiif_print'
16
+ spec.description = 'Gem/Engine for IIIF Print works in Hyrax-based Samvera Application.'
17
+ spec.summary = <<-SUMMARY
18
+ iiif_print is a Rails Engine gem providing model and administrative
19
+ functions to Hyrax-based Samvera applications, for management of
20
+ (primarily scanned) content.
21
+ SUMMARY
22
+ spec.license = 'Apache-2.0'
23
+ spec.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
24
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
25
+ spec.add_dependency 'blacklight_iiif_search', '~> 1.0'
26
+ spec.add_dependency 'dry-monads', '~> 1.4.0'
27
+ spec.add_dependency 'hyrax', '>= 2.5', '< 4.0'
28
+ spec.add_dependency 'nokogiri', '>=1.13.2'
29
+ spec.add_dependency 'rails', '~> 5.0'
30
+ spec.add_dependency 'rdf-vocab', '~> 3.0'
31
+
32
+ spec.add_development_dependency 'bixby'
33
+ spec.add_development_dependency 'database_cleaner', '~> 1.3'
34
+ spec.add_development_dependency 'engine_cart', '~> 2.2'
35
+ spec.add_development_dependency "factory_bot", '~> 4.4'
36
+ spec.add_development_dependency 'fcrepo_wrapper', '~> 0.5', '>= 0.5.1'
37
+ spec.add_development_dependency 'newspaper_works_fixtures', '~> 0.3', '>=0.3.1'
38
+ spec.add_development_dependency 'rails-controller-testing', '~> 1'
39
+ spec.add_development_dependency 'rspec-rails', '~> 3.1'
40
+ spec.add_development_dependency 'rspec-activemodel-mocks'
41
+ spec.add_development_dependency 'shoulda-matchers', '~> 3.1'
42
+ spec.add_development_dependency 'solr_wrapper', '>= 1.1', '< 3.0'
43
+ end
@@ -0,0 +1,29 @@
1
+ require 'rails/generators'
2
+
3
+ module IiifPrint
4
+ class AssetsGenerator < Rails::Generators::Base
5
+ desc "This generator installs the iiif_print CSS assets into your application"
6
+
7
+ source_root File.expand_path('../templates', __FILE__)
8
+
9
+ def inject_css
10
+ copy_file "iiif_print.scss", "app/assets/stylesheets/iiif_print.scss"
11
+ end
12
+
13
+ def inject_js
14
+ return if iiif_print_js_installed?
15
+ insert_into_file 'app/assets/javascripts/application.js', after: '//= require hyrax' do
16
+ <<-JS.strip_heredoc
17
+
18
+ //= require iiif_print
19
+ JS
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ def iiif_print_js_installed?
26
+ IO.read("app/assets/javascripts/application.js").include?('iiif_print')
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,32 @@
1
+ # adds controller-scope behavior to the implementing application
2
+ require 'rails/generators'
3
+
4
+ module IiifPrint
5
+ class CatalogControllerGenerator < Rails::Generators::Base
6
+ desc "
7
+ This generator makes the following changes to your app:
8
+ 1. Adds index fields in CatalogController
9
+ 2. Adjusts Blacklight IIIF Search configuration settings in CatalogController
10
+ "
11
+
12
+ def add_index_fields_to_catalog_controller
13
+ marker = 'configure_blacklight do |config|'
14
+ inject_into_file 'app/controllers/catalog_controller.rb', after: marker do
15
+ "\n\n # IiifPrint index fields\n"\
16
+ " config.add_index_field 'all_text_tsimv', highlight: true, helper_method: :render_ocr_snippets\n"
17
+ end
18
+ end
19
+
20
+ def adjust_catalog_controller_all_text_config
21
+ gsub_file('app/controllers/catalog_controller.rb',
22
+ " full_text_field: 'text',",
23
+ " full_text_field: 'all_text_tsimv',")
24
+ end
25
+
26
+ def adjust_catalog_controller_is_page_of_config
27
+ gsub_file('app/controllers/catalog_controller.rb',
28
+ " object_relation_field: 'is_page_of_s',",
29
+ " object_relation_field: 'is_page_of_ssim',")
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,52 @@
1
+ require 'rails/generators'
2
+
3
+ module IiifPrint
4
+ # Install Generator Class
5
+ # rubocop:disable Metrics/ClassLength
6
+ class InstallGenerator < Rails::Generators::Base
7
+ source_root File.expand_path('../templates', __FILE__)
8
+
9
+ def copy_migrations
10
+ rake "iiif_print:install:migrations"
11
+ end
12
+
13
+ def verify_biiif_installed
14
+ return if IO.read('app/controllers/catalog_controller.rb').include?('include BlacklightIiifSearch::Controller')
15
+ say_status('info',
16
+ 'BLACKLIGHT IIIF SEARCH NOT INSTALLED; INSTALLING BLACKLIGHT IIIF SEARCH',
17
+ :blue)
18
+ generate 'blacklight_iiif_search:install'
19
+ end
20
+
21
+ def catalog_controller_configuration
22
+ generate 'iiif_print:catalog_controller'
23
+ end
24
+
25
+ def inject_configuration
26
+ copy_file 'config/initializers/iiif_print.rb'
27
+ end
28
+
29
+ def inject_assets
30
+ generate 'iiif_print:assets'
31
+ end
32
+
33
+ # Blacklight IIIF Search generator has some linting that does not agree with CircleCI on Hyku
34
+ # ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/controller_generator.rb
35
+ # the following two methods clean up the generated files to appease Rubocop
36
+ def lint_catalog_controller
37
+ file = "app/controllers/catalog_controller.rb"
38
+ contents = File.read(file)
39
+ contents.gsub!(/\n\s*\n\s*# IiifPrint index fields/, "\n # IiifPrint index fields")
40
+ contents.gsub!(/\n\s*\n\s*# configuration for Blacklight IIIF Content Search/, "\n\n # configuration for Blacklight IIIF Content Search")
41
+ File.write(file, contents)
42
+ end
43
+
44
# Prepend the `# frozen_string_literal: true` magic comment to the
# IiifSearchBuilder file, to appease Rubocop.
#
# The file is left untouched when it already starts with that magic comment,
# so running the install generator more than once does not stack duplicate
# headers at the top of the file.
#
# ref https://github.com/boston-library/blacklight_iiif_search/blob/v1.0.0/lib/generators/blacklight_iiif_search/templates/iiif_search_builder.rb
def lint_iiif_search_builder
  file = "app/models/iiif_search_builder.rb"
  magic_comment = "# frozen_string_literal: true"
  contents = File.read(file)
  # already linted (e.g. generator re-run): nothing to do
  return if contents.start_with?(magic_comment)

  File.write(file, "#{magic_comment}\n\n#{contents}")
end
51
+ end
52
+ end
@@ -0,0 +1,22 @@
1
+ IiifPrint.config do |config|
2
+ # NOTE: WorkTypes and models are used synonymously here.
3
+ # Add models to be excluded from search so the user
4
+ # would not see them in the search results.
5
+ # by default, use the human readable versions like:
6
+ # @example
7
+ # # config.excluded_model_name_solr_field_values = ['Generic Work', 'Image']
8
+ #
9
+ # config.excluded_model_name_solr_field_values = []
10
+
11
+ # Add configurable solr field key for searching,
12
+ # default key is: 'human_readable_type_sim'
13
+ # if another key is used, make sure to adjust the
14
+ # config.excluded_model_name_solr_field_values to match
15
+ # @example
16
+ # config.excluded_model_name_solr_field_key = 'some_solr_field_key'
17
+
18
+ # Configure how the manifest sorts the canvases, by default it sorts by :title,
19
+ # but a different model property may be desired such as :date_published
20
+ # @example
21
+ # config.sort_iiif_manifest_canvases_by = :date_published
22
+ end
@@ -0,0 +1 @@
1
+ @import 'iiif_print/iiif_print';
@@ -0,0 +1,113 @@
1
+ module IiifPrint
2
+ # Base type for IiifPrint derivative services
3
+ class BaseDerivativeService
4
+ attr_reader :file_set, :master_format
5
+ delegate :uri, to: :file_set
6
+
7
+ class_attribute :target_extension, default: nil
8
+
9
+ def initialize(file_set)
10
+ @file_set = file_set
11
+ @dest_path = nil
12
+ @source_path = nil
13
+ @source_meta = nil
14
+ end
15
+
16
+ ##
17
+ # We assume that for the file set's parent that this is an acceptable plugin. Now, we ask for
18
+ # this specific file_set is it valid. For example, we would not attempt to extract text from a
19
+ # movie even though the parent work says to attempt to extract text on any attached file sets.
20
+ # Put another way, we can upload a PDF or a Movie to the parent.
21
+ #
22
+ # In subclass, you'll want to consider the attributes of the file_set and whether that subclass
23
+ # should process the given file_set.
24
+ #
25
+ # @see IiifPrint::PluggableDerivativeService#plugins_for
26
+ # @return [Boolean]
27
+ def valid?
28
+ true
29
+ end
30
+
31
+ def derivative_path_factory
32
+ Hyrax::DerivativePath
33
+ end
34
+
35
+ # prepare full path for passed extension/destination name, return path
36
+ def prepare_path(extension)
37
+ dest_path = derivative_path_factory.derivative_path_for_reference(
38
+ @file_set,
39
+ extension
40
+ )
41
+ dir = File.join(dest_path.split('/')[0..-2])
42
+ FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
43
+ dest_path
44
+ end
45
+
46
+ # calculate and ensure directory components for singular @dest_path
47
+ # should only be used by subclasses producing a single derivative
48
+ def load_destpath
49
+ @dest_path = prepare_path(target_extension)
50
+ end
51
+
52
+ def identify
53
+ return @source_meta unless @source_meta.nil?
54
+ @source_meta = IiifPrint::ImageTool.new(@source_path).metadata
55
+ end
56
+
57
+ def mime_type
58
+ identify[:content_type]
59
+ end
60
+
61
+ def use_color?
62
+ identify[:color] == 'color'
63
+ end
64
+
65
+ # is source one-bit monochrome?
66
+ def one_bit?
67
+ identify[:color] == 'monochrome'
68
+ end
69
+
70
+ def create_derivatives(filename)
71
+ # presuming that filename is full path to source file
72
+ @source_path = filename
73
+
74
+ # Get destination path from Hyrax for file extension defined in
75
+ # the target_extension class attribute of the respective derivative service subclass.
76
+ load_destpath
77
+ end
78
+
79
+ def cleanup_derivatives(extension = target_extension, *_args)
80
+ derivative_path_factory.derivatives_for_reference(file_set).each do |path|
81
+ FileUtils.rm_f(path) if path.ends_with?(extension)
82
+ end
83
+ end
84
+
85
# Decompress the JP2 at @source_path into an intermediate TIFF placed in a
# freshly created temporary directory, using the `opj_decompress` CLI.
#
# The command is run as an argument vector (no shell involved), so a source
# path containing spaces or shell metacharacters is handed to the tool
# verbatim instead of being split or interpreted.
#
# NOTE: the temporary directory is not removed here; the returned file is
# meant to be consumed by a later conversion step.
#
# @return [String] path of the intermediate TIFF
def jp2_to_intermediate
  intermediate_path = File.join(Dir.mktmpdir, 'intermediate.tif')
  jp2_cmd = ['opj_decompress', '-i', @source_path.to_s, '-o', intermediate_path]
  # make intermediate (the tool's standard output is read and discarded), then...
  IO.popen(jp2_cmd, &:read)
  intermediate_path
end
92
+
93
+ def convert_cmd
94
+ raise NotImplementedError, 'Calling subclass missing convert_cmd method'
95
+ end
96
+
97
+ # convert non-JP2 source/primary file to PDF derivative with ImageMagick6
98
+ # calls convert_cmd on calling subclasses
99
+ def im_convert
100
+ `#{convert_cmd}`
101
+ end
102
+
103
+ # convert JP2 source/primary file to PDF derivative, via
104
+ # opj_decompress to intermediate TIFF, then ImageMagick6 convert
105
+ def jp2_convert
106
+ # jp2 source -> intermediate
107
+ intermediate_path = jp2_to_intermediate
108
+ @source_path = intermediate_path
109
+ # intermediate -> PDF
110
+ im_convert
111
+ end
112
+ end
113
+ end
@@ -0,0 +1,84 @@
1
+ # customize behavior for IiifSearch
2
+ module IiifPrint
3
+ module BlacklightIiifSearch
4
+ module AnnotationDecorator
5
+ ##
6
+ # Create a URL for the annotation
7
+ # use a Hyrax-y URL syntax:
8
+ # protocol://host:port/concern/model_type/work_id/manifest/canvas/file_set_id/annotation/index
9
+ # @return [String]
10
+ def annotation_id
11
+ "#{base_url}/manifest/canvas/#{file_set_id}/annotation/#{hl_index}"
12
+ end
13
+
14
+ ##
15
+ # Create a URL for the canvas that the annotation refers to
16
+ # match the Hyrax default canvas URL syntax:
17
+ # protocol://host:port/concern/model_type/work_id/manifest/canvas/file_set_id
18
+ # @return [String]
19
+ def canvas_uri_for_annotation
20
+ "#{base_url}/manifest/canvas/#{file_set_id}#{coordinates}"
21
+ end
22
+
23
+ private
24
+
25
##
# return a string like "#xywh=100,100,250,20"
# corresponding to coordinates of query term on image
#
# Falls back to default_coords when there is no query, no parsable
# word-coordinates JSON, no word matching any query term, or no coordinate
# set at position hl_index among the matches.
# @return [String]
def coordinates
  return default_coords if query.blank?
  coords_json = fetch_and_parse_coords
  return default_coords unless coords_json && coords_json['coords']
  # query terms are user input: match them literally (case-insensitively via
  # downcase), never as regular-expression syntax
  terms_pattern = Regexp.union(query.split(' ').map(&:downcase))
  matches = coords_json['coords'].select do |word, _boxes|
    word.downcase =~ terms_pattern
  end
  return default_coords if matches.blank?
  coords_array = matches.values.flatten(1)[hl_index]
  # hl_index may point past the last matching coordinate set
  return default_coords unless coords_array
  "#xywh=#{coords_array.join(',')}"
end
42
+
43
+ ##
44
+ # return the JSON word-coordinates file contents
45
+ # @return [JSON]
46
+ def fetch_and_parse_coords
47
+ coords = IiifPrint::Data::WorkDerivatives.data(from: file_set_id, of_type: 'json')
48
+ return nil if coords.blank?
49
+ begin
50
+ JSON.parse(coords)
51
+ rescue JSON::ParserError
52
+ nil
53
+ end
54
+ end
55
+
56
+ ##
57
+ # a default set of coordinates
58
+ # @return [String]
59
+ def default_coords
60
+ '#xywh=0,0,0,0'
61
+ end
62
+
63
+ ##
64
+ # the base URL for the Newspaper object
65
+ # use polymorphic_url, since we deal with multiple object types
66
+ # @return [String]
67
+ def base_url
68
+ host = controller.request.base_url
69
+ controller.polymorphic_url(parent_document, host: host, locale: nil)
70
+ end
71
+
72
+ ##
73
+ # return the first file set id
74
+ # @return [String]
75
+ def file_set_id
76
+ return document['id'] if document.file_set?
77
+
78
+ file_set_ids = document['file_set_ids_ssim']
79
+ raise "#{self.class}: NO FILE SET ID" if file_set_ids.blank?
80
+ file_set_ids.first
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,31 @@
1
+ require 'hyrax/catalog_search_builder'
2
+
3
+ module IiifPrint
4
+ # This class extends the base Hyrax::CatalogSearchBuilder by:
5
+ #
6
+ # - supporting highlighting of snippets in results
7
+ # - excluding models from search result; with complex works you might want to skip some of those
8
+ # works.
9
+ class CatalogSearchBuilder < Hyrax::CatalogSearchBuilder
10
+ # TODO: Do we need the following as a module? It hides the behavior
11
+ include IiifPrint::HighlightSearchParams
12
+ # TODO: Do we need the following as a module? It hides the behavior
13
+ include IiifPrint::ExcludeModels
14
+
15
+ # NOTE: If you are using advanced_search, the :exclude_models and :highlight_search_params must
16
+ # be added after the advanced_search methods (which are not part of this gem). In other tests,
17
+ # we found that having the advanced search processing after the two aforementioned processors
18
+ # resulted in improper evaluation of keyword querying.
19
+ self.default_processor_chain += [:exclude_models, :highlight_search_params, :show_parents_only]
20
+
21
+ # rubocop:enable Naming/PredicateName
22
+ def show_parents_only(solr_parameters)
23
+ query = if blacklight_params["include_child_works"] == 'true'
24
+ ActiveFedora::SolrQueryBuilder.construct_query(is_child_bsi: 'true')
25
+ else
26
+ ActiveFedora::SolrQueryBuilder.construct_query(is_child_bsi: nil)
27
+ end
28
+ solr_parameters[:fq] += [query]
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,99 @@
1
+ module IiifPrint
2
+ class Configuration
3
+ attr_writer :after_create_fileset_handler
4
+
5
# Dispatch the after-create hook for a file set: call the configured
# handler when one has been assigned, otherwise fall back to
# IiifPrint::Data.handle_after_create_fileset. Assigning nil to
# after_create_fileset_handler restores the fallback rather than
# attempting to call nil.
#
# @param file_set [FileSet]
# @param user [User]
def handle_after_create_fileset(file_set, user)
  # `defined?` alone is true even after the writer was given nil, so the
  # value itself must be checked as well
  if defined?(@after_create_fileset_handler) && @after_create_fileset_handler
    @after_create_fileset_handler.call(file_set, user)
  else
    IiifPrint::Data.handle_after_create_fileset(file_set, user)
  end
end
14
+
15
+ attr_writer :excluded_model_name_solr_field_values
16
+ # By default, this uses an array of human readable types
17
+ # ex: ['Generic Work', 'Image']
18
+ # @return [Array<String>]
19
+ def excluded_model_name_solr_field_values
20
+ return @excluded_model_name_solr_field_values unless @excluded_model_name_solr_field_values.nil?
21
+ @excluded_model_name_solr_field_values = []
22
+ end
23
+
24
+ # This method wraps Hyrax's configuration so we can sniff out the correct method to use. The
25
+ # {Hyrax::Configuration#whitelisted_ingest_dirs} is deprecated in favor of
26
+ # {Hyrax::Configuration#registered_ingest_dirs}.
27
+ #
28
+ # @return [Array<String>]
29
+ def registered_ingest_dirs
30
+ if Hyrax.config.respond_to?(:registered_ingest_dirs)
31
+ Hyrax.config.registered_ingest_dirs
32
+ else
33
+ Hyrax.config.whitelisted_ingest_dirs
34
+ end
35
+ end
36
+
37
+ attr_writer :excluded_model_name_solr_field_key
38
+ # A string of a solr field key
39
+ # @return [String]
40
+ def excluded_model_name_solr_field_key
41
+ return "human_readable_type_sim" unless defined?(@excluded_model_name_solr_field_key)
42
+ @excluded_model_name_solr_field_key
43
+ end
44
+
45
+ attr_writer :default_iiif_manifest_version
46
+ def default_iiif_manifest_version
47
+ @default_iiif_manifest_version || 2
48
+ end
49
+
50
+ attr_writer :metadata_fields
51
+ # rubocop:disable Metrics/MethodLength
52
+ # @api private
53
+ # @note These fields will appear in rendering order.
54
+ # @todo To move this to an `@api public` state, we need to consider what a proper configuration looks like.
55
+ def metadata_fields
56
+ @metadata_fields ||= {
57
+ title: {},
58
+ description: {},
59
+ collection: {},
60
+ abstract: {},
61
+ date_modified: {},
62
+ creator: { render_as: :faceted },
63
+ contributor: { render_as: :faceted },
64
+ subject: { render_as: :faceted },
65
+ publisher: { render_as: :faceted },
66
+ language: { render_as: :faceted },
67
+ identifier: { render_as: :linked },
68
+ keyword: { render_as: :faceted },
69
+ date_created: { render_as: :linked },
70
+ based_near_label: {},
71
+ related_url: { render_as: :external_link },
72
+ resource_type: { render_as: :faceted },
73
+ source: {},
74
+ extent: {},
75
+ rights_statement: { render_as: :rights_statement },
76
+ rights_notes: {},
77
+ access_right: {},
78
+ license: { render_as: :license },
79
+ searchable_text: {}
80
+ }
81
+ end
82
+ # rubocop:enable Metrics/MethodLength
83
+
84
+ attr_writer :sort_iiif_manifest_canvases_by
85
+ def sort_iiif_manifest_canvases_by
86
+ @sort_iiif_manifest_canvases_by || :title
87
+ end
88
+
89
+ attr_writer :additional_tessearct_options
90
+ ##
91
+ # The additional options to pass to the Tesseract configuration
92
+ #
93
+ # @see https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html
94
+ # @return [String]
95
+ def additional_tessearct_options
96
+ @additional_tessearct_options || ""
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,25 @@
1
+ module IiifPrint
2
+ module Data
3
+ # Mixin module for fileset methods for work, presumes an @work
4
+ # instance attribute referring to a work object
5
+ module FilesetHelper
6
+ def fileset_id
7
+ # if context is itself a string, presume it is a file set id
8
+ return @work if @work.is_a? String
9
+ # if context is not a String, presume a work or fileset context:
10
+ fileset.nil? ? nil : fileset.id
11
+ end
12
+
13
+ def first_fileset
14
+ # if context is fileset id (e.g. caller is view partial) string,
15
+ # get the fileset from that id
16
+ return FileSet.find(@work) if @work.is_a?(String)
17
+ # if "work" context is a FileSet, not actual work, return it
18
+ return @work if @work.is_a? FileSet
19
+ # in most cases, get from work's members:
20
+ filesets = @work.members.select { |m| m.is_a? FileSet }
21
+ filesets.empty? ? nil : filesets[0]
22
+ end
23
+ end
24
+ end
25
+ end