iiif_print 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (181) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  4. data/.github/workflows/build-lint-test-action.yaml +4 -5
  5. data/.gitignore +5 -4
  6. data/.rubocop.yml +1 -0
  7. data/.solargraph.yml +19 -0
  8. data/Gemfile.lock +1025 -0
  9. data/README.md +102 -9
  10. data/Rakefile +6 -0
  11. data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
  12. data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
  13. data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
  14. data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
  15. data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
  16. data/app/helpers/iiif_print_helper.rb +0 -20
  17. data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
  18. data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
  19. data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
  20. data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
  21. data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
  22. data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
  23. data/app/listeners/iiif_print/listener.rb +31 -0
  24. data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
  25. data/app/models/concerns/iiif_print/solr/document.rb +19 -3
  26. data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
  27. data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
  28. data/app/models/iiif_print/pending_relationship.rb +3 -0
  29. data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
  30. data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
  31. data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
  32. data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
  33. data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
  34. data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
  35. data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
  36. data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
  37. data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
  38. data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
  39. data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
  40. data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
  41. data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
  42. data/app/views/catalog/_index_header_list_default.html.erb +13 -0
  43. data/app/views/hyrax/base/_representative_media.html.erb +4 -3
  44. data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
  45. data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
  46. data/config/initializers/simple_schema_loader.rb +1 -0
  47. data/config/locales/iiif_print.en.yml +4 -0
  48. data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
  49. data/config/routes.rb +3 -0
  50. data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
  51. data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
  52. data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
  53. data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
  54. data/docker-compose.yml +2 -2
  55. data/iiif_print.gemspec +11 -10
  56. data/lib/generators/iiif_print/install_generator.rb +21 -1
  57. data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
  58. data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
  59. data/lib/iiif_print/base_derivative_service.rb +14 -2
  60. data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
  61. data/lib/iiif_print/catalog_search_builder.rb +7 -3
  62. data/lib/iiif_print/configuration.rb +205 -8
  63. data/lib/iiif_print/data/fileset_helper.rb +3 -3
  64. data/lib/iiif_print/data/work_derivatives.rb +4 -4
  65. data/lib/iiif_print/engine.rb +53 -15
  66. data/lib/iiif_print/errors.rb +18 -0
  67. data/lib/iiif_print/homepage_search_builder.rb +17 -0
  68. data/lib/iiif_print/image_tool.rb +12 -8
  69. data/lib/iiif_print/jp2_derivative_service.rb +4 -1
  70. data/lib/iiif_print/lineage_service.rb +47 -13
  71. data/lib/iiif_print/metadata.rb +67 -48
  72. data/lib/iiif_print/pdf_derivative_service.rb +3 -1
  73. data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
  74. data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
  75. data/lib/iiif_print/persistence_layer.rb +118 -0
  76. data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
  77. data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
  78. data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
  79. data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
  80. data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
  81. data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
  82. data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
  83. data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
  84. data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
  85. data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
  86. data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
  87. data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
  88. data/lib/iiif_print/tiff_derivative_service.rb +3 -1
  89. data/lib/iiif_print/version.rb +1 -1
  90. data/lib/iiif_print.rb +210 -20
  91. data/lib/samvera/derivatives/configuration.rb +83 -0
  92. data/lib/samvera/derivatives/hyrax.rb +129 -0
  93. data/lib/samvera/derivatives.rb +238 -0
  94. data/tasks/copy_authorities_to_test_app.rake +11 -0
  95. data/tasks/iiif_print_dev.rake +4 -4
  96. metadata +111 -196
  97. data/app/helpers/hyrax/iiif_helper.rb +0 -22
  98. data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
  99. data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
  100. data/bin/rails +0 -13
  101. data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
  102. data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
  103. data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
  104. data/spec/.keep.txt +0 -1
  105. data/spec/factories/ability.rb +0 -6
  106. data/spec/factories/newspaper_issue.rb +0 -7
  107. data/spec/factories/newspaper_page.rb +0 -7
  108. data/spec/factories/newspaper_page_solr_document.rb +0 -12
  109. data/spec/factories/newspaper_title.rb +0 -8
  110. data/spec/factories/uploaded_pdf_file.rb +0 -9
  111. data/spec/factories/uploaded_txt_file.rb +0 -9
  112. data/spec/factories/user.rb +0 -13
  113. data/spec/fixtures/files/4.1.07.jp2 +0 -0
  114. data/spec/fixtures/files/4.1.07.tiff +0 -0
  115. data/spec/fixtures/files/README.md +0 -7
  116. data/spec/fixtures/files/alto-2-0.xsd +0 -714
  117. data/spec/fixtures/files/broken-truncated.pdf +0 -0
  118. data/spec/fixtures/files/credits.md +0 -16
  119. data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
  120. data/spec/fixtures/files/minimal-1-page.pdf +0 -0
  121. data/spec/fixtures/files/minimal-2-page.pdf +0 -0
  122. data/spec/fixtures/files/minimal-alto.xml +0 -31
  123. data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
  124. data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
  125. data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
  126. data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
  127. data/spec/fixtures/files/ocr_alto.xml +0 -202
  128. data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
  129. data/spec/fixtures/files/ocr_color.tiff +0 -0
  130. data/spec/fixtures/files/ocr_gray.jp2 +0 -0
  131. data/spec/fixtures/files/ocr_gray.tiff +0 -0
  132. data/spec/fixtures/files/ocr_mono.tiff +0 -0
  133. data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
  134. data/spec/fixtures/files/page1.tiff +0 -0
  135. data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
  136. data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
  137. data/spec/fixtures/files/thumbnail.jpg +0 -0
  138. data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
  139. data/spec/helpers/iiif_print_helper_spec.rb +0 -43
  140. data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
  141. data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
  142. data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
  143. data/spec/iiif_print/configuration_spec.rb +0 -67
  144. data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
  145. data/spec/iiif_print/data/work_file_spec.rb +0 -99
  146. data/spec/iiif_print/data/work_files_spec.rb +0 -237
  147. data/spec/iiif_print/image_tool_spec.rb +0 -109
  148. data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
  149. data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
  150. data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
  151. data/spec/iiif_print/lineage_service_spec.rb +0 -13
  152. data/spec/iiif_print/metadata_spec.rb +0 -115
  153. data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
  154. data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
  155. data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
  156. data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
  157. data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
  158. data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
  159. data/spec/iiif_print_spec.rb +0 -51
  160. data/spec/misc_shared.rb +0 -111
  161. data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
  162. data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
  163. data/spec/models/solr_document_spec.rb +0 -14
  164. data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
  165. data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
  166. data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
  167. data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
  168. data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
  169. data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
  170. data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
  171. data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
  172. data/spec/spec_helper.rb +0 -181
  173. data/spec/support/controller_level_helpers.rb +0 -28
  174. data/spec/support/iiif_print_models.rb +0 -127
  175. data/spec/test_app_templates/blacklight.yml +0 -9
  176. data/spec/test_app_templates/fedora.yml +0 -15
  177. data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
  178. data/spec/test_app_templates/redis.yml +0 -9
  179. data/spec/test_app_templates/solr/conf/schema.xml +0 -362
  180. data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
  181. data/spec/test_app_templates/solr.yml +0 -7
@@ -1,107 +0,0 @@
1
- module IiifPrint
2
- module Jobs
3
- class ChildWorksFromPdfJob < IiifPrint::Jobs::ApplicationJob
4
- # Break a pdf into individual pages
5
- # @param parent_work
6
- # @param pdf_paths: [<Array => String>] paths to pdfs
7
- # @param user: [User]
8
- # @param admin_set_id: [<String>]
9
- # @param prior_pdfs: [<Integer>] count of pdfs already on parent work
10
- def perform(parent_work, pdf_paths, user, admin_set_id, prior_pdfs)
11
- @parent_work = parent_work
12
- @child_admin_set_id = admin_set_id
13
- child_model = @parent_work.iiif_print_config.pdf_split_child_model
14
-
15
- # handle each input pdf
16
- pdf_paths.each_with_index do |path, pdf_idx|
17
- split_pdf(path, pdf_idx, user, prior_pdfs, child_model)
18
- end
19
-
20
- # Link newly created child works to the parent
21
- # @param user: [User] user
22
- # @param parent_id: [<String>] parent work id
23
- # @param parent_model: [<String>] parent model
24
- # @param child_model: [<String>] child model
25
- IiifPrint::Jobs::CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
26
- user: user,
27
- parent_id: @parent_work.id,
28
- parent_model: @parent_work.class.to_s,
29
- child_model: child_model.to_s
30
- )
31
-
32
- # TODO: clean up image_files and pdf_paths
33
- end
34
-
35
- private
36
-
37
- def split_pdf(path, pdf_idx, user, prior_pdfs_count, child_model)
38
- image_files = @parent_work.iiif_print_config.pdf_splitter_service.new(path).to_a
39
- return if image_files.blank?
40
-
41
- pdf_sequence = pdf_idx + prior_pdfs_count
42
- prepare_import_data(pdf_sequence, image_files, user)
43
-
44
- # submit the job to create all the child works for one PDF
45
- # @param [User] user
46
- # @param [Hash<String => String>] titles
47
- # @param [Hash<String => String>] resource_types (optional)
48
- # @param [Array<String>] uploaded_files Hyrax::UploadedFile IDs
49
- # @param [Hash] attributes attributes to apply to all works, including :model
50
- # @param [Hyrax::BatchCreateOperation] operation
51
- operation = Hyrax::BatchCreateOperation.create!(
52
- user: user,
53
- operation_type: "PDF Batch Create"
54
- )
55
- BatchCreateJob.perform_later(user,
56
- @child_work_titles,
57
- {},
58
- @uploaded_files,
59
- attributes.merge!(model: child_model.to_s).with_indifferent_access,
60
- operation)
61
- end
62
-
63
- def prepare_import_data(pdf_sequence, image_files, user)
64
- @uploaded_files = []
65
- @child_work_titles = {}
66
- image_files.each_with_index do |image_path, idx|
67
- file_id = create_uploaded_file(user, image_path).to_s
68
- file_title = set_title(@parent_work.title.first, pdf_sequence, idx)
69
- @uploaded_files << file_id
70
- @child_work_titles[file_id] = file_title
71
- # save child work info to create the member relationships
72
- PendingRelationship.create!(child_title: file_title,
73
- parent_id: @parent_work.id,
74
- child_order: sort_order(pdf_sequence, idx))
75
- end
76
- end
77
-
78
- def sort_order(pdf_sequence, idx)
79
- "#{pdf_sequence} #{idx}"
80
- end
81
-
82
- def create_uploaded_file(user, path)
83
- uf = Hyrax::UploadedFile.new
84
- uf.user_id = user.id
85
- uf.file = CarrierWave::SanitizedFile.new(path)
86
- uf.save!
87
- uf.id
88
- end
89
-
90
- def set_title(title, pdf_sequence, idx)
91
- pdf_index = "Pdf Nbr #{pdf_sequence + 1}"
92
- page_number = "Page #{idx + 1}"
93
- "#{title}: #{pdf_index}, #{page_number}"
94
- end
95
-
96
- # TODO: what attributes do we need to fill in from the parent work? What about AllinsonFlex?
97
- def attributes
98
- {
99
- admin_set_id: @child_admin_set_id.to_s,
100
- creator: @parent_work.creator.to_a,
101
- rights_statement: @parent_work.rights_statement.to_a,
102
- visibility: @parent_work.visibility.to_s
103
- }
104
- end
105
- end
106
- end
107
- end
@@ -1,78 +0,0 @@
1
- module IiifPrint
2
- module Jobs
3
- # Link newly created child works to their parent work
4
- class CreateRelationshipsJob < IiifPrint::Jobs::ApplicationJob
5
- # Link newly created child works to the parent
6
- # @param user: [User] user
7
- # @param parent_id: [<String>] parent work id
8
- # @param parent_model: [<String>] parent model
9
- # @param child_model: [<String>] child model
10
- def perform(user:, parent_id:, parent_model:, child_model:)
11
- if completed_child_data_for(parent_id, child_model)
12
- # add the members
13
- parent_work = parent_model.constantize.find(parent_id)
14
- create_relationships(user: user, parent: parent_work, ordered_children: @child_works)
15
- @pending_children.each(&:destroy)
16
- else
17
- # reschedule the job and end this one normally
18
- #
19
- # TODO: Depending on how things shake out, we could be infinitely rescheduling this job.
20
- # Consider a time to live parameter.
21
- reschedule(user: user, parent_id: parent_id, parent_model: parent_model, child_model: child_model)
22
- end
23
- end
24
-
25
- private
26
-
27
- # load @child_works, and return true or false
28
- def completed_child_data_for(parent_id, child_model)
29
- @child_works = []
30
- found_all_children = true
31
-
32
- # find and sequence all pending children
33
- @pending_children = IiifPrint::PendingRelationship.where(parent_id: parent_id).order('child_order asc')
34
-
35
- # find child works (skip out if any haven't yet been created)
36
- @pending_children.each do |child|
37
- # find by title... if any aren't found, the child works are not yet ready
38
- found_children = find_children_by_title_for(child.child_title, child_model)
39
- found_all_children = false if found_children.empty?
40
- break unless found_all_children == true
41
- @child_works += found_children
42
- end
43
- # return boolean
44
- found_all_children
45
- end
46
-
47
- def find_children_by_title_for(title, model)
48
- # We should only find one, but there is no guarantee of that and `:where` returns an array.
49
- model.constantize.where(title: title)
50
- end
51
-
52
- def reschedule(user:, parent_id:, parent_model:, child_model:)
53
- CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
54
- user: user,
55
- parent_id: parent_id,
56
- parent_model: parent_model,
57
- child_model: child_model
58
- )
59
- end
60
-
61
- def create_relationships(user:, parent:, ordered_children:)
62
- records_hash = {}
63
- ordered_children.map(&:id).each_with_index do |child_id, i|
64
- records_hash[i.to_s] = { id: child_id }
65
- end
66
- attrs = { work_members_attributes: records_hash }
67
- parent.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
68
- env = Hyrax::Actors::Environment.new(parent, Ability.new(user), attrs)
69
-
70
- Hyrax::CurationConcern.actor.update(env)
71
- # need to reindex all file_sets to make sure all ancestors are indexed
72
- ordered_children.each do |child_work|
73
- child_work.file_sets.each(&:update_index) if child_work.respond_to?(:file_sets)
74
- end
75
- end
76
- end
77
- end
78
- end
@@ -1,130 +0,0 @@
1
- require 'open3'
2
- require 'securerandom'
3
- require 'tmpdir'
4
- require 'iiif_print/split_pdfs/pdf_image_extraction_service'
5
-
6
- module IiifPrint
7
- module SplitPdfs
8
- class PagesIntoImagesService
9
- include Enumerable
10
-
11
- def initialize(path)
12
- @baseid = SecureRandom.uuid
13
- @pdfpath = path
14
- @info = nil
15
- @entries = nil
16
- @tmpdir = nil
17
- @size = nil
18
- @pagecount = nil
19
- @pdftext = nil
20
- @compression = 'lzw'
21
- end
22
-
23
- # return the memoized PdfImageExtractionService built for @pdfpath
24
- def pdfinfo
25
- @info = IiifPrint::SplitPdfs::PdfImageExtractionService.new(@pdfpath) if @info.nil?
26
- @info
27
- end
28
-
29
- # TODO: put this test somewhere to prevent invalid pdfs from crashing the image service.
30
- def invalid_pdf?
31
- return true if pdfinfo.color.include?(nil) || pdfinfo.width.nil? || pdfinfo.height.nil? || pdfinfo.entries.length.zero?
32
- false
33
- end
34
-
35
- def tmpdir
36
- @tmpdir = Dir.mktmpdir if @tmpdir.nil?
37
- @tmpdir
38
- end
39
-
40
- def colordevice(channels, bpc)
41
- bits = bpc * channels
42
- # will be either 8bpc/16bpc color TIFF,
43
- # with any CMYK source transformed to 8bpc RGB
44
- bits = 24 unless [24, 48].include? bits
45
- "tiff#{bits}nc"
46
- end
47
-
48
- def gsdevice
49
- color, channels, bpc = pdfinfo.color
50
- device = nil
51
- # CCITT Group 4 Black and White, if applicable:
52
- if color == 'gray' && bpc == 1
53
- device = 'tiffg4'
54
- @compression = 'g4'
55
- end
56
- # 8 Bit Grayscale, if applicable:
57
- device = 'tiffgray' if color == 'gray' && bpc > 1
58
- # otherwise color:
59
- device = colordevice(channels, bpc) if device.nil?
60
- device
61
- end
62
-
63
- # TODO: this method came from newspaper gem but appears to be unused. Is it needed anywhere?
64
- # def gstext
65
- # cmd = "gs -q -dNOPAUSE -dBATCH -sDEVICE=txtwrite " \
66
- # "-sOutputFile=- -f #{@pdfpath}"
67
- # Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
68
- # @pdftext = stdout.read
69
- # end
70
- # @pdftext
71
- # end
72
-
73
- def pagecount
74
- cmd = "pdfinfo #{@pdfpath}"
75
- Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
76
- output = stdout.read.split("\n")
77
- # rubocop:disable Performance/Detect
78
- pages_e = output.select { |e| e.start_with?('Pages:') }[0]
79
- # rubocop:enable Performance/Detect
80
- @pagecount = pages_e.split[-1].to_i
81
- end
82
- @pagecount
83
- end
84
-
85
- def looks_scanned
86
- max_image_px = pdfinfo.width * pdfinfo.height
87
- single_image_per_page = pdfinfo.entries.length == pagecount
88
- # single 10mp+ image per page?
89
- single_image_per_page && max_image_px > 1024 * 1024 * 10
90
- end
91
-
92
- def ppi
93
- unless looks_scanned
94
- # 400 dpi for something that does not look like scanned media:
95
- return 400
96
- end
97
- # For scanned media, defer to detected image PPI:
98
- pdfinfo.ppi
99
- end
100
-
101
- # ghostscript convert all pages to TIFF
102
- def gsconvert
103
- output_base = File.join(tmpdir, "#{@baseid}-page%d.tiff")
104
- cmd = "gs -dNOPAUSE -dBATCH -sDEVICE=#{gsdevice} " \
105
- "-dTextAlphaBits=4 -sCompression=#{@compression} " \
106
- "-sOutputFile=#{output_base} -r#{ppi} -f #{@pdfpath}"
107
- Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
108
- output = stdout.read.split("\n")
109
- # rubocop:disable Performance/Count
110
- @size = output.select { |e| e.start_with?('Page ') }.length
111
- # rubocop:enable Performance/Count
112
- end
113
- # Return an array of expected filenames
114
- (1..@size).map { |n| File.join(tmpdir, "#{@baseid}-page#{n}.tiff") }
115
- end
116
-
117
- # entries for each page
118
- def entries
119
- @entries = gsconvert if @entries.nil?
120
- @entries
121
- end
122
-
123
- def each
124
- entries.each do |e|
125
- yield(e)
126
- end
127
- end
128
- end
129
- end
130
- end
data/spec/.keep.txt DELETED
@@ -1 +0,0 @@
1
- spec dir for RSpec
@@ -1,6 +0,0 @@
1
- FactoryBot.define do
2
- factory :ability do
3
- user
4
- initialize_with { new(user) }
5
- end
6
- end
@@ -1,7 +0,0 @@
1
- # will infer, create a NewspaperIssue object
2
- FactoryBot.define do
3
- factory :newspaper_issue do
4
- title { ['Here and There'] }
5
- depositor { User.batch_user.user_key }
6
- end
7
- end
@@ -1,7 +0,0 @@
1
- # will infer, create a NewspaperPage object
2
- FactoryBot.define do
3
- factory :newspaper_page do
4
- title { ['Here and There'] }
5
- depositor { User.batch_user.user_key }
6
- end
7
- end
@@ -1,12 +0,0 @@
1
- FactoryBot.define do
2
- factory :newspaper_page_solr_document, class: SolrDocument do
3
- initialize_with do
4
- new(id: '123456',
5
- title_tesim: ['Page 1'],
6
- has_model_ssim: ['NewspaperPage'],
7
- issue_id_ssi: 'abc123',
8
- file_set_ids_ssim: ['7891011'],
9
- thumbnail_path_ss: '/downloads/123456?file=thumbnail')
10
- end
11
- end
12
- end
@@ -1,8 +0,0 @@
1
- # will infer, create a NewspaperTitle object
2
- FactoryBot.define do
3
- factory :newspaper_title do
4
- title { ['ACME Press'] }
5
- lccn { 'sn2036999999' }
6
- depositor { User.batch_user.user_key }
7
- end
8
- end
@@ -1,9 +0,0 @@
1
- FactoryBot.define do
2
- factory :uploaded_pdf_file, class: Hyrax::UploadedFile do
3
- initialize_with do
4
- base = File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files')
5
- pdf_path = File.join(base, 'minimal-2-page.pdf')
6
- new(file: File.open(pdf_path), user: create(:user))
7
- end
8
- end
9
- end
@@ -1,9 +0,0 @@
1
- FactoryBot.define do
2
- factory :uploaded_txt_file, class: Hyrax::UploadedFile do
3
- initialize_with do
4
- base = File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files')
5
- file_path = File.join(base, 'ndnp-sample1-txt.txt')
6
- new(file: File.open(file_path), user: create(:user))
7
- end
8
- end
9
- end
@@ -1,13 +0,0 @@
1
- FactoryBot.define do
2
- factory :user do
3
- id { "skroob" }
4
- email { "spaceballs@example.com" }
5
- password { "password_is_12345" }
6
- initialize_with do
7
- User.find_or_create_by(id: id) do |user|
8
- user.email = email
9
- user.password = password
10
- end
11
- end
12
- end
13
- end
Binary file
Binary file
@@ -1,7 +0,0 @@
1
- ## Sample / fixture file manifest
2
-
3
- * sample-4page-issue.pdf
4
- - original: chicopee-weekly-journal_1856-05-24-p1.pdf
5
- * page1.tiff
6
- - first page from sample-4page-issue.pdf, converted to CCITT Group 4
7
- encoded (lossless compressed) TIFF via Ghostscript.