iiif_print 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +102 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_work_indexer.rb +27 -0
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +45 -17
- data/{lib → app/jobs}/iiif_print/jobs/application_job.rb +2 -1
- data/app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb +153 -0
- data/app/jobs/iiif_print/jobs/create_relationships_job.rb +117 -0
- data/app/jobs/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/app/listeners/iiif_print/listener.rb +31 -0
- data/app/models/concerns/iiif_print/set_child_flag.rb +10 -1
- data/app/models/concerns/iiif_print/solr/document.rb +19 -3
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/file_set_presenter_decorator.rb +11 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +23 -11
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +90 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +8 -10
- data/app/services/iiif_print/simple_schema_loader_decorator.rb +11 -0
- data/app/transactions/hyrax/transactions/iiif_print_container_decorator.rb +34 -0
- data/app/transactions/hyrax/transactions/steps/conditionally_destroy_children_from_split.rb +32 -0
- data/app/transactions/hyrax/transactions/steps/delete_all_file_sets_decorator.rb +35 -0
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/initializers/simple_schema_loader.rb +1 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/metadata/child_works_from_pdf_splitting.yaml +21 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20181214181358_create_iiif_print_derivative_attachments.rb +8 -6
- data/db/migrate/20190107165909_create_iiif_print_ingest_file_relations.rb +7 -5
- data/db/migrate/20230109000000_create_iiif_print_pending_relationships.rb +8 -6
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +11 -10
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +14 -2
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +58 -6
- data/lib/iiif_print/catalog_search_builder.rb +7 -3
- data/lib/iiif_print/configuration.rb +205 -8
- data/lib/iiif_print/data/fileset_helper.rb +3 -3
- data/lib/iiif_print/data/work_derivatives.rb +4 -4
- data/lib/iiif_print/engine.rb +53 -15
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jp2_derivative_service.rb +4 -1
- data/lib/iiif_print/lineage_service.rb +47 -13
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/pdf_derivative_service.rb +3 -1
- data/lib/iiif_print/persistence_layer/active_fedora_adapter.rb +189 -0
- data/lib/iiif_print/persistence_layer/valkyrie_adapter.rb +183 -0
- data/lib/iiif_print/persistence_layer.rb +118 -0
- data/lib/iiif_print/split_pdfs/base_splitter.rb +153 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +83 -37
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +22 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/text_extraction_derivative_service.rb +4 -2
- data/lib/iiif_print/text_formats_from_alto_service.rb +3 -1
- data/lib/iiif_print/tiff_derivative_service.rb +3 -1
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +210 -20
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +111 -196
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/app/indexers/concerns/iiif_print/child_indexer.rb +0 -34
- data/app/views/hyrax/file_sets/_actions.html.erb +0 -45
- data/bin/rails +0 -13
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +0 -107
- data/lib/iiif_print/jobs/create_relationships_job.rb +0 -78
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/.keep.txt +0 -1
- data/spec/factories/ability.rb +0 -6
- data/spec/factories/newspaper_issue.rb +0 -7
- data/spec/factories/newspaper_page.rb +0 -7
- data/spec/factories/newspaper_page_solr_document.rb +0 -12
- data/spec/factories/newspaper_title.rb +0 -8
- data/spec/factories/uploaded_pdf_file.rb +0 -9
- data/spec/factories/uploaded_txt_file.rb +0 -9
- data/spec/factories/user.rb +0 -13
- data/spec/fixtures/files/4.1.07.jp2 +0 -0
- data/spec/fixtures/files/4.1.07.tiff +0 -0
- data/spec/fixtures/files/README.md +0 -7
- data/spec/fixtures/files/alto-2-0.xsd +0 -714
- data/spec/fixtures/files/broken-truncated.pdf +0 -0
- data/spec/fixtures/files/credits.md +0 -16
- data/spec/fixtures/files/lowres-gray-via-ndnp-sample.tiff +0 -0
- data/spec/fixtures/files/minimal-1-page.pdf +0 -0
- data/spec/fixtures/files/minimal-2-page.pdf +0 -0
- data/spec/fixtures/files/minimal-alto.xml +0 -31
- data/spec/fixtures/files/ndnp-alto-sample.xml +0 -24
- data/spec/fixtures/files/ndnp-sample1-json.json +0 -1
- data/spec/fixtures/files/ndnp-sample1-txt.txt +0 -1
- data/spec/fixtures/files/ndnp-sample1.pdf +0 -0
- data/spec/fixtures/files/ocr_alto.xml +0 -202
- data/spec/fixtures/files/ocr_alto_scaled_4pts_per_px.xml +0 -202
- data/spec/fixtures/files/ocr_color.tiff +0 -0
- data/spec/fixtures/files/ocr_gray.jp2 +0 -0
- data/spec/fixtures/files/ocr_gray.tiff +0 -0
- data/spec/fixtures/files/ocr_mono.tiff +0 -0
- data/spec/fixtures/files/ocr_mono_text_hocr.html +0 -78
- data/spec/fixtures/files/page1.tiff +0 -0
- data/spec/fixtures/files/sample-4page-issue.pdf +0 -0
- data/spec/fixtures/files/sample-color-newsletter.pdf +0 -0
- data/spec/fixtures/files/thumbnail.jpg +0 -0
- data/spec/helpers/hyrax/iiif_helper_spec.rb +0 -65
- data/spec/helpers/iiif_print_helper_spec.rb +0 -43
- data/spec/iiif_print/base_derivative_service_spec.rb +0 -11
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +0 -51
- data/spec/iiif_print/catalog_search_builder_spec.rb +0 -60
- data/spec/iiif_print/configuration_spec.rb +0 -67
- data/spec/iiif_print/data/work_derivatives_spec.rb +0 -245
- data/spec/iiif_print/data/work_file_spec.rb +0 -99
- data/spec/iiif_print/data/work_files_spec.rb +0 -237
- data/spec/iiif_print/image_tool_spec.rb +0 -109
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +0 -30
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +0 -17
- data/spec/iiif_print/jp2_image_metadata_spec.rb +0 -37
- data/spec/iiif_print/lineage_service_spec.rb +0 -13
- data/spec/iiif_print/metadata_spec.rb +0 -115
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
- data/spec/iiif_print/text_extraction/alto_reader_spec.rb +0 -49
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +0 -45
- data/spec/iiif_print/text_extraction/page_ocr_spec.rb +0 -84
- data/spec/iiif_print/text_extraction/render_alto_spec.rb +0 -54
- data/spec/iiif_print/text_extraction/word_coords_builder_spec.rb +0 -44
- data/spec/iiif_print_spec.rb +0 -51
- data/spec/misc_shared.rb +0 -111
- data/spec/models/iiif_print/derivative_attachment_spec.rb +0 -37
- data/spec/models/iiif_print/ingest_file_relation_spec.rb +0 -56
- data/spec/models/solr_document_spec.rb +0 -14
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +0 -19
- data/spec/presenters/iiif_print/iiif_manifest_presenter_factory_behavior_spec.rb +0 -49
- data/spec/services/iiif_print/jp2_derivative_service_spec.rb +0 -59
- data/spec/services/iiif_print/pdf_derivative_service_spec.rb +0 -66
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +0 -178
- data/spec/services/iiif_print/text_extraction_derivative_service_spec.rb +0 -82
- data/spec/services/iiif_print/text_formats_from_alto_service_spec.rb +0 -127
- data/spec/services/iiif_print/tiff_derivative_service_spec.rb +0 -65
- data/spec/spec_helper.rb +0 -181
- data/spec/support/controller_level_helpers.rb +0 -28
- data/spec/support/iiif_print_models.rb +0 -127
- data/spec/test_app_templates/blacklight.yml +0 -9
- data/spec/test_app_templates/fedora.yml +0 -15
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +0 -40
- data/spec/test_app_templates/redis.yml +0 -9
- data/spec/test_app_templates/solr/conf/schema.xml +0 -362
- data/spec/test_app_templates/solr/conf/solrconfig.xml +0 -322
- data/spec/test_app_templates/solr.yml +0 -7
@@ -1,107 +0,0 @@
|
|
1
|
-
module IiifPrint
  module Jobs
    # Splits each PDF handed to the job into page images, submits a batch-create job for the
    # child works of every PDF, records a PendingRelationship per page, and finally schedules
    # the job that links the new child works to the parent.
    class ChildWorksFromPdfJob < IiifPrint::Jobs::ApplicationJob
      # Each half of the sort key is zero-padded to this width. The key is a string and the
      # relationship job orders pending rows with `child_order asc`, so unpadded numbers would
      # sort lexicographically ("0 10" before "0 2") and scramble PDFs with more than 10 pages.
      SORT_KEY_WIDTH = 6

      # Break a pdf into individual pages
      #
      # @param parent_work the work the PDFs belong to; must respond to `iiif_print_config`
      # @param pdf_paths [Array<String>] paths to pdfs
      # @param user [User]
      # @param admin_set_id [String]
      # @param prior_pdfs [Integer] count of pdfs already on parent work
      def perform(parent_work, pdf_paths, user, admin_set_id, prior_pdfs)
        @parent_work = parent_work
        @child_admin_set_id = admin_set_id
        child_model = @parent_work.iiif_print_config.pdf_split_child_model

        # handle each input pdf
        pdf_paths.each_with_index do |path, pdf_idx|
          split_pdf(path, pdf_idx, user, prior_pdfs, child_model)
        end

        # Link newly created child works to the parent. The job is delayed because the child
        # works are created asynchronously by the batch jobs submitted above.
        IiifPrint::Jobs::CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
          user: user,
          parent_id: @parent_work.id,
          parent_model: @parent_work.class.to_s,
          child_model: child_model.to_s
        )

        # TODO: clean up image_files and pdf_paths
      end

      private

      # Split one PDF into images and submit the job that creates all the child works for it.
      #
      # @param path [String] path to the PDF
      # @param pdf_idx [Integer] position of this PDF within the current job
      # @param user [User]
      # @param prior_pdfs_count [Integer] count of pdfs already on parent work
      # @param child_model the model used for the child works (sent to the batch job as a String)
      # @return [void] returns early, submitting nothing, when the splitter yields no images
      def split_pdf(path, pdf_idx, user, prior_pdfs_count, child_model)
        image_files = @parent_work.iiif_print_config.pdf_splitter_service.new(path).to_a
        return if image_files.blank?

        pdf_sequence = pdf_idx + prior_pdfs_count
        prepare_import_data(pdf_sequence, image_files, user)

        operation = Hyrax::BatchCreateOperation.create!(
          user: user,
          operation_type: "PDF Batch Create"
        )

        # BatchCreateJob arguments, in order: user, titles keyed by uploaded file id,
        # resource types (none), Hyrax::UploadedFile ids, attributes applied to all works
        # (including :model), and the operation.
        BatchCreateJob.perform_later(user,
                                     @child_work_titles,
                                     {},
                                     @uploaded_files,
                                     attributes.merge(model: child_model.to_s).with_indifferent_access,
                                     operation)
      end

      # Create an uploaded file and a PendingRelationship for every page image, and collect
      # the file ids and titles the batch job needs.
      #
      # Sets @uploaded_files (Array of file id Strings) and @child_work_titles
      # (Hash of file id => title) for the PDF currently being processed.
      #
      # @param pdf_sequence [Integer] position of the PDF among all of the parent's PDFs
      # @param image_files [Array<String>] paths to the page images
      # @param user [User]
      def prepare_import_data(pdf_sequence, image_files, user)
        @uploaded_files = []
        @child_work_titles = {}
        image_files.each_with_index do |image_path, idx|
          file_id = create_uploaded_file(user, image_path).to_s
          file_title = set_title(@parent_work.title.first, pdf_sequence, idx)
          @uploaded_files << file_id
          @child_work_titles[file_id] = file_title
          # save child work info to create the member relationships
          PendingRelationship.create!(child_title: file_title,
                                      parent_id: @parent_work.id,
                                      child_order: sort_order(pdf_sequence, idx))
        end
      end

      # Build the key used to sequence child works under the parent.
      #
      # @param pdf_sequence [Integer]
      # @param idx [Integer] zero-based page index within the PDF
      # @return [String] e.g. "000000 000010"; zero-padded so string order equals numeric order
      def sort_order(pdf_sequence, idx)
        format("%0#{SORT_KEY_WIDTH}d %0#{SORT_KEY_WIDTH}d", pdf_sequence, idx)
      end

      # Persist a Hyrax::UploadedFile for the image at +path+, owned by +user+.
      #
      # @return the id of the saved uploaded file
      def create_uploaded_file(user, path)
        uf = Hyrax::UploadedFile.new
        uf.user_id = user.id
        uf.file = CarrierWave::SanitizedFile.new(path)
        uf.save!
        uf.id
      end

      # Compose the child work title; both numbers are shown one-based.
      #
      # @return [String] e.g. "Parent title: Pdf Nbr 1, Page 3"
      def set_title(title, pdf_sequence, idx)
        pdf_index = "Pdf Nbr #{pdf_sequence + 1}"
        page_number = "Page #{idx + 1}"
        "#{title}: #{pdf_index}, #{page_number}"
      end

      # Attributes copied onto every child work.
      #
      # TODO: what attributes do we need to fill in from the parent work? What about AllinsonFlex?
      def attributes
        {
          admin_set_id: @child_admin_set_id.to_s,
          creator: @parent_work.creator.to_a,
          rights_statement: @parent_work.rights_statement.to_a,
          visibility: @parent_work.visibility.to_s
        }
      end
    end
  end
end
|
@@ -1,78 +0,0 @@
|
|
1
|
-
module IiifPrint
  module Jobs
    # Link the child works created for a parent work to that parent, in the order recorded
    # in the parent's IiifPrint::PendingRelationship rows. If any child work cannot be found
    # yet, the job reschedules itself instead of linking.
    class CreateRelationshipsJob < IiifPrint::Jobs::ApplicationJob
      # Link newly created child works to the parent
      #
      # @param user [User] user
      # @param parent_id [String] parent work id
      # @param parent_model [String] parent model
      # @param child_model [String] child model
      def perform(user:, parent_id:, parent_model:, child_model:)
        if completed_child_data_for(parent_id, child_model)
          # add the members
          parent_work = parent_model.constantize.find(parent_id)
          create_relationships(user: user, parent: parent_work, ordered_children: @child_works)
          @pending_children.each(&:destroy)
        else
          # reschedule the job and end this one normally
          #
          # TODO: Depending on how things shake out, we could be infinitely rescheduling this job.
          # Consider a time to live parameter.
          reschedule(user: user, parent_id: parent_id, parent_model: parent_model, child_model: child_model)
        end
      end

      private

      # Load @pending_children and @child_works for the parent.
      #
      # @return [Boolean] true when every pending child title matched at least one work;
      #   false as soon as one title matches nothing (the lookup stops at that point)
      def completed_child_data_for(parent_id, child_model)
        @child_works = []

        # find and sequence all pending children
        @pending_children = IiifPrint::PendingRelationship.where(parent_id: parent_id).order('child_order asc')

        # find child works by title... if any aren't found, the child works are not yet ready
        @pending_children.each do |child|
          found_children = find_children_by_title_for(child.child_title, child_model)
          return false if found_children.empty?

          @child_works += found_children
        end

        true
      end

      # We should only find one, but there is no guarantee of that and `:where` returns an array.
      def find_children_by_title_for(title, model)
        model.constantize.where(title: title)
      end

      # Enqueue this same job again, with the same arguments, after 10 minutes.
      def reschedule(user:, parent_id:, parent_model:, child_model:)
        CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
          user: user,
          parent_id: parent_id,
          parent_model: parent_model,
          child_model: child_model
        )
      end

      # Add the children to the parent through the Hyrax actor stack, then reindex the
      # children's file sets.
      #
      # @param user [User] the user whose Ability the actor environment is built with
      # @param parent the parent work
      # @param ordered_children [Array] child works, already in member order
      def create_relationships(user:, parent:, ordered_children:)
        # work_members_attributes is keyed by the position as a String: { "0" => { id: ... }, ... }
        records_hash = ordered_children.each_with_index.map { |child, i| [i.to_s, { id: child.id }] }.to_h
        attrs = { work_members_attributes: records_hash }
        parent.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
        env = Hyrax::Actors::Environment.new(parent, Ability.new(user), attrs)

        Hyrax::CurationConcern.actor.update(env)
        # need to reindex all file_sets to make sure all ancestors are indexed
        ordered_children.each do |child_work|
          child_work.file_sets.each(&:update_index) if child_work.respond_to?(:file_sets)
        end
      end
    end
  end
end
|
@@ -1,130 +0,0 @@
|
|
1
|
-
require 'open3'
|
2
|
-
require 'securerandom'
|
3
|
-
require 'tmpdir'
|
4
|
-
require 'iiif_print/split_pdfs/pdf_image_extraction_service'
|
5
|
-
|
6
|
-
module IiifPrint
  module SplitPdfs
    # Enumerable over the TIFF files produced by converting every page of a PDF with
    # Ghostscript (`gs`). The conversion runs lazily, the first time the entries are needed.
    class PagesIntoImagesService
      include Enumerable

      # @param path path to the PDF; converted with #to_s when passed to external commands
      def initialize(path)
        @baseid = SecureRandom.uuid
        @pdfpath = path
        @info = nil
        @entries = nil
        @tmpdir = nil
        @size = nil
        @pagecount = nil
        @pdftext = nil
        @compression = 'lzw'
      end

      # @return [IiifPrint::SplitPdfs::PdfImageExtractionService] memoized image info for the PDF
      def pdfinfo
        @info ||= IiifPrint::SplitPdfs::PdfImageExtractionService.new(@pdfpath)
      end

      # TODO: put this test somewhere to prevent invalid pdfs from crashing the image service.
      #
      # @return [Boolean] true when color, width, height or image entries are missing
      def invalid_pdf?
        pdfinfo.color.include?(nil) || pdfinfo.width.nil? || pdfinfo.height.nil? || pdfinfo.entries.length.zero?
      end

      # @return [String] memoized temporary directory that receives the page images
      def tmpdir
        @tmpdir ||= Dir.mktmpdir
      end

      # Ghostscript color device name for the given channel count and bits per channel.
      #
      # @return [String] "tiff24nc" or "tiff48nc"
      def colordevice(channels, bpc)
        bits = bpc * channels
        # will be either 8bpc/16bpd color TIFF,
        # with any CMYK source transformed to 8bpc RBG
        bits = 24 unless [24, 48].include? bits
        "tiff#{bits}nc"
      end

      # Choose the Ghostscript output device from the PDF's color information.
      # Side effect: switches @compression to 'g4' when the 1-bit gray device is chosen.
      #
      # @return [String] Ghostscript device name
      def gsdevice
        color, channels, bpc = pdfinfo.color
        device = nil
        # CCITT Group 4 Black and White, if applicable:
        if color == 'gray' && bpc == 1
          device = 'tiffg4'
          @compression = 'g4'
        end
        # 8 Bit Grayscale, if applicable:
        device = 'tiffgray' if color == 'gray' && bpc > 1
        # otherwise color:
        device = colordevice(channels, bpc) if device.nil?
        device
      end

      # TODO: this method came from newspaper gem but appears to be unused. Is it needed anywhere?
      # def gstext
      #   cmd = "gs -q -dNOPAUSE -dBATCH -sDEVICE=txtwrite " \
      #         "-sOutputFile=- -f #{@pdfpath}"
      #   Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
      #     @pdftext = stdout.read
      #   end
      #   @pdftext
      # end

      # Page count as reported on the "Pages:" line of the `pdfinfo` command's output.
      # Memoized, so the external command runs at most once per instance.
      #
      # @return [Integer] the page count, or 0 when the output has no "Pages:" line
      def pagecount
        return @pagecount unless @pagecount.nil?

        # Argument-array form: no shell is involved, so paths containing spaces or shell
        # metacharacters are passed through verbatim. capture3 drains stdout and stderr.
        stdout, _stderr, _status = Open3.capture3('pdfinfo', @pdfpath.to_s)
        pages_line = stdout.each_line.find { |line| line.start_with?('Pages:') }
        @pagecount = pages_line ? pages_line.split.last.to_i : 0
      end

      # Heuristic: exactly one image per page, and the image is larger than 10 * 1024 * 1024 px.
      def looks_scanned
        max_image_px = pdfinfo.width * pdfinfo.height
        single_image_per_page = pdfinfo.entries.length == pagecount
        # single 10mp+ image per page?
        single_image_per_page && max_image_px > 1024 * 1024 * 10
      end

      # Resolution passed to Ghostscript.
      def ppi
        # 400 dpi for something that does not look like scanned media:
        return 400 unless looks_scanned

        # For scanned media, defer to detected image PPI:
        pdfinfo.ppi
      end

      # ghostscript convert all pages to TIFF
      #
      # @return [Array<String>] expected output filenames, one per "Page " line gs printed
      def gsconvert
        output_base = File.join(tmpdir, "#{@baseid}-page%d.tiff")
        # gsdevice must run before @compression is read: it may switch compression to 'g4'.
        device = gsdevice
        # Argument-array form avoids shell interpolation of the paths; capture3 reads both
        # stdout and stderr so gs cannot block on a full stderr pipe.
        stdout, _stderr, _status = Open3.capture3(
          'gs', '-dNOPAUSE', '-dBATCH', "-sDEVICE=#{device}",
          '-dTextAlphaBits=4', "-sCompression=#{@compression}",
          "-sOutputFile=#{output_base}", "-r#{ppi}", '-f', @pdfpath.to_s
        )
        @size = stdout.each_line.count { |line| line.start_with?('Page ') }
        # Return an array of expected filenames
        (1..@size).map { |n| File.join(tmpdir, "#{@baseid}-page#{n}.tiff") }
      end

      # entries for each page; runs the conversion on first use
      def entries
        @entries = gsconvert if @entries.nil?
        @entries
      end

      # Yield the path of each page image.
      #
      # @return [Enumerator] when called without a block
      def each(&block)
        return enum_for(:each) unless block_given?

        entries.each(&block)
      end
    end
  end
end
|
data/spec/.keep.txt
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
spec dir for RSpec
|
data/spec/factories/ability.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
# Factory building a SolrDocument whose fields describe a single NewspaperPage.
FactoryBot.define do
  factory :newspaper_page_solr_document, class: SolrDocument do
    initialize_with do
      solr_fields = {
        id: '123456',
        title_tesim: ['Page 1'],
        has_model_ssim: ['NewspaperPage'],
        issue_id_ssi: 'abc123',
        file_set_ids_ssim: ['7891011'],
        thumbnail_path_ss: '/downloads/123456?file=thumbnail'
      }
      new(**solr_fields)
    end
  end
end
|
@@ -1,9 +0,0 @@
|
|
1
|
-
# Factory building a Hyrax::UploadedFile from the gem's minimal two-page PDF fixture.
FactoryBot.define do
  factory :uploaded_pdf_file, class: Hyrax::UploadedFile do
    initialize_with do
      # fixture lives under <gem root>/spec/fixtures/files
      pdf_path = File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files', 'minimal-2-page.pdf')
      pdf_io = File.open(pdf_path)
      new(file: pdf_io, user: create(:user))
    end
  end
end
|
@@ -1,9 +0,0 @@
|
|
1
|
-
# Factory building a Hyrax::UploadedFile from the gem's NDNP sample text fixture.
FactoryBot.define do
  factory :uploaded_txt_file, class: Hyrax::UploadedFile do
    initialize_with do
      # fixture lives under <gem root>/spec/fixtures/files
      file_path = File.join(IiifPrint::GEM_PATH, 'spec', 'fixtures', 'files', 'ndnp-sample1-txt.txt')
      txt_io = File.open(file_path)
      new(file: txt_io, user: create(:user))
    end
  end
end
|
data/spec/factories/user.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
# Factory for the shared test user. The record is looked up by id and only created
# (with the email and password below) when it does not exist yet.
FactoryBot.define do
  factory :user do
    id { "skroob" }
    email { "spaceballs@example.com" }
    password { "password_is_12345" }

    initialize_with do
      User.find_or_create_by(id: id) do |new_user|
        new_user.email = email
        new_user.password = password
      end
    end
  end
end
|
Binary file
|
Binary file
|