iiif_print 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/build-lint-test-action.yaml +4 -5
- data/.gitignore +5 -4
- data/.rubocop.yml +1 -0
- data/.solargraph.yml +19 -0
- data/Gemfile.lock +1025 -0
- data/README.md +98 -9
- data/Rakefile +6 -0
- data/app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb +24 -0
- data/app/actors/iiif_print/actors/file_set_actor_decorator.rb +30 -28
- data/app/controllers/iiif_print/split_pdfs_controller.rb +38 -0
- data/app/helpers/iiif_print/iiif_helper_decorator.rb +32 -0
- data/app/helpers/iiif_print/iiif_print_helper_behavior.rb +23 -0
- data/app/helpers/iiif_print_helper.rb +0 -20
- data/app/indexers/concerns/iiif_print/child_indexer.rb +9 -3
- data/app/indexers/concerns/iiif_print/file_set_indexer.rb +17 -4
- data/app/models/concerns/iiif_print/set_child_flag.rb +9 -0
- data/app/models/concerns/iiif_print/solr/document.rb +14 -0
- data/app/models/iiif_print/iiif_search_decorator.rb +35 -0
- data/app/models/iiif_print/iiif_search_response_decorator.rb +25 -2
- data/app/models/iiif_print/pending_relationship.rb +3 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_behavior.rb +120 -0
- data/app/presenters/iiif_print/iiif_manifest_presenter_factory_behavior.rb +1 -1
- data/app/presenters/iiif_print/work_show_presenter_decorator.rb +19 -10
- data/app/search_builders/concerns/iiif_print/allinson_flex_fields.rb +15 -0
- data/app/search_builders/concerns/iiif_print/highlight_search_params.rb +2 -1
- data/app/services/iiif_print/derivative_rodeo_service.rb +382 -0
- data/app/services/iiif_print/manifest_builder_service_behavior.rb +88 -31
- data/app/services/iiif_print/pluggable_derivative_service.rb +3 -9
- data/app/views/catalog/_index_header_list_default.html.erb +13 -0
- data/app/views/hyrax/base/_representative_media.html.erb +4 -3
- data/app/views/hyrax/base/iiif_viewers/_universal_viewer.html.erb +1 -1
- data/app/views/hyrax/file_sets/_actions.html.erb +2 -1
- data/app/views/hyrax/file_sets/_show_actions.html.erb +24 -0
- data/config/locales/iiif_print.en.yml +4 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20231110163052_add_model_details_to_iiif_print_pending_relationships.rb +7 -0
- data/docker-compose.yml +2 -2
- data/iiif_print.gemspec +10 -9
- data/lib/generators/iiif_print/install_generator.rb +21 -1
- data/lib/generators/iiif_print/templates/config/initializers/iiif_print.rb +11 -4
- data/lib/generators/iiif_print/templates/helpers/iiif_print_helper.rb +5 -0
- data/lib/iiif_print/base_derivative_service.rb +2 -1
- data/lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb +57 -5
- data/lib/iiif_print/catalog_search_builder.rb +5 -1
- data/lib/iiif_print/configuration.rb +145 -8
- data/lib/iiif_print/data/fileset_helper.rb +1 -1
- data/lib/iiif_print/data/work_derivatives.rb +3 -3
- data/lib/iiif_print/engine.rb +7 -13
- data/lib/iiif_print/errors.rb +18 -0
- data/lib/iiif_print/homepage_search_builder.rb +17 -0
- data/lib/iiif_print/image_tool.rb +12 -8
- data/lib/iiif_print/jobs/child_works_from_pdf_job.rb +74 -33
- data/lib/iiif_print/jobs/create_relationships_job.rb +80 -31
- data/lib/iiif_print/jobs/request_split_pdf_job.rb +31 -0
- data/lib/iiif_print/lineage_service.rb +29 -8
- data/lib/iiif_print/metadata.rb +67 -48
- data/lib/iiif_print/split_pdfs/base_splitter.rb +142 -0
- data/lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb +68 -32
- data/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +166 -0
- data/lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb +33 -0
- data/lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb +19 -0
- data/lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb +26 -0
- data/lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb +41 -0
- data/lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb +64 -59
- data/lib/iiif_print/text_extraction/hocr_reader.rb +7 -3
- data/lib/iiif_print/text_extraction/page_ocr.rb +5 -4
- data/lib/iiif_print/version.rb +1 -1
- data/lib/iiif_print.rb +167 -12
- data/lib/samvera/derivatives/configuration.rb +83 -0
- data/lib/samvera/derivatives/hyrax.rb +129 -0
- data/lib/samvera/derivatives.rb +238 -0
- data/spec/factories/newspaper_page_solr_document.rb +9 -1
- data/spec/fixtures/authorities/licenses.yml +4 -0
- data/spec/fixtures/authorities/rights_statements.yml +4 -0
- data/spec/iiif_print/base_derivative_service_spec.rb +20 -3
- data/spec/iiif_print/blacklight_iiif_search/annotation_decorator_spec.rb +11 -3
- data/spec/iiif_print/catalog_search_builder_spec.rb +1 -1
- data/spec/iiif_print/configuration_spec.rb +141 -15
- data/spec/iiif_print/jobs/child_works_from_pdf_job_spec.rb +7 -2
- data/spec/iiif_print/jobs/create_relationships_job_spec.rb +110 -9
- data/spec/iiif_print/lineage_service_spec.rb +1 -1
- data/spec/iiif_print/metadata_spec.rb +157 -23
- data/spec/iiif_print/split_pdfs/base_splitter_spec.rb +27 -0
- data/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb +80 -0
- data/spec/iiif_print/split_pdfs/destroy_pdf_child_works_service_spec.rb +92 -0
- data/spec/iiif_print/split_pdfs/pages_to_jpgs_splitter_spec.rb +22 -0
- data/spec/iiif_print/split_pdfs/pages_to_pngs_splitter_spec.rb +18 -0
- data/spec/iiif_print/split_pdfs/pages_to_tiffs_splitter_spec.rb +19 -0
- data/spec/iiif_print/text_extraction/hocr_reader_spec.rb +2 -2
- data/spec/iiif_print_spec.rb +125 -5
- data/spec/models/iiif_print/iiif_search_decorator_spec.rb +27 -0
- data/spec/presenters/iiif_print/iiif_manifest_presenter_behavior_spec.rb +51 -0
- data/spec/samvera/derivatives/configuration_spec.rb +41 -0
- data/spec/samvera/derivatives/hyrax_spec.rb +62 -0
- data/spec/samvera/derivatives_spec.rb +54 -0
- data/spec/services/iiif_print/derivative_rodeo_service_spec.rb +103 -0
- data/spec/services/iiif_print/manifest_builder_service_behavior_spec.rb +20 -0
- data/spec/services/iiif_print/pluggable_derivative_service_spec.rb +8 -11
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +1 -1
- data/tasks/copy_authorities_to_test_app.rake +11 -0
- data/tasks/iiif_print_dev.rake +4 -4
- metadata +123 -35
- data/app/helpers/hyrax/iiif_helper.rb +0 -22
- data/lib/iiif_print/split_pdfs/pages_into_images_service.rb +0 -130
- data/spec/iiif_print/split_pdfs/pages_into_images_service_spec.rb +0 -6
@@ -1,41 +1,67 @@
|
|
1
1
|
module IiifPrint
|
2
2
|
module Jobs
|
3
|
-
#
|
3
|
+
# Link newly created child works to the parent
|
4
4
|
class CreateRelationshipsJob < IiifPrint::Jobs::ApplicationJob
|
5
|
-
|
6
|
-
|
5
|
+
include Hyrax::Lockable
|
6
|
+
|
7
|
+
RETRY_MAX = 10
|
8
|
+
|
7
9
|
# @param parent_id: [<String>] parent work id
|
8
10
|
# @param parent_model: [<String>] parent model
|
9
11
|
# @param child_model: [<String>] child model
|
10
|
-
|
11
|
-
|
12
|
+
# @param retries: [<Integer>] count used during rescheduling to prevent infinite retries
|
13
|
+
def perform(parent_id:, parent_model:, child_model:, retries: 0, **)
|
14
|
+
@parent_id = parent_id
|
15
|
+
@parent_model = parent_model
|
16
|
+
@child_model = child_model
|
17
|
+
@retries = retries + 1
|
18
|
+
|
19
|
+
@number_of_successes = 0
|
20
|
+
@number_of_failures = 0
|
21
|
+
@parent_record_members_added = false
|
22
|
+
@errors = []
|
23
|
+
|
24
|
+
# Because we need our children in the correct order, we can't create any
|
25
|
+
# relationships until all child works have been created.
|
26
|
+
if completed_child_data
|
12
27
|
# add the members
|
13
|
-
|
14
|
-
|
15
|
-
|
28
|
+
add_children_to_parent
|
29
|
+
if @number_of_failures.zero? && @number_of_successes == @pending_children.count
|
30
|
+
# remove pending relationships upon valid completion
|
31
|
+
@pending_children.each(&:destroy)
|
32
|
+
elsif @number_of_failures.zero? && @number_of_successes > @pending_children.count
|
33
|
+
# remove pending relationships but raise error that too many relationships formed
|
34
|
+
@pending_children.each(&:destroy)
|
35
|
+
raise "CreateRelationshipsJob for parent id: #{@parent_id} " \
|
36
|
+
"added #{@number_of_successes} children, " \
|
37
|
+
"expected #{@pending_children} children."
|
38
|
+
else
|
39
|
+
# report failures & keep pending relationships
|
40
|
+
raise "CreateRelationshipsJob failed for parent id: #{@parent_id} " \
|
41
|
+
"had #{@number_of_successes} successes & #{@number_of_failures} failures, " \
|
42
|
+
"with errors: #{@errors}. Wanted #{@pending_children} children."
|
43
|
+
end
|
16
44
|
else
|
17
|
-
# reschedule the job and end this one normally
|
18
|
-
|
19
|
-
# TODO: Depending on how things shake out, we could be infinitely rescheduling this job.
|
20
|
-
# Consider a time to live parameter.
|
21
|
-
reschedule(user: user, parent_id: parent_id, parent_model: parent_model, child_model: child_model)
|
45
|
+
# if we aren't ready yet, reschedule the job and end this one normally
|
46
|
+
reschedule_job
|
22
47
|
end
|
23
48
|
end
|
24
49
|
|
25
50
|
private
|
26
51
|
|
27
|
-
# load @child_works
|
28
|
-
|
52
|
+
# load @child_works and @pending_children, and
|
53
|
+
# return boolean indicating whether all children are present
|
54
|
+
def completed_child_data
|
29
55
|
@child_works = []
|
30
56
|
found_all_children = true
|
31
57
|
|
32
58
|
# find and sequence all pending children
|
33
|
-
@pending_children = IiifPrint::PendingRelationship.where(parent_id: parent_id).order('child_order asc')
|
59
|
+
@pending_children = IiifPrint::PendingRelationship.where(parent_id: @parent_id).order('child_order asc')
|
34
60
|
|
35
61
|
# find child works (skip out if any haven't yet been created)
|
36
62
|
@pending_children.each do |child|
|
37
63
|
# find by title... if any aren't found, the child works are not yet ready
|
38
|
-
found_children = find_children_by_title_for(child.child_title, child_model)
|
64
|
+
found_children = find_children_by_title_for(child.child_title, @child_model)
|
39
65
|
found_all_children = false if found_children.empty?
|
40
66
|
break unless found_all_children == true
|
41
67
|
@child_works += found_children
|
@@ -49,30 +75,53 @@ module IiifPrint
|
|
49
75
|
model.constantize.where(title: title)
|
50
76
|
end
|
51
77
|
|
52
|
-
def
|
78
|
+
def add_children_to_parent
|
79
|
+
parent_work = @parent_model.constantize.find(@parent_id)
|
80
|
+
create_relationships(parent: parent_work, ordered_children: @child_works)
|
81
|
+
end
|
82
|
+
|
83
|
+
def reschedule_job
|
84
|
+
return if @retries > RETRY_MAX
|
53
85
|
CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
86
|
+
parent_id: @parent_id,
|
87
|
+
parent_model: @parent_model,
|
88
|
+
child_model: @child_model,
|
89
|
+
retries: @retries
|
58
90
|
)
|
59
91
|
end
|
60
92
|
|
61
|
-
def create_relationships(
|
62
|
-
|
63
|
-
|
64
|
-
|
93
|
+
def create_relationships(parent:, ordered_children:)
|
94
|
+
acquire_lock_for(parent.id) do
|
95
|
+
# Not sure uncached is needed here, but kept
|
96
|
+
# for consistency with Bulkrax's relationships logic
|
97
|
+
ActiveRecord::Base.uncached do
|
98
|
+
ordered_children.each do |child|
|
99
|
+
add_to_work(child_record: child, parent_record: parent)
|
100
|
+
@number_of_successes += 1
|
101
|
+
rescue => e
|
102
|
+
@number_of_failures += 1
|
103
|
+
@errors << e
|
104
|
+
end
|
105
|
+
end
|
106
|
+
parent.save! if @parent_record_members_added && @number_of_failures.zero?
|
65
107
|
end
|
66
|
-
attrs = { work_members_attributes: records_hash }
|
67
|
-
parent.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
|
68
|
-
env = Hyrax::Actors::Environment.new(parent, Ability.new(user), attrs)
|
69
108
|
|
70
|
-
|
71
|
-
# need to
|
109
|
+
# Bulkrax no longer reindexes file_sets, but IiifPrint needs both to add is_page_of_ssim for universal viewer.
|
110
|
+
# This is where child works need to be indexed (AFTER the parent save), as opposed to how Bulkrax does it.
|
72
111
|
ordered_children.each do |child_work|
|
112
|
+
child_work.update_index
|
73
113
|
child_work.file_sets.each(&:update_index) if child_work.respond_to?(:file_sets)
|
74
114
|
end
|
75
115
|
end
|
116
|
+
|
117
|
+
def add_to_work(child_record:, parent_record:)
|
118
|
+
return true if parent_record.ordered_members.to_a.include?(child_record)
|
119
|
+
|
120
|
+
parent_record.ordered_members << child_record
|
121
|
+
@parent_record_members_added = true
|
122
|
+
# Bulkrax does child_record.save! here, but it makes no sense
|
123
|
+
# as there is nothing to save or index at this point.
|
124
|
+
end
|
76
125
|
end
|
77
126
|
end
|
78
127
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module IiifPrint
|
2
|
+
module Jobs
|
3
|
+
##
|
4
|
+
# Requests a (re)split of a PDF file set: cleans up any child works previously spawned from the PDF, then submits the split
|
5
|
+
class RequestSplitPdfJob < IiifPrint::Jobs::ApplicationJob
|
6
|
+
##
|
7
|
+
# @param file_set [FileSet]
|
8
|
+
# @param user [User]
|
9
|
+
# rubocop:disable Metrics/MethodLength
|
10
|
+
def perform(file_set:, user:)
|
11
|
+
return true unless file_set.pdf?
|
12
|
+
|
13
|
+
work = IiifPrint.parent_for(file_set)
|
14
|
+
|
15
|
+
# Woe is ye who changes the configuration of the model, thus removing the splitting.
|
16
|
+
raise WorkNotConfiguredToSplitFileSetError.new(work: work, file_set: file_set) unless work&.iiif_print_config&.pdf_splitter_job&.presence
|
17
|
+
|
18
|
+
# clean up any existing spawned child works of this file_set
|
19
|
+
IiifPrint::SplitPdfs::DestroyPdfChildWorksService.conditionally_destroy_spawned_children_of(
|
20
|
+
file_set: file_set,
|
21
|
+
work: work
|
22
|
+
)
|
23
|
+
|
24
|
+
location = Hyrax::WorkingDirectory.find_or_retrieve(file_set.files.first.id, file_set.id)
|
25
|
+
|
26
|
+
IiifPrint.conditionally_submit_split_for(work: work, file_set: file_set, locations: [location], user: user)
|
27
|
+
end
|
28
|
+
# rubocop:enable Metrics/MethodLength
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -2,7 +2,7 @@ module IiifPrint
|
|
2
2
|
# The purpose of this module is to encode lineage related services:
|
3
3
|
#
|
4
4
|
# - {.ancestor_ids_for}
|
5
|
-
# - {.
|
5
|
+
# - {.descendent_member_ids_for}
|
6
6
|
#
|
7
7
|
# The ancestor and descendent_file_sets are useful for ensuring we index together related items.
|
8
8
|
# For example, when I have a work that is a book, and one file set per page of that book, when I
|
@@ -18,24 +18,45 @@ module IiifPrint
|
|
18
18
|
def self.ancestor_ids_for(object)
|
19
19
|
ancestor_ids ||= []
|
20
20
|
object.in_works.each do |work|
|
21
|
-
ancestor_ids << work
|
21
|
+
ancestor_ids << ancestry_identifier_for(work)
|
22
22
|
ancestor_ids += ancestor_ids_for(work) if work.is_child
|
23
23
|
end
|
24
24
|
ancestor_ids.flatten.compact.uniq
|
25
25
|
end
|
26
26
|
|
27
|
+
##
|
28
|
+
# @api public
|
29
|
+
#
|
30
|
+
# Given the :work, return its identifier
|
31
|
+
#
|
32
|
+
# @param [Object]
|
33
|
+
# @return [String]
|
34
|
+
def self.ancestry_identifier_for(work)
|
35
|
+
IiifPrint.config.ancestory_identifier_function.call(work)
|
36
|
+
end
|
37
|
+
|
27
38
|
##
|
28
39
|
# @param object [#ordered_works, #file_sets, #member_ids]
|
29
|
-
# @return [Array<String>] the ids of associated file sets
|
30
|
-
|
40
|
+
# @return [Array<String>] the ids of associated file sets and child works
|
41
|
+
#
|
42
|
+
# @see
|
43
|
+
# https://github.com/samvera/hyrax/blob/2b807fe101176d594129ef8a8fe466d3d03a372b/app/indexers/hyrax/work_indexer.rb#L15-L18
|
44
|
+
# for "clarification" of the comingling of file_set_ids and member_ids
|
45
|
+
def self.descendent_member_ids_for(object)
|
31
46
|
# enables us to return parents when searching for child OCR
|
32
|
-
|
47
|
+
#
|
48
|
+
# https://github.com/samvera/hydra-works/blob/c9b9dd0cf11de671920ba0a7161db68ccf9b7f6d/lib/hydra/works/models/concerns/work_behavior.rb#L90-L92
|
49
|
+
#
|
50
|
+
# The Hydra::Works implementation of file_set_ids is "members.select(&:file_set?).map(&:id)";
|
51
|
+
# so no sense doing `object.file_set_ids + object.member_ids`
|
52
|
+
file_set_ids = object.member_ids
|
33
53
|
object.ordered_works&.each do |child|
|
34
|
-
file_set_ids +=
|
54
|
+
file_set_ids += descendent_member_ids_for(child)
|
35
55
|
end
|
36
|
-
# enables us to return parents when searching for child metadata
|
37
|
-
file_set_ids += object.member_ids
|
38
56
|
file_set_ids.flatten.uniq.compact
|
39
57
|
end
|
58
|
+
class << self
|
59
|
+
alias descendent_file_set_ids_for descendent_member_ids_for
|
60
|
+
end
|
40
61
|
end
|
41
62
|
end
|
data/lib/iiif_print/metadata.rb
CHANGED
@@ -17,48 +17,43 @@ module IiifPrint
|
|
17
17
|
@base_url = base_url
|
18
18
|
end
|
19
19
|
|
20
|
-
attr_reader :work, :version, :fields
|
20
|
+
attr_reader :work, :version, :fields, :current_ability
|
21
21
|
|
22
22
|
def build_metadata
|
23
|
-
send("build_metadata_for_v#{version}")
|
24
|
-
end
|
25
|
-
|
26
|
-
private
|
27
|
-
|
28
|
-
def build_metadata_for_v2
|
29
23
|
fields.map do |field|
|
30
|
-
|
31
|
-
if field.name == :collection && member_of_collection?
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
next if field_is_empty?(field)
|
38
|
-
{ 'label' => label,
|
39
|
-
'value' => cast_to_value(field_name: field.name, options: field.options) }
|
24
|
+
values = values_for(field_name: field)
|
25
|
+
if field.name == :collection && member_of_collection? && viewable_collections.present?
|
26
|
+
{ 'label' => metadata_map(field, :label),
|
27
|
+
'value' => metadata_map(field, :collection) }
|
28
|
+
elsif values.present? && !empty_string?(values)
|
29
|
+
{ 'label' => metadata_map(field, :label),
|
30
|
+
'value' => metadata_map(field, :value) }
|
40
31
|
end
|
41
32
|
end.compact
|
42
33
|
end
|
43
34
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
}
|
57
|
-
|
35
|
+
private
|
36
|
+
|
37
|
+
def metadata_map(field, property)
|
38
|
+
if version == 2
|
39
|
+
case property
|
40
|
+
when :label then field.label
|
41
|
+
when :value then cast_to_value(field_name: field.name, options: field.options)
|
42
|
+
when :collection then make_collection_link(viewable_collections)
|
43
|
+
end
|
44
|
+
elsif version == 3
|
45
|
+
case property
|
46
|
+
when :label then { I18n.locale.to_s => [field.label] }
|
47
|
+
when :value then { 'none' => cast_to_value(field_name: field.name, options: field.options) }
|
48
|
+
when :collection then { 'none' => make_collection_link(viewable_collections) }
|
49
|
+
end
|
50
|
+
end
|
58
51
|
end
|
59
52
|
|
60
|
-
|
61
|
-
|
53
|
+
# Bulkrax imports values as [""] if there isn't a value but still a header,
|
54
|
+
# these fields should not show in the metadata pane
|
55
|
+
def empty_string?(values)
|
56
|
+
values.uniq.size == 1 ? values.first == "" : false
|
62
57
|
end
|
63
58
|
|
64
59
|
def member_of_collection?
|
@@ -71,21 +66,41 @@ module IiifPrint
|
|
71
66
|
|
72
67
|
def cast_to_value(field_name:, options:)
|
73
68
|
if options&.[](:render_as) == :faceted
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
"f[#{search_field}][]": value, locale: I18n.locale
|
78
|
-
)
|
79
|
-
path += '&include_child_works=true' if work["is_child_bsi"] == true
|
80
|
-
"<a href='#{File.join(@base_url, path)}'>#{value}</a>"
|
81
|
-
end
|
69
|
+
faceted_values_for(field_name: field_name)
|
70
|
+
elsif qa_field?(field_name: options&.dig(:render_as) || field_name)
|
71
|
+
authority_values_for(field_name: field_name)
|
82
72
|
else
|
83
73
|
make_link(values_for(field_name: field_name))
|
84
74
|
end
|
85
75
|
end
|
86
76
|
|
77
|
+
def faceted_values_for(field_name:)
|
78
|
+
values_for(field_name: field_name).map do |value|
|
79
|
+
search_field = field_name.to_s + "_sim"
|
80
|
+
path = Rails.application.routes.url_helpers.search_catalog_path(
|
81
|
+
"f[#{search_field}][]": value, locale: I18n.locale
|
82
|
+
)
|
83
|
+
path += '&include_child_works=true' if work["is_child_bsi"] == true
|
84
|
+
"<a href='#{File.join(@base_url, path)}'>#{value}</a>"
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def qa_field?(field_name:, questioning_authority_fields: IiifPrint.config.questioning_authority_fields)
|
89
|
+
questioning_authority_fields.include?(field_name.to_s)
|
90
|
+
end
|
91
|
+
|
92
|
+
def authority_values_for(field_name:)
|
93
|
+
authority = Qa::Authorities::Local.subauthority_for(field_name.to_s.pluralize)
|
94
|
+
values_for(field_name: field_name).map do |value|
|
95
|
+
id, term = authority.find(value).values_at('id', 'term')
|
96
|
+
"<a href='#{id}'>#{term}</a>"
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
87
100
|
def values_for(field_name:)
|
88
|
-
|
101
|
+
field_name = field_name.try(:name) || field_name
|
102
|
+
# TODO: we are assuming tesim or dtsi (for dates), might want to account for other suffixes in the future
|
103
|
+
Array(work["#{field_name}_tesim"] || work["#{field_name}_dtsi"]&.to_date.try(:to_formatted_s, :standard))
|
89
104
|
end
|
90
105
|
|
91
106
|
def make_collection_link(collection_documents)
|
@@ -94,11 +109,16 @@ module IiifPrint
|
|
94
109
|
end
|
95
110
|
end
|
96
111
|
|
97
|
-
|
112
|
+
def viewable_collections
|
113
|
+
Hyrax::CollectionMemberService.run(SolrDocument.find(work.id), current_ability)
|
114
|
+
end
|
115
|
+
|
116
|
+
# @note This method turns link-looking strings into links and assumes https if no protocol was given
|
98
117
|
def make_link(texts)
|
99
118
|
texts.map do |t|
|
100
119
|
t.to_s.gsub(MAKE_LINK_REGEX) do |url|
|
101
|
-
|
120
|
+
protocol = url.start_with?('www.') ? 'https://' : ''
|
121
|
+
"<a href='#{protocol}#{url}' target='_blank'>#{url}</a>"
|
102
122
|
end
|
103
123
|
end
|
104
124
|
end
|
@@ -106,10 +126,9 @@ module IiifPrint
|
|
106
126
|
MAKE_LINK_REGEX = %r{
|
107
127
|
\b
|
108
128
|
(
|
109
|
-
(?:
|
110
|
-
(?:
|
111
|
-
|
112
|
-
[a-z0-9.\-]+[.][a-z]{2,4}/
|
129
|
+
(?:
|
130
|
+
(?:https?://) |
|
131
|
+
(?:www\.)
|
113
132
|
)
|
114
133
|
(?:
|
115
134
|
[^\s()<>]+ | \(([^\s()<>]+|(\([^\s()<>]+\)))*\)
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require 'securerandom'
|
3
|
+
require 'tmpdir'
|
4
|
+
require 'iiif_print/split_pdfs/pdf_image_extraction_service'
|
5
|
+
|
6
|
+
module IiifPrint
|
7
|
+
module SplitPdfs
|
8
|
+
# @abstract
|
9
|
+
#
|
10
|
+
# The purpose of this class is to split the PDF into constituent image files.
|
11
|
+
#
|
12
|
+
# @see .call
|
13
|
+
class BaseSplitter
|
14
|
+
##
|
15
|
+
# @api public
|
16
|
+
#
|
17
|
+
# @param path [String] local path to the PDF that we will split.
|
18
|
+
# @return [Enumerable]
|
19
|
+
#
|
20
|
+
# @see #each
|
21
|
+
#
|
22
|
+
# @note We're including the ** args to provide method conformity; other services require
|
23
|
+
# additional information (such as the FileSet)
|
24
|
+
#
|
25
|
+
# @see IiifPrint::SplitPdfs::DerivativeRodeoSplitter
|
26
|
+
def self.call(path, **)
|
27
|
+
new(path).to_a
|
28
|
+
end
|
29
|
+
|
30
|
+
class_attribute :image_extension
|
31
|
+
class_attribute :compression, default: nil
|
32
|
+
class_attribute :quality, default: nil
|
33
|
+
|
34
|
+
def initialize(path, tmpdir: Dir.mktmpdir, default_dpi: 400)
|
35
|
+
@baseid = SecureRandom.uuid
|
36
|
+
@pdfpath = path
|
37
|
+
@pdfinfo = IiifPrint::SplitPdfs::PdfImageExtractionService.new(pdfpath)
|
38
|
+
@tmpdir = tmpdir
|
39
|
+
@default_dpi = default_dpi
|
40
|
+
end
|
41
|
+
|
42
|
+
# In creating {#each} we get many of the methods of array operation (e.g. #to_a).
|
43
|
+
include Enumerable
|
44
|
+
|
45
|
+
# @api public
|
46
|
+
#
|
47
|
+
# @yieldparam [String] the path to the page's image file (its extension is the class's image_extension).
|
48
|
+
def each
|
49
|
+
entries.each do |e|
|
50
|
+
yield(e)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# @api private
|
55
|
+
#
|
56
|
+
# TODO: put this test somewhere to prevent invalid pdfs from crashing the image service.
|
57
|
+
def invalid_pdf?
|
58
|
+
return true if pdfinfo.color.include?(nil) || pdfinfo.width.nil? || pdfinfo.height.nil? || pdfinfo.page_count.zero?
|
59
|
+
false
|
60
|
+
end
|
61
|
+
|
62
|
+
attr_reader :pdfinfo, :tmpdir, :baseid, :default_dpi, :pdfpath
|
63
|
+
private :pdfinfo, :tmpdir, :baseid, :default_dpi, :pdfpath
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
# entries for each page
|
68
|
+
def entries
|
69
|
+
return @entries if defined? @entries
|
70
|
+
|
71
|
+
@entries = Array.wrap(gsconvert)
|
72
|
+
end
|
73
|
+
|
74
|
+
# rubocop:disable Metrics/MethodLength
|
75
|
+
# ghostscript convert all pages to images (format determined by gsdevice and image_extension)
|
76
|
+
def gsconvert
|
77
|
+
output_base = File.join(tmpdir, "#{baseid}-page%d.#{image_extension}")
|
78
|
+
# NOTE: you must call gsdevice before compression, as compression is
|
79
|
+
# updated during the gsdevice call.
|
80
|
+
cmd = "gs -dNOPAUSE -dBATCH -sDEVICE=#{gsdevice} -dTextAlphaBits=4"
|
81
|
+
cmd += " -sCompression=#{compression}" if compression?
|
82
|
+
cmd += " -dJPEGQ=#{quality}" if quality?
|
83
|
+
cmd += " -sOutputFile=#{output_base} -r#{ppi} -f #{pdfpath}"
|
84
|
+
filenames = []
|
85
|
+
|
86
|
+
Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
|
87
|
+
page_number = 0
|
88
|
+
stdout.read.split("\n").each do |line|
|
89
|
+
next unless line.start_with?('Page ')
|
90
|
+
|
91
|
+
page_number += 1
|
92
|
+
filenames << File.join(tmpdir, "#{baseid}-page#{page_number}.#{image_extension}")
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
filenames
|
97
|
+
end
|
98
|
+
# rubocop:enable Metrics/MethodLength
|
99
|
+
|
100
|
+
def gsdevice
|
101
|
+
raise NotImplementedError
|
102
|
+
end
|
103
|
+
|
104
|
+
PAGE_COUNT_REGEXP = %r{^Pages: +(\d+)$}.freeze
|
105
|
+
|
106
|
+
def pagecount
|
107
|
+
return @pagecount if defined? @pagecount
|
108
|
+
|
109
|
+
cmd = "pdfinfo #{pdfpath}"
|
110
|
+
Open3.popen3(cmd) do |_stdin, stdout, _stderr, _wait_thr|
|
111
|
+
match = PAGE_COUNT_REGEXP.match(stdout.read)
|
112
|
+
@pagecount = match[1].to_i
|
113
|
+
end
|
114
|
+
@pagecount
|
115
|
+
end
|
116
|
+
|
117
|
+
def ppi
|
118
|
+
if looks_scanned?
|
119
|
+
# For scanned media, defer to detected image PPI:
|
120
|
+
pdfinfo.ppi
|
121
|
+
else
|
122
|
+
# 400 dpi for something that does not look like scanned media:
|
123
|
+
default_dpi
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def looks_scanned?
|
128
|
+
max_image_px = pdfinfo.width * pdfinfo.height
|
129
|
+
# single 10mp+ image per page?
|
130
|
+
single_image_per_page? && max_image_px > 1024 * 1024 * 10
|
131
|
+
end
|
132
|
+
|
133
|
+
def single_image_per_page?
|
134
|
+
pdfinfo.page_count == pagecount
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
require "iiif_print/split_pdfs/pages_to_jpgs_splitter"
|
141
|
+
require "iiif_print/split_pdfs/pages_to_pngs_splitter"
|
142
|
+
require "iiif_print/split_pdfs/pages_to_tiffs_splitter"
|