wax_tasks 1.1.4 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  namespace :derivatives do
    desc 'generate simple derivatives from local image files'
    task :simple do
      # Everything after the first command-line entry is read from ARGV and
      # registered as an empty rake task (presumably so rake does not reject
      # the collection names as unknown tasks).
      trailing = ARGV.drop(1)
      trailing.each { |name| task(name.to_sym) }

      # Entries starting with '-' are discarded; the rest are collection names.
      collections = trailing.reject { |name| name.start_with?('-') }
      if collections.empty?
        message = Rainbow("You must specify a collection after 'wax:derivatives:simple'").magenta
        raise WaxTasks::Error::MissingArguments, message
      end

      site = WaxTasks::Site.new
      collections.each { |name| site.generate_derivatives(name, 'simple') }
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  desc 'generate collection md pages from yaml or csv data source'
  task :pages do
    # Command-line entries after the first are registered as empty rake tasks
    # and then filtered down to the ones that do not start with '-'.
    trailing = ARGV.drop(1)
    trailing.each { |name| task(name.to_sym) }

    collections = trailing.reject { |name| name.start_with?('-') }
    if collections.empty?
      message = Rainbow('You must specify a collection after wax:pages').magenta
      raise WaxTasks::Error::MissingArguments, message
    end

    site = WaxTasks::Site.new
    collections.each { |name| site.generate_pages(name) }
  end

  # alias :pagemaster to wax:pages for backwards compatibility
  task :pagemaster do
    target = Rake::Task['wax:pages']
    comment = target.full_comment
    desc comment if comment

    # All trailing entries (including any starting with '-') are forwarded.
    trailing = ARGV.drop(1)
    trailing.each { |name| task(name.to_sym) }
    target.invoke(*trailing)
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  desc 'build lunr search index (with default UI if UI=true)'
  task :search do
    # Command-line entries after the first are registered as empty rake tasks
    # and then filtered down to the ones that do not start with '-'.
    trailing = ARGV.drop(1)
    trailing.each { |name| task(name.to_sym) }

    collections = trailing.reject { |name| name.start_with?('-') }
    if collections.empty?
      message = Rainbow('You must specify a collection after wax:search').magenta
      raise WaxTasks::Error::MissingArguments, message
    end

    site = WaxTasks::Site.new
    collections.each { |name| site.generate_static_search(name) }
  end

  # alias lunr to search for backwards compatibility
  task :lunr do
    target = Rake::Task['wax:search']
    comment = target.full_comment
    desc comment if comment

    # All trailing entries (including any starting with '-') are forwarded.
    trailing = ARGV.drop(1)
    trailing.each { |name| task(name.to_sym) }
    target.invoke(*trailing)
  end
end
data/lib/wax_tasks.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # stdlib
7
+ require 'csv'
8
+ require 'fileutils'
9
+ require 'json'
10
+
11
+ # 3rd party
12
+ require 'rainbow'
13
+ require 'safe_yaml'
14
+
15
+ # relative
16
+ require_relative 'wax_tasks/asset'
17
+ require_relative 'wax_tasks/collection'
18
+ require_relative 'wax_tasks/config'
19
+ require_relative 'wax_tasks/error'
20
+ require_relative 'wax_tasks/index'
21
+ require_relative 'wax_tasks/item'
22
+ require_relative 'wax_tasks/record'
23
+ require_relative 'wax_tasks/site'
24
+ require_relative 'wax_tasks/utils'
25
+ require_relative 'wax_tasks/version'
26
+
27
+ #
28
module WaxTasks
  DEFAULT_CONFIG_FILE = './_config.yml'

  # Loads the site configuration by handing a path to Utils.validate_yaml.
  #
  # @param file [String, nil] path of the config file; DEFAULT_CONFIG_FILE is
  #   used when nil
  # @return the value returned by Utils.validate_yaml for that path
  # @raise [WaxTasks::Error::InvalidConfig] when loading raises any StandardError
  def self.config_from_file(file = nil)
    path = file || DEFAULT_CONFIG_FILE
    Utils.validate_yaml(path)
  rescue StandardError => e
    # Name the path that was actually attempted; interpolating `file` would
    # print an empty name whenever the default config file is in use.
    raise WaxTasks::Error::InvalidConfig, "Cannot open config file '#{path}'.\n #{e}"
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module WaxTasks
  # Value object describing one generated image: its relative output path,
  # the variant label it was made for, and the image object itself.
  Derivative = Struct.new(:path, :label, :img)

  # A single image file that belongs to the item identified by +pid+.
  class Asset
    attr_reader :id, :path

    # @param path     path of the image file
    # @param pid      identifier of the item the image belongs to
    # @param variants mapping of variant label => target width
    def initialize(path, pid, variants)
      @path     = path
      @pid      = pid
      @id       = asset_id
      @variants = variants
    end

    # The file's basename without extension, prefixed with "<pid>_" unless
    # the basename already equals the pid.
    def asset_id
      basename = File.basename(@path, '.*')
      return basename if basename == @pid

      basename.prepend("#{@pid}_")
    end

    # Opens the image once per variant and returns one Derivative per variant.
    # A variant wider than the image is not resized; a warning is printed
    # instead. Every derivative is converted to jpg.
    def simple_derivatives
      @variants.map do |label, width|
        image = MiniMagick::Image.open(@path)
        oversized = width > image.width
        if oversized
          notice = "Tried to create derivative #{width}px wide, but asset #{@id} for item #{@pid} only has a width of #{image.width}px."
          warn Rainbow(notice).yellow
        else
          image.resize(width)
        end

        image.format('jpg')
        Derivative.new("#{@id}/#{label}.jpg", label, image)
      end
    end

    # Builds a WaxIiif::ImageRecord from a copy of +base_opts+ extended with
    # this asset's details. The first asset (index 0) is flagged as primary;
    # a "Page N" section label is added unless the asset is the only one.
    def to_iiif_image_record(is_only, index, base_opts)
      record_opts = base_opts.clone

      record_opts[:is_primary] = index.zero?
      record_opts[:section_label] = "Page #{index + 1}" unless is_only
      record_opts.merge!(path: @path, manifest_id: @pid, id: @id, variants: @variants)

      WaxIiif::ImageRecord.new(record_opts)
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'collection/images'
4
+ require_relative 'collection/metadata'
5
+
6
module WaxTasks
  # A named collection of a site: where its pages, metadata, source images
  # and generated image derivatives live, plus the image variants to build.
  class Collection
    attr_reader :name, :config, :ext, :search_fields,
                :page_source, :metadata_source, :imagedata_source,
                :iiif_derivative_source, :simple_derivative_source

    include Collection::Metadata
    include Collection::Images

    IMAGE_DERIVATIVE_DIRECTORY = 'img/derivatives'
    DEFAULT_VARIANTS = { 'thumbnail' => 250, 'fullwidth' => 1140 }.freeze

    # @param name            collection name; pages live in "_<name>"
    # @param config          collection configuration hash (read with #dig)
    # @param source          site source directory
    # @param collections_dir directory (under source) holding collection folders
    # @param ext             page extension appended to generated permalinks
    def initialize(name, config, source, collections_dir, ext)
      @name = name
      @config = config
      # The extension is stored under both names: `ext` is the declared
      # reader (and what the Metadata mixin interpolates into permalinks),
      # while @page_extension is retained because code outside this class
      # body may read it (not visible here).
      @ext = ext
      @page_extension = ext
      @site_source = source
      @page_source = Utils.safe_join source, collections_dir, "_#{@name}"
      @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source')
      @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source')
      @iiif_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif'
      @simple_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'simple'
      @search_fields = %w[pid label thumbnail permalink collection]
      @image_variants = image_variants
    end

    # DEFAULT_VARIANTS merged with the variants configured under
    # images.variants; configured entries win on duplicate labels.
    #
    # @return [Hash] variant label => width
    def image_variants
      configured = @config.dig('images', 'variants') || {}
      DEFAULT_VARIANTS.merge configured
    end

    # Removes the collection's page directory, if it exists.
    def clobber_pages
      return unless Dir.exist? @page_source

      puts Rainbow("Removing pages from #{@page_source}").cyan
      FileUtils.remove_dir @page_source, true
    end

    # Removes the iiif and simple derivative directories that exist.
    def clobber_derivatives
      [@iiif_derivative_source, @simple_derivative_source].each do |dir|
        next unless Dir.exist? dir

        puts Rainbow("Removing derivatives from #{dir}").cyan
        FileUtils.remove_dir dir, true
      end
    end
  end
end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_magick'
4
+ require 'progress_bar'
5
+ require 'wax_iiif'
6
+
7
+ #
8
module WaxTasks
  class Collection
    # Image-related behaviour mixed into Collection: discovering image items,
    # splitting PDFs, and writing simple and IIIF derivatives.
    module Images
      # Builds one WaxTasks::Item per entry directly inside the image data
      # source. PDF entries are skipped (they are split into images first),
      # as are entries the Item reports as invalid. Each kept item gets the
      # metadata record with the same pid (nil, with a warning, when none
      # matches) and the images.iiif configuration.
      #
      # @return [Array<WaxTasks::Item>]
      # @raise [Error::MissingSource] when the image data directory is missing
      def items_from_imagedata
        raise Error::MissingSource, "Cannot find image data source '#{@imagedata_source}'" unless Dir.exist? @imagedata_source

        pre_process_pdfs
        records = records_from_metadata
        Dir.glob(Utils.safe_join(@imagedata_source, '*')).map do |path|
          item = WaxTasks::Item.new(path, @image_variants)
          next if item.type == '.pdf'

          unless item.valid?
            puts Rainbow("Skipping #{path} because type #{item.type} is not an accepted format").yellow
            next
          end

          item.record = records.find { |r| r.pid == item.pid }
          item.iiif_config = @config.dig 'images', 'iiif'
          warn Rainbow("\nCould not find record in #{@metadata_source} for image item #{path}.\n").orange if item.record.nil?
          item
        end.compact
      end

      # Splits every "*.pdf" in the image data source into image files,
      # unless the directory named after the PDF already has content.
      def pre_process_pdfs
        Dir.glob(Utils.safe_join(@imagedata_source, '*.pdf')).each do |path|
          # Strip only the trailing extension. Removing every ".pdf"
          # occurrence would also mangle directory names containing ".pdf".
          target_dir = path.sub(/\.pdf\z/, '')
          next unless Dir.glob("#{target_dir}/*").empty?

          puts Rainbow("\nPreprocessing #{path} into image files. This may take a minute.\n").cyan

          opts = { output_dir: File.dirname(target_dir) }
          WaxIiif::Utilities::PdfSplitter.split(path, opts)
        end
      end

      # Writes each item's simple derivatives below the simple derivative
      # directory. Files that already exist are left alone; for each newly
      # written file the item's record (when it has one) gets the derivative
      # path set under the variant label.
      #
      # @return [Array] the non-nil records of all items
      def write_simple_derivatives
        puts Rainbow("Generating simple image derivatives for collection '#{@name}'\nThis might take awhile.").cyan

        # Build the item list once: it preprocesses PDFs, ingests metadata and
        # scans the image directory, so a second call only repeats that work.
        items = items_from_imagedata
        bar = ProgressBar.new(items.length)
        bar.write
        items.each do |item|
          item.simple_derivatives.each do |d|
            path = "#{@simple_derivative_source}/#{d.path}"
            FileUtils.mkdir_p File.dirname(path)
            next if File.exist? path

            d.img.write path
            item.record.set d.label, "/#{path}" if item.record?
          end
          bar.increment!
          bar.write
        end
        items.flat_map(&:record).compact
      end

      # Creates a WaxIiif::Builder that writes into +dir+. The 'full' variant
      # is removed from the variants handed to the builder.
      #
      # @param dir [String] output directory, also appended to the base url
      def iiif_builder(dir)
        build_opts = {
          base_url: "{{ '/' | absolute_url }}#{dir}",
          output_dir: dir,
          collection_label: @name,
          variants: @image_variants.dup.tap { |h| h.delete 'full' }
        }
        WaxIiif::Builder.new build_opts
      end

      # Passes every JSON file below +dir+ (recursively) to
      # Utils.add_yaml_front_matter_to_file.
      def add_font_matter_to_json_files(dir)
        Dir.glob("#{dir}/**/*.json").each do |f|
          Utils.add_yaml_front_matter_to_file f
        end
      end

      # Copies IIIF results onto the records. For each record with a manifest
      # whose base_id equals its pid, every variant label found non-empty in
      # the manifest JSON is set on the record, followed by a 'manifest'
      # entry. Records without a manifest are returned untouched; nil records
      # are dropped.
      #
      # @return [Array] the non-nil records
      def add_iiif_results_to_records(records, manifests)
        records.map do |record|
          next nil if record.nil?

          manifest = manifests.find { |m| m.base_id == record.pid }
          next record if manifest.nil?

          json = JSON.parse manifest.to_json
          @image_variants.each do |k, _v|
            value = json.fetch k, ''
            record.set k, "/#{Utils.content_clean(value)}" unless value.empty?
          end

          record.set 'manifest', "/#{Utils.content_clean(manifest.id)}"
          record
        end.compact
      end

      # Generates IIIF derivatives for all image items, adds front matter to
      # the JSON files written into the IIIF directory, and returns the items'
      # records updated with the IIIF results.
      def write_iiif_derivatives
        items = items_from_imagedata
        iiif_data = items.map(&:iiif_image_records).flatten
        builder = iiif_builder @iiif_derivative_source

        builder.load iiif_data

        puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan
        builder.process_data
        records = items.map(&:record).compact

        add_font_matter_to_json_files @iiif_derivative_source
        add_iiif_results_to_records records, builder.manifests
      end
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module WaxTasks
  class Collection
    # Metadata behaviour mixed into Collection: building records from pages or
    # from the metadata file, merging updated records, and serialising them.
    module Metadata
      # Adds +fields+ to the existing search fields. The combined list is
      # flattened, stripped of nils and de-duplicated, and stored back.
      #
      # @param fields [Array, String, nil] field name(s) to add
      def search_fields=(fields)
        @search_fields = (@search_fields + Array(fields)).flatten.compact.uniq
      end

      # Builds one Record per ".md" / ".markdown" file in the page directory.
      # Each record is created from the file's YAML, gets the cleaned file
      # text as 'content', and gets a default 'permalink' when it has none.
      #
      # @return [Array<Record>]
      # @raise [Error::PageLoad] when a page cannot be read or parsed
      def records_from_pages
        # No whitespace inside the braces: a space would become part of the
        # alternative and ".markdown" files would never match.
        paths = Dir.glob("#{@page_source}/*.{md,markdown}")
        warn Rainbow("There are no pages in #{@page_source} to index.").orange if paths.empty?

        paths.map do |path|
          begin
            content = WaxTasks::Utils.content_clean File.read(path)
            Record.new(SafeYAML.load_file(path)).tap do |r|
              r.set 'content', content
              r.set 'permalink', "/#{@name}/#{r.pid}#{@ext}" unless r.permalink?
            end
          rescue StandardError => e
            raise Error::PageLoad, "Cannot load page #{path}\n#{e}"
          end
        end
      end

      # Builds one Record per entry of the ingested metadata file. Records
      # without an order get one derived from their position; the configured
      # layout (if any) and the collection name are set on every record.
      #
      # @return [Array<Record>]
      # @raise [Error::MissingSource] when the metadata file does not exist
      def records_from_metadata
        raise Error::MissingSource, "Cannot find metadata source '#{@metadata_source}'" unless File.exist? @metadata_source

        metadata = Utils.ingest @metadata_source
        metadata.each_with_index.map do |meta, i|
          Record.new(meta).tap do |r|
            r.set 'order', Utils.padded_int(i, metadata.length) unless r.order?
            r.set 'layout', @config['layout'] if @config.key? 'layout'
            r.set 'collection', @name
          end
        end
      end

      # Merges +update+ with the records currently in the metadata file and
      # rewrites the file in its own format (csv, json or yaml).
      #
      # NOTE(review): an extension matching none of the branches leaves
      # `reformatted` nil and the file is overwritten with a blank line;
      # presumably Utils.ingest rejects such files before this point - confirm.
      def update_metadata(update)
        records = consolidate_records records_from_metadata, update
        reformatted = case File.extname @metadata_source
                      when '.csv'
                        csv_string records
                      when '.json'
                        json_string records
                      when /\.ya?ml/
                        yaml_string records
                      end
        File.open(@metadata_source, 'w') { |f| f.puts reformatted }
      end

      # Returns the +new+ records plus every +original+ record whose pid is
      # absent from +new+, sorted by order. Neither argument is modified.
      #
      # @return [Array] merged records sorted by #order
      def consolidate_records(original, new)
        lost_record_pids = original.map(&:pid) - new.map(&:pid)
        recovered = lost_record_pids.map do |pid|
          original.find { |r| r.pid == pid }
        end
        (new + recovered).sort_by(&:order)
      end

      # Serialises the records as CSV. The header is the union of all record
      # keys; a record lacking a key gets an empty cell.
      #
      # @return [String]
      def csv_string(records)
        keys = records.flat_map(&:keys).uniq
        CSV.generate do |csv|
          csv << keys
          records.each do |r|
            csv << keys.map { |k| r.hash.fetch(k, '') }
          end
        end
      end

      # Serialises the records' hashes as pretty-printed JSON.
      #
      # @return [String]
      def json_string(records)
        hashes = records.map(&:hash)
        JSON.pretty_generate hashes
      end

      # Serialises the records' hashes as YAML.
      #
      # @return [String]
      def yaml_string(records)
        hashes = records.map(&:hash)
        hashes.to_yaml
      end
    end
  end
end