wax_tasks 0.3.2 → 1.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wax_tasks might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d88624cfe6327eb5449dda2246712718f4bdfb2fea155e12e1593b6a5e10af08
4
- data.tar.gz: d6aa6846dedcbaa295d4ae3f909c1f22fa68c55db1524b7aea1e7a958ef241ad
3
+ metadata.gz: de0380de7a7fbba0a355c3983945d2517460f0ed7e0848e518896140ea3a1992
4
+ data.tar.gz: 609510c447fa45acd2cd638141fe8f81180a27e9b1e43d6fae714e2b052e1add
5
5
  SHA512:
6
- metadata.gz: b3c8dd46e8631bbff1bcdb2792255882ac37cb1afd4b8ed9e193e24e0625c934eeda807d56417832df1fbfbdc9dec435df02df5058c3907794761bf704734fcc
7
- data.tar.gz: 954bbf0a5a9561ad4b74bf751de5e45e6cc5ec7de21a5fd82720789483470c29f1efc161fa62af1e69c2b4b8479956f82a4acd73277d3efd42bf36cfb0a2ef97
6
+ metadata.gz: 513a791a3d0a622af9f0cd386d5ad4d95c76a7c2feae1cd18126f2cdd83df605f86d81884a42e95a8ff152ab784898457b18c151a5c5db2ab71ea13b34c84a6b
7
+ data.tar.gz: 158e96f76794b2fbafc26c6881cca091b5621fe7cdf4c0700353af4d89dfd1fe43ab3754820332e516fb8bfa04101c76a5f96e95d6438eb9ff68932d1b84f808
data/Gemfile CHANGED
@@ -1,8 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
  gemspec
3
5
 
4
6
  # dev/test utilities
7
+ gem 'bundle-audit', require: false
5
8
  gem 'diane', require: false
6
9
  gem 'rubocop', require: false
7
- gem 'simplecov', require: false
10
+ gem 'simplecov', '0.17.1', require: false
8
11
  gem 'yard', require: false
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'destroy wax-generated collection files, including pages, derivatives, and search index(es)'
7
+ task :clobber do
8
+ args = ARGV.drop(1).each { |a| task a.to_sym }
9
+ args.reject! { |a| a.start_with? '-' }
10
+
11
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:clobber'").magenta if args.empty?
12
+
13
+ site = WaxTasks::Site.new
14
+ args.each { |a| site.clobber a }
15
+ end
16
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ namespace :derivatives do
7
+ desc 'generate iiif derivatives from local image files'
8
+ task :iiif do
9
+ args = ARGV.drop(1).each { |a| task a.to_sym }
10
+ args.reject! { |a| a.start_with? '-' }
11
+
12
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:iiif'").magenta if args.empty?
13
+
14
+ site = WaxTasks::Site.new
15
+ args.each { |a| site.generate_derivatives(a, 'iiif') }
16
+ end
17
+ end
18
+
19
+ # alias wax:iiif to wax:derivatives:iiif for backwards compatibility
20
+ task :iiif do
21
+ t = Rake::Task['wax:derivatives:iiif']
22
+ desc t.full_comment if t.full_comment
23
+ arguments = ARGV.drop(1).each { |a| task a.to_sym }
24
+ t.invoke(*arguments)
25
+ end
26
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ namespace :derivatives do
7
+ desc 'generate iiif derivatives from local image files'
8
+ task :simple do
9
+ args = ARGV.drop(1).each { |a| task a.to_sym }
10
+ args.reject! { |a| a.start_with? '-' }
11
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:simple'").magenta if args.empty?
12
+
13
+ site = WaxTasks::Site.new
14
+ args.each { |a| site.generate_derivatives(a, 'simple') }
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'generate collection md pages from yaml or csv data source'
7
+ task :pages do
8
+ args = ARGV.drop(1).each { |a| task a.to_sym }
9
+ args.reject! { |a| a.start_with? '-' }
10
+ raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:pages').magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_pages a }
14
+ end
15
+
16
+ # alias :pagemaster to wax:pages for backwards compatibility
17
+ task :pagemaster do
18
+ t = Rake::Task['wax:pages']
19
+ desc t.full_comment if t.full_comment
20
+ args = ARGV.drop(1).each { |a| task a.to_sym }
21
+ t.invoke(*args)
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'build lunr search index (with default UI if UI=true)'
7
+ task :search do
8
+ args = ARGV.drop(1).each { |a| task a.to_sym }
9
+ args.reject! { |a| a.start_with? '-' }
10
+ raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:search').magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_static_search a }
14
+ end
15
+
16
+ # alias lunr to search for backwards compatibility
17
+ task :lunr do
18
+ t = Rake::Task['wax:search']
19
+ desc t.full_comment if t.full_comment
20
+ args = ARGV.drop(1).each { |a| task a.to_sym }
21
+ t.invoke(*args)
22
+ end
23
+ end
@@ -1,37 +1,36 @@
1
- require_relative 'wax_tasks/branch'
1
+ # frozen_string_literal: true
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # stdlib
7
+ require 'csv'
8
+ require 'fileutils'
9
+ require 'json'
10
+
11
+ # 3rd party
12
+ require 'rainbow'
13
+ require 'safe_yaml'
14
+
15
+ # relative
16
+ require_relative 'wax_tasks/asset'
2
17
  require_relative 'wax_tasks/collection'
18
+ require_relative 'wax_tasks/config'
3
19
  require_relative 'wax_tasks/error'
4
- require_relative 'wax_tasks/iiif_collection'
5
- require_relative 'wax_tasks/local_branch'
6
- require_relative 'wax_tasks/lunr_collection'
7
- require_relative 'wax_tasks/lunr_index'
8
- require_relative 'wax_tasks/pagemaster_collection'
9
- require_relative 'wax_tasks/task_runner'
10
- require_relative 'wax_tasks/travis_branch'
20
+ require_relative 'wax_tasks/index'
21
+ require_relative 'wax_tasks/item'
22
+ require_relative 'wax_tasks/record'
23
+ require_relative 'wax_tasks/site'
11
24
  require_relative 'wax_tasks/utils'
12
25
 
13
- # The WaxTasks module powers the Rake tasks in `./tasks`, including:
14
- #
15
- # wax:pagemaster :: generate collection md pages from csv, json, or yaml file
16
- # wax:lunr :: build lunr search index (with default UI if UI=true)
17
- # wax:iiif :: generate iiif derivatives from local image files
18
- # wax:jspackage :: write a simple package.json for monitoring js dependencies
19
- # wax:push :: push compiled Jekyll site to git branch
20
- # wax:test :: run htmlproofer, rspec if .rspec file exists
21
26
  #
22
- # Tasks are run by a WaxTasks::TaskRunner object which is resposible
23
- # for reading in site config from `_config.yml`
24
27
  module WaxTasks
25
- # ----------
26
- # CONSTANTS
27
- # ----------
28
-
29
- # @return [String] The path to load Jekyll site config
30
- DEFAULT_CONFIG = '_config.yml'.freeze
31
- # @return [String] The path to write WaxTasks::LunrIndex
32
- LUNR_INDEX_PATH = 'js/lunr-index.json'.freeze
33
- # @return [String] The path to write default LunrUI
34
- LUNR_UI_PATH = 'js/lunr-ui.js'.freeze
35
- # @return [String] The path to the compiled Jekyll site
36
- SITE_DIR = './_site'.freeze
28
+ DEFAULT_CONFIG_FILE = './_config.yml'
29
+ #
30
+ #
31
+ def self.config_from_file(file = nil)
32
+ Utils.validate_yaml(file || DEFAULT_CONFIG_FILE)
33
+ rescue StandardError => e
34
+ raise WaxTasks::Error::InvalidConfig, "Cannot open config file '#{file}'.\n #{e}"
35
+ end
37
36
  end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ module WaxTasks
5
+ Derivative = Struct.new(:path, :label, :img)
6
+
7
+ #
8
+ class Asset
9
+ attr_reader :id, :path
10
+
11
+ DEFAULT_VARIANTS = { 'thumbnail' => 250, 'full' => 1140 }.freeze
12
+
13
+ def initialize(path, pid, variants)
14
+ @path = path
15
+ @pid = pid
16
+ @id = asset_id
17
+ @variants = DEFAULT_VARIANTS.merge variants
18
+ end
19
+
20
+ #
21
+ #
22
+ def asset_id
23
+ id = File.basename @path, '.*'
24
+ id.prepend "#{@pid}_" unless id == @pid
25
+ id
26
+ end
27
+
28
+ #
29
+ #
30
+ def simple_derivatives
31
+ @variants.map do |label, width|
32
+ img = MiniMagick::Image.open @path
33
+ if width > img.width
34
+ warn Rainbow("Tried to create derivative #{width}px wide, but asset #{@id} for item #{@pid} only has a width of #{img.width}px.").yellow
35
+ else
36
+ img.resize width
37
+ end
38
+
39
+ img.format 'jpg'
40
+ Derivative.new("#{@id}/#{label}.jpg", label, img)
41
+ end
42
+ end
43
+
44
+ #
45
+ #
46
+ def to_iiif_image_record(is_only, index, base_opts)
47
+ opts = base_opts.clone
48
+
49
+ opts[:is_primary] = index.zero?
50
+ opts[:section_label] = "Page #{index + 1}" unless is_only
51
+ opts[:path] = @path
52
+ opts[:manifest_id] = @pid
53
+ opts[:id] = @id
54
+ opts[:variants] = @variants
55
+
56
+ WaxIiif::ImageRecord.new(opts)
57
+ end
58
+ end
59
+ end
@@ -1,65 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'collection/images'
4
+ require_relative 'collection/metadata'
5
+
1
6
  module WaxTasks
2
- # Parent class representing a Jekyll collection
3
- # that cannot be created directly. Only child classes
4
- # (IiifCollection, LunrCollection, PagemasterCollection)
5
- # can be initialized.
6
7
  #
7
- # @attr config [Hash] the collection config within site config
8
- # @attr name [String] the name of the collection in site:collections
9
- # @attr page_dir [String] the directory path for generated collection pages
10
- # @attr site [Hash] the site config
11
8
  class Collection
12
- attr_reader :name, :page_dir
13
- private_class_method :new
9
+ attr_reader :name, :config, :ext, :search_fields,
10
+ :page_source, :metadata_source, :imagedata_source,
11
+ :iiif_derivative_source, :simple_derivative_source
14
12
 
15
- # This method ensures child classes can be instantiated though
16
- # Collection.new cannot be.
17
- def self.inherited(*)
18
- public_class_method :new
19
- end
13
+ include Collection::Metadata
14
+ include Collection::Images
20
15
 
21
- # Creates a new collection with name @name given site config @site
16
+ IMAGE_DERIVATIVE_DIRECTORY = 'img/derivatives'
17
+
18
+ #
22
19
  #
23
- # @param name [String] the name of the collection in site:collections
24
- # @param site [Hash] the site config
25
- def initialize(name, site)
26
- @name = name
27
- @site = site
28
- @config = collection_config
29
- @page_dir = Utils.make_path(@site[:source_dir],
30
- @site[:collections_dir],
31
- "_#{@name}")
20
+ def initialize(name, config, source, collections_dir, ext)
21
+ @name = name
22
+ @config = config
23
+ @page_extension = ext
24
+ @site_source = source
25
+ @page_source = Utils.safe_join source, collections_dir, "_#{@name}"
26
+ @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source')
27
+ @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source')
28
+ @iiif_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif'
29
+ @simple_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'simple'
30
+ @search_fields = %w[pid label thumbnail permalink collection]
31
+ @image_variants = @config.dig('images', 'variants') || {}
32
32
  end
33
33
 
34
- # Finds the collection config within the site config
35
34
  #
36
- # @return [Hash] the config for the collection
37
- def collection_config
38
- @site[:collections].fetch(@name)
39
- rescue StandardError => e
40
- raise Error::InvalidCollection, "Cannot load collection config for #{@name}.\n#{e}"
35
+ #
36
+ def clobber_pages
37
+ return unless Dir.exist? @page_source
38
+ puts Rainbow("Removing pages from #{@page_source}").cyan
39
+ FileUtils.remove_dir @page_source, true
41
40
  end
42
41
 
43
- # Ingests the collection source data as an Array of Hashes
44
42
  #
45
- # @param source [String] the path to the CSV, JSON, or YAML source file
46
- # @return [Array] the collection data
47
- def ingest_file(source)
48
- raise Error::MissingSource, "Cannot find #{source}" unless File.exist? source
49
-
50
- data = case File.extname(source)
51
- when '.csv'
52
- WaxTasks::Utils.validate_csv(source)
53
- when '.json'
54
- WaxTasks::Utils.validate_json(source)
55
- when /\.ya?ml/
56
- WaxTasks::Utils.validate_yaml(source)
57
- else
58
- raise Error::InvalidSource, "Can't load #{File.extname(source)} files. Culprit: #{source}"
59
- end
60
-
61
- WaxTasks::Utils.assert_pids(data)
62
- WaxTasks::Utils.assert_unique(data)
43
+ #
44
+ def clobber_derivatives
45
+ [@iiif_derivative_source, @simple_derivative_source].each do |dir|
46
+ if Dir.exist? dir
47
+ puts Rainbow("Removing derivatives from #{dir}").cyan
48
+ FileUtils.remove_dir dir, true
49
+ end
50
+ end
63
51
  end
64
52
  end
65
53
  end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_magick'
4
+ require 'progress_bar'
5
+ require 'wax_iiif'
6
+
7
+ #
8
+ module WaxTasks
9
+ #
10
+ class Collection
11
+ #
12
+ module Images
13
+ #
14
+ #
15
+ def items_from_imagedata
16
+ raise Error::MissingSource, "Cannot find image data source '#{@imagedata_source}'" unless Dir.exist? @imagedata_source
17
+
18
+ pre_process_pdfs
19
+ records = records_from_metadata
20
+ Dir.glob(Utils.safe_join(@imagedata_source, '*')).map do |path|
21
+ item = WaxTasks::Item.new(path, @image_variants)
22
+ if item.valid?
23
+ item.record = records.find { |r| r.pid == item.pid }
24
+ item.iiif_config = @config.dig 'images', 'iiif'
25
+ warn Rainbow("\nCould not find record in #{@metadata_source} for image item #{path}.\n").orange if item.record.nil?
26
+ item
27
+ else
28
+ puts Rainbow("Skipping #{path} because type #{item.type} is not an accepted format").yellow unless item.type == '.pdf'
29
+ end
30
+ end.compact
31
+ end
32
+
33
+ #
34
+ #
35
+ def pre_process_pdfs
36
+ Dir.glob(Utils.safe_join(@imagedata_source, '*.pdf')).each do |path|
37
+ target_dir = path.gsub '.pdf', ''
38
+ next unless Dir.glob("#{target_dir}/*").empty?
39
+
40
+ puts Rainbow("\nPreprocessing #{path} into image files. This may take a minute.\n").cyan
41
+
42
+ opts = { output_dir: File.dirname(target_dir) }
43
+ WaxIiif::Utilities::PdfSplitter.split(path, opts)
44
+ end
45
+ end
46
+
47
+ #
48
+ #
49
+ def write_simple_derivatives
50
+ puts Rainbow("Generating simple image derivatives for collection '#{@name}'\nThis might take awhile.").cyan
51
+
52
+ bar = ProgressBar.new(items_from_imagedata.length)
53
+ bar.write
54
+ items_from_imagedata.map do |item|
55
+ item.simple_derivatives.each do |d|
56
+ path = "#{@simple_derivative_source}/#{d.path}"
57
+ FileUtils.mkdir_p File.dirname(path)
58
+ next if File.exist? path
59
+
60
+ d.img.write path
61
+ item.record.set d.label, path if item.record?
62
+ end
63
+ bar.increment!
64
+ bar.write
65
+ item
66
+ end.flat_map(&:record).compact
67
+ end
68
+
69
+ #
70
+ #
71
+ def iiif_builder(dir)
72
+ build_opts = {
73
+ base_url: "{{ '/' | absolute_url }}#{dir}",
74
+ output_dir: dir,
75
+ collection_label: @name,
76
+ variants: @image_variants.dup.tap { |h| h.delete 'full' }
77
+ }
78
+ WaxIiif::Builder.new build_opts
79
+ end
80
+
81
+ #
82
+ #
83
+ def add_font_matter_to_json_files(dir)
84
+ Dir.glob("#{dir}/**/*.json").each do |f|
85
+ Utils.add_yaml_front_matter_to_file f
86
+ end
87
+ end
88
+
89
+ #
90
+ #
91
+ def add_iiif_results_to_records(records, manifests)
92
+ records.map do |record|
93
+ next nil if record.nil?
94
+
95
+ manifest = manifests.find { |m| m.base_id == record.pid }
96
+ next record if manifest.nil?
97
+
98
+ json = JSON.parse manifest.to_json
99
+ @image_variants.each do |k, _v|
100
+ value = json.dig k
101
+ record.set k, "/#{Utils.content_clean(value)}" unless value.nil?
102
+ end
103
+
104
+ record.set 'manifest', "/#{Utils.content_clean(manifest.id)}"
105
+ record
106
+ end.compact
107
+ end
108
+
109
+ #
110
+ #
111
+ def write_iiif_derivatives
112
+ items = items_from_imagedata
113
+ iiif_data = items.map(&:iiif_image_records).flatten
114
+ builder = iiif_builder @iiif_derivative_source
115
+
116
+ builder.load iiif_data
117
+
118
+ puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan
119
+ builder.process_data
120
+ records = items.map(&:record).compact
121
+
122
+ add_font_matter_to_json_files @iiif_derivative_source
123
+ add_iiif_results_to_records records, builder.manifests
124
+ end
125
+ end
126
+ end
127
+ end