wax_tasks 0.3.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of wax_tasks might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d88624cfe6327eb5449dda2246712718f4bdfb2fea155e12e1593b6a5e10af08
4
- data.tar.gz: d6aa6846dedcbaa295d4ae3f909c1f22fa68c55db1524b7aea1e7a958ef241ad
3
+ metadata.gz: de0380de7a7fbba0a355c3983945d2517460f0ed7e0848e518896140ea3a1992
4
+ data.tar.gz: 609510c447fa45acd2cd638141fe8f81180a27e9b1e43d6fae714e2b052e1add
5
5
  SHA512:
6
- metadata.gz: b3c8dd46e8631bbff1bcdb2792255882ac37cb1afd4b8ed9e193e24e0625c934eeda807d56417832df1fbfbdc9dec435df02df5058c3907794761bf704734fcc
7
- data.tar.gz: 954bbf0a5a9561ad4b74bf751de5e45e6cc5ec7de21a5fd82720789483470c29f1efc161fa62af1e69c2b4b8479956f82a4acd73277d3efd42bf36cfb0a2ef97
6
+ metadata.gz: 513a791a3d0a622af9f0cd386d5ad4d95c76a7c2feae1cd18126f2cdd83df605f86d81884a42e95a8ff152ab784898457b18c151a5c5db2ab71ea13b34c84a6b
7
+ data.tar.gz: 158e96f76794b2fbafc26c6881cca091b5621fe7cdf4c0700353af4d89dfd1fe43ab3754820332e516fb8bfa04101c76a5f96e95d6438eb9ff68932d1b84f808
data/Gemfile CHANGED
@@ -1,8 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
  gemspec
3
5
 
4
6
  # dev/test utilities
7
+ gem 'bundle-audit', require: false
5
8
  gem 'diane', require: false
6
9
  gem 'rubocop', require: false
7
- gem 'simplecov', require: false
10
+ gem 'simplecov', '0.17.1', require: false
8
11
  gem 'yard', require: false
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  desc 'destroy wax-generated collection files, including pages, derivatives, and search index(es)'
  task :clobber do
    # Every command-line word after the first is registered as an action-less
    # task (presumably so Rake does not fail when it later tries to resolve
    # those words as task names -- confirm against Rake's behaviour).
    words = ARGV.drop(1)
    words.each { |word| task word.to_sym }

    # Words starting with '-' are treated as flags, not collection names.
    collections = words.reject { |word| word.start_with? '-' }

    if collections.empty?
      raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:clobber'").magenta
    end

    site = WaxTasks::Site.new
    collections.each { |name| site.clobber name }
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  namespace :derivatives do
    desc 'generate iiif derivatives from local image files'
    task :iiif do
      # Register every trailing command-line word as an action-less task,
      # then keep the ones that are not flags as collection names.
      words = ARGV.drop(1)
      words.each { |word| task word.to_sym }
      collections = words.reject { |word| word.start_with? '-' }

      if collections.empty?
        raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:iiif'").magenta
      end

      site = WaxTasks::Site.new
      collections.each { |name| site.generate_derivatives(name, 'iiif') }
    end
  end

  # alias wax:iiif to wax:derivatives:iiif for backwards compatibility
  task :iiif do
    target = Rake::Task['wax:derivatives:iiif']
    comment = target.full_comment
    desc comment if comment

    # All trailing words (flags included) are forwarded to the real task.
    forwarded = ARGV.drop(1)
    forwarded.each { |word| task word.to_sym }
    target.invoke(*forwarded)
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  namespace :derivatives do
    # The description previously repeated the iiif task's text; this task
    # asks the site for 'simple' derivatives, so say so.
    desc 'generate simple image derivatives from local image files'
    task :simple do
      # Register every trailing command-line word as an action-less task,
      # then keep the ones that are not flags as collection names.
      words = ARGV.drop(1)
      words.each { |word| task word.to_sym }
      collections = words.reject { |word| word.start_with? '-' }

      if collections.empty?
        raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:simple'").magenta
      end

      site = WaxTasks::Site.new
      collections.each { |name| site.generate_derivatives(name, 'simple') }
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  desc 'generate collection md pages from yaml or csv data source'
  task :pages do
    # Register every trailing command-line word as an action-less task,
    # then keep the ones that are not flags as collection names.
    words = ARGV.drop(1)
    words.each { |word| task word.to_sym }
    collections = words.reject { |word| word.start_with? '-' }

    if collections.empty?
      raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:pages').magenta
    end

    site = WaxTasks::Site.new
    collections.each { |name| site.generate_pages name }
  end

  # alias :pagemaster to wax:pages for backwards compatibility
  task :pagemaster do
    target = Rake::Task['wax:pages']
    comment = target.full_comment
    desc comment if comment

    # All trailing words (flags included) are forwarded to the real task.
    forwarded = ARGV.drop(1)
    forwarded.each { |word| task word.to_sym }
    target.invoke(*forwarded)
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  desc 'build lunr search index (with default UI if UI=true)'
  task :search do
    # Register every trailing command-line word as an action-less task,
    # then keep the ones that are not flags as collection names.
    words = ARGV.drop(1)
    words.each { |word| task word.to_sym }
    collections = words.reject { |word| word.start_with? '-' }

    if collections.empty?
      raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:search').magenta
    end

    site = WaxTasks::Site.new
    collections.each { |name| site.generate_static_search name }
  end

  # alias lunr to search for backwards compatibility
  task :lunr do
    target = Rake::Task['wax:search']
    comment = target.full_comment
    desc comment if comment

    # All trailing words (flags included) are forwarded to the real task.
    forwarded = ARGV.drop(1)
    forwarded.each { |word| task word.to_sym }
    target.invoke(*forwarded)
  end
end
@@ -1,37 +1,36 @@
1
- require_relative 'wax_tasks/branch'
1
+ # frozen_string_literal: true
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # stdlib
7
+ require 'csv'
8
+ require 'fileutils'
9
+ require 'json'
10
+
11
+ # 3rd party
12
+ require 'rainbow'
13
+ require 'safe_yaml'
14
+
15
+ # relative
16
+ require_relative 'wax_tasks/asset'
2
17
  require_relative 'wax_tasks/collection'
18
+ require_relative 'wax_tasks/config'
3
19
  require_relative 'wax_tasks/error'
4
- require_relative 'wax_tasks/iiif_collection'
5
- require_relative 'wax_tasks/local_branch'
6
- require_relative 'wax_tasks/lunr_collection'
7
- require_relative 'wax_tasks/lunr_index'
8
- require_relative 'wax_tasks/pagemaster_collection'
9
- require_relative 'wax_tasks/task_runner'
10
- require_relative 'wax_tasks/travis_branch'
20
+ require_relative 'wax_tasks/index'
21
+ require_relative 'wax_tasks/item'
22
+ require_relative 'wax_tasks/record'
23
+ require_relative 'wax_tasks/site'
11
24
  require_relative 'wax_tasks/utils'
12
25
 
13
- # The WaxTasks module powers the Rake tasks in `./tasks`, including:
14
- #
15
- # wax:pagemaster :: generate collection md pages from csv, json, or yaml file
16
- # wax:lunr :: build lunr search index (with default UI if UI=true)
17
- # wax:iiif :: generate iiif derivatives from local image files
18
- # wax:jspackage :: write a simple package.json for monitoring js dependencies
19
- # wax:push :: push compiled Jekyll site to git branch
20
- # wax:test :: run htmlproofer, rspec if .rspec file exists
21
26
  #
22
- # Tasks are run by a WaxTasks::TaskRunner object which is resposible
23
- # for reading in site config from `_config.yml`
24
27
  module WaxTasks
25
- # ----------
26
- # CONSTANTS
27
- # ----------
28
-
29
- # @return [String] The path to load Jekyll site config
30
- DEFAULT_CONFIG = '_config.yml'.freeze
31
- # @return [String] The path to write WaxTasks::LunrIndex
32
- LUNR_INDEX_PATH = 'js/lunr-index.json'.freeze
33
- # @return [String] The path to write default LunrUI
34
- LUNR_UI_PATH = 'js/lunr-ui.js'.freeze
35
- # @return [String] The path to the compiled Jekyll site
36
- SITE_DIR = './_site'.freeze
28
+ DEFAULT_CONFIG_FILE = './_config.yml'
29
+ #
30
+ #
31
# Loads the site configuration by passing a YAML file path to
# Utils.validate_yaml.
#
# @param file [String, nil] path to the config file; DEFAULT_CONFIG_FILE is
#   used when nil
# @return whatever Utils.validate_yaml returns for that path
# @raise [WaxTasks::Error::InvalidConfig] when validate_yaml raises any
#   StandardError
def self.config_from_file(file = nil)
  path = file || DEFAULT_CONFIG_FILE
  Utils.validate_yaml(path)
rescue StandardError => e
  # Report the path that was actually tried; the raw argument is nil
  # whenever the default config file is used.
  raise WaxTasks::Error::InvalidConfig, "Cannot open config file '#{path}'.\n #{e}"
end
37
36
  end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module WaxTasks
  # One generated image derivative: its relative output path, the variant
  # label it was built for, and the image object itself.
  Derivative = Struct.new(:path, :label, :img)

  # A single image file belonging to an item identified by a pid.
  class Asset
    attr_reader :id, :path

    # Variant label => target width in pixels, applied unless overridden.
    DEFAULT_VARIANTS = { 'thumbnail' => 250, 'full' => 1140 }.freeze

    # @param path [String] path to the image file
    # @param pid [String] pid of the item this asset belongs to
    # @param variants [Hash, nil] extra or overriding label => width pairs,
    #   merged over DEFAULT_VARIANTS; nil is treated as no overrides
    def initialize(path, pid, variants = {})
      @path = path
      @pid = pid
      @id = asset_id
      @variants = DEFAULT_VARIANTS.merge(variants || {})
    end

    # Derives the asset id from the file name without its extension,
    # prefixed with "<pid>_" unless the base name already equals the pid.
    #
    # @return [String]
    def asset_id
      base = File.basename @path, '.*'
      base == @pid ? base : "#{@pid}_#{base}"
    end

    # Builds one jpg Derivative per configured variant. The source image is
    # opened afresh for every variant. When the requested width exceeds the
    # source width a warning is printed and the image is not resized.
    #
    # @return [Array<Derivative>] with paths of the form "<id>/<label>.jpg"
    def simple_derivatives
      @variants.map do |label, width|
        img = MiniMagick::Image.open @path
        if width > img.width
          warn Rainbow("Tried to create derivative #{width}px wide, but asset #{@id} for item #{@pid} only has a width of #{img.width}px.").yellow
        else
          img.resize width
        end

        img.format 'jpg'
        Derivative.new("#{@id}/#{label}.jpg", label, img)
      end
    end

    # Builds a WaxIiif::ImageRecord for this asset from a copy of base_opts.
    #
    # @param is_only [Boolean] when true, no "Page N" section label is set
    # @param index [Integer] zero-based position of the asset; index 0 is
    #   marked as primary
    # @param base_opts [Hash] options shared by the caller; not modified
    # @return [WaxIiif::ImageRecord]
    def to_iiif_image_record(is_only, index, base_opts)
      opts = base_opts.clone

      opts[:is_primary]    = index.zero?
      opts[:section_label] = "Page #{index + 1}" unless is_only
      opts[:path]          = @path
      opts[:manifest_id]   = @pid
      opts[:id]            = @id
      opts[:variants]      = @variants

      WaxIiif::ImageRecord.new(opts)
    end
  end
end
@@ -1,65 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'collection/images'
4
+ require_relative 'collection/metadata'
5
+
1
6
  module WaxTasks
2
- # Parent class representing a Jekyll collection
3
- # that cannot be created directly. Only child classes
4
- # (IiifCollection, LunrCollection, PagemasterCollection)
5
- # can be initialized.
6
7
  #
7
- # @attr config [Hash] the collection config within site config
8
- # @attr name [String] the name of the collection in site:collections
9
- # @attr page_dir [String] the directory path for generated collection pages
10
- # @attr site [Hash] the site config
11
8
  class Collection
12
- attr_reader :name, :page_dir
13
- private_class_method :new
9
+ attr_reader :name, :config, :ext, :search_fields,
10
+ :page_source, :metadata_source, :imagedata_source,
11
+ :iiif_derivative_source, :simple_derivative_source
14
12
 
15
- # This method ensures child classes can be instantiated though
16
- # Collection.new cannot be.
17
- def self.inherited(*)
18
- public_class_method :new
19
- end
13
+ include Collection::Metadata
14
+ include Collection::Images
20
15
 
21
- # Creates a new collection with name @name given site config @site
16
+ IMAGE_DERIVATIVE_DIRECTORY = 'img/derivatives'
17
+
18
+ #
22
19
  #
23
- # @param name [String] the name of the collection in site:collections
24
- # @param site [Hash] the site config
25
- def initialize(name, site)
26
- @name = name
27
- @site = site
28
- @config = collection_config
29
- @page_dir = Utils.make_path(@site[:source_dir],
30
- @site[:collections_dir],
31
- "_#{@name}")
20
+ def initialize(name, config, source, collections_dir, ext)
21
+ @name = name
22
+ @config = config
23
+ @page_extension = ext
24
+ @site_source = source
25
+ @page_source = Utils.safe_join source, collections_dir, "_#{@name}"
26
+ @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source')
27
+ @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source')
28
+ @iiif_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif'
29
+ @simple_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'simple'
30
+ @search_fields = %w[pid label thumbnail permalink collection]
31
+ @image_variants = @config.dig('images', 'variants') || {}
32
32
  end
33
33
 
34
- # Finds the collection config within the site config
35
34
  #
36
- # @return [Hash] the config for the collection
37
- def collection_config
38
- @site[:collections].fetch(@name)
39
- rescue StandardError => e
40
- raise Error::InvalidCollection, "Cannot load collection config for #{@name}.\n#{e}"
35
+ #
36
+ def clobber_pages
37
+ return unless Dir.exist? @page_source
38
+ puts Rainbow("Removing pages from #{@page_source}").cyan
39
+ FileUtils.remove_dir @page_source, true
41
40
  end
42
41
 
43
- # Ingests the collection source data as an Array of Hashes
44
42
  #
45
- # @param source [String] the path to the CSV, JSON, or YAML source file
46
- # @return [Array] the collection data
47
- def ingest_file(source)
48
- raise Error::MissingSource, "Cannot find #{source}" unless File.exist? source
49
-
50
- data = case File.extname(source)
51
- when '.csv'
52
- WaxTasks::Utils.validate_csv(source)
53
- when '.json'
54
- WaxTasks::Utils.validate_json(source)
55
- when /\.ya?ml/
56
- WaxTasks::Utils.validate_yaml(source)
57
- else
58
- raise Error::InvalidSource, "Can't load #{File.extname(source)} files. Culprit: #{source}"
59
- end
60
-
61
- WaxTasks::Utils.assert_pids(data)
62
- WaxTasks::Utils.assert_unique(data)
43
+ #
44
+ def clobber_derivatives
45
+ [@iiif_derivative_source, @simple_derivative_source].each do |dir|
46
+ if Dir.exist? dir
47
+ puts Rainbow("Removing derivatives from #{dir}").cyan
48
+ FileUtils.remove_dir dir, true
49
+ end
50
+ end
63
51
  end
64
52
  end
65
53
  end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_magick'
4
+ require 'progress_bar'
5
+ require 'wax_iiif'
6
+
7
+ #
8
module WaxTasks
  #
  class Collection
    # Image-related behaviour of a collection: discovering image items in the
    # image data directory and writing simple or IIIF derivatives for them.
    # Relies on instance variables set by the including class
    # (@imagedata_source, @metadata_source, @image_variants, @config, @name,
    # @simple_derivative_source, @iiif_derivative_source).
    module Images
      # Builds one WaxTasks::Item per entry found directly inside the image
      # data directory and attaches the metadata record with the same pid.
      #
      # @return [Array<WaxTasks::Item>] only the items reporting valid?
      # @raise [Error::MissingSource] when the image data directory is missing
      def items_from_imagedata
        raise Error::MissingSource, "Cannot find image data source '#{@imagedata_source}'" unless Dir.exist? @imagedata_source

        pre_process_pdfs
        records = records_from_metadata
        Dir.glob(Utils.safe_join(@imagedata_source, '*')).map do |path|
          item = WaxTasks::Item.new(path, @image_variants)
          if item.valid?
            item.record      = records.find { |r| r.pid == item.pid }
            item.iiif_config = @config.dig 'images', 'iiif'
            warn Rainbow("\nCould not find record in #{@metadata_source} for image item #{path}.\n").orange if item.record.nil?
            item
          else
            # This branch evaluates to nil, which the compact below drops.
            # PDFs are skipped silently; other types get a notice.
            puts Rainbow("Skipping #{path} because type #{item.type} is not an accepted format").yellow unless item.type == '.pdf'
          end
        end.compact
      end

      # Splits every '*.pdf' directly inside the image data directory into
      # image files, unless the directory named after the PDF (same path
      # minus the extension) already has content.
      def pre_process_pdfs
        Dir.glob(Utils.safe_join(@imagedata_source, '*.pdf')).each do |path|
          # Strip only the trailing extension; a global substitution would
          # also eat '.pdf' occurring earlier in the path.
          target_dir = path.sub(/\.pdf\z/, '')
          next unless Dir.glob("#{target_dir}/*").empty?

          puts Rainbow("\nPreprocessing #{path} into image files. This may take a minute.\n").cyan

          opts = { output_dir: File.dirname(target_dir) }
          WaxIiif::Utilities::PdfSplitter.split(path, opts)
        end
      end

      # Writes the simple derivatives of every image item below the simple
      # derivative directory and stores each newly written path on the
      # item's record under the derivative label.
      #
      # @return [Array] the records of all items, without nils
      def write_simple_derivatives
        puts Rainbow("Generating simple image derivatives for collection '#{@name}'\nThis might take awhile.").cyan

        # Scan the image data once; the scan includes the PDF pre-pass and
        # the metadata load, and prints a warning per unmatched item.
        items = items_from_imagedata
        bar = ProgressBar.new(items.length)
        bar.write
        items.map do |item|
          item.simple_derivatives.each do |d|
            path = "#{@simple_derivative_source}/#{d.path}"
            FileUtils.mkdir_p File.dirname(path)
            # NOTE(review): an existing file also skips the record update
            # below -- confirm that is intended.
            next if File.exist? path

            d.img.write path
            item.record.set d.label, path if item.record?
          end
          bar.increment!
          bar.write
          item
        end.flat_map(&:record).compact
      end

      # Creates the WaxIiif builder for the given output directory. The
      # 'full' entry is removed from a copy of the configured variants
      # before they are handed to the builder.
      #
      # @param dir [String] output directory, also appended to the base url
      # @return [WaxIiif::Builder]
      def iiif_builder(dir)
        build_opts = {
          base_url: "{{ '/' | absolute_url }}#{dir}",
          output_dir: dir,
          collection_label: @name,
          variants: @image_variants.dup.tap { |h| h.delete 'full' }
        }
        WaxIiif::Builder.new build_opts
      end

      # Passes every '.json' file found anywhere below dir to
      # Utils.add_yaml_front_matter_to_file.
      def add_font_matter_to_json_files(dir)
        Dir.glob("#{dir}/**/*.json").each do |f|
          Utils.add_yaml_front_matter_to_file f
        end
      end

      # Copies the IIIF results onto the records: for every configured
      # variant key present in the manifest JSON, and for the manifest id,
      # a cleaned, '/'-prefixed value is set on the record. Records with no
      # matching manifest are returned unchanged; nil records are dropped.
      #
      # @param records [Array] records, possibly containing nil
      # @param manifests [Array] manifests whose base_id is matched to a pid
      # @return [Array] the non-nil records
      def add_iiif_results_to_records(records, manifests)
        records.map do |record|
          next nil if record.nil?

          manifest = manifests.find { |m| m.base_id == record.pid }
          next record if manifest.nil?

          json = JSON.parse manifest.to_json
          @image_variants.each do |k, _v|
            value = json.dig k
            record.set k, "/#{Utils.content_clean(value)}" unless value.nil?
          end

          record.set 'manifest', "/#{Utils.content_clean(manifest.id)}"
          record
        end.compact
      end

      # Generates the IIIF derivatives for every image item, adds front
      # matter to the resulting JSON files and returns the items' records
      # updated with the IIIF results.
      #
      # @return [Array] the non-nil records
      def write_iiif_derivatives
        items     = items_from_imagedata
        iiif_data = items.map(&:iiif_image_records).flatten
        builder   = iiif_builder @iiif_derivative_source

        builder.load iiif_data

        puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan
        builder.process_data
        records = items.map(&:record).compact

        add_font_matter_to_json_files @iiif_derivative_source
        add_iiif_results_to_records records, builder.manifests
      end
    end
  end
end