wax_tasks 1.0.0.pre.beta → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6df6c21281405044f2958da14b8eda608a4a265337f684c1371612817468767a
4
- data.tar.gz: f1a7248a2b83fa1cf34cfd15580baecdefdd91601c7328b987d9ac247316e154
3
+ metadata.gz: 1f09ee093e87d8111dc4f0ee522cb33a6f3298fdd82a8f80c2c48d4f2c24baa5
4
+ data.tar.gz: 12bb3dc4a77e92ebbec52f8b9900a8e6d2349592fc5fdff70a875111e11cc76e
5
5
  SHA512:
6
- metadata.gz: 2a2c5464d13a57208f0c79bbf44a4355b6fad5efd3d2e511ad30ce7a0b4075321868a906a70b680b76ced742d5394fb758a746aff42f0601ea9137ad774dd448
7
- data.tar.gz: 87e84935bcda6fcc92bab5a5fecc7b8fec29d19e3348b6b1f0404dd3b6e47595b778a1b7a25562e0ce7b5c187c700be7c9c20a728bc7adbf98284bba26e7b691
6
+ metadata.gz: 53bee380df63833cf919d3fa2b890617c7dea35bc350dd31fd1cbfd331032a17edf479565b841168f1db65a0ddae29fa14ab32870677ef9a1b3f33d9a0d5e27a
7
+ data.tar.gz: dcd8227c7274a19473a62ea9f5507e813ca70985f1fca21cb1d9955b0e6d300225165b81028fa8aeae07c186da84f8b2c6abb21cd5273cf18a1cd1c01084185f
data/Gemfile CHANGED
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
  gemspec
3
5
 
4
6
  # dev/test utilities
5
- gem 'diane', require: false
6
- gem 'rubocop', '0.59.0', require: false
7
+ gem 'rubocop', require: false
7
8
  gem 'simplecov', require: false
8
9
  gem 'yard', require: false
@@ -1,13 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'wax_tasks'
2
4
 
3
5
  namespace :wax do
4
6
  namespace :derivatives do
5
7
  desc 'generate iiif derivatives from local image files'
6
8
  task :iiif do
7
- arguments = ARGV.drop(1).each { |a| task a.to_sym }
8
- raise WaxTasks::Error::MissingArguments, "You must specify a collection after 'wax:derivatives:iiif'" if arguments.empty?
9
- task_runner = WaxTasks::TaskRunner.new
10
- task_runner.derivatives_iiif(arguments)
9
+ args = ARGV.drop(1).each { |a| task a.to_sym }
10
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:iiif'").magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_derivatives(a, 'iiif') }
11
14
  end
12
15
  end
13
16
 
@@ -1,13 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'wax_tasks'
2
4
 
3
5
  namespace :wax do
4
6
  namespace :derivatives do
5
7
  desc 'generate iiif derivatives from local image files'
6
8
  task :simple do
7
- arguments = ARGV.drop(1).each { |a| task a.to_sym }
8
- raise WaxTasks::Error::MissingArguments, "You must specify a collection after 'wax:derivatives:simple'" if arguments.empty?
9
- task_runner = WaxTasks::TaskRunner.new
10
- task_runner.derivatives_simple(arguments)
9
+ args = ARGV.drop(1).each { |a| task a.to_sym }
10
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:simple'").magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_derivatives(a, 'simple') }
11
14
  end
12
15
  end
13
16
  end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'generate collection md pages from yaml or csv data source'
7
+ task :pages do
8
+ args = ARGV.drop(1).each { |a| task a.to_sym }
9
+ raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:pages').magenta if args.empty?
10
+
11
+ site = WaxTasks::Site.new
12
+ args.each { |a| site.generate_pages a }
13
+ end
14
+
15
+ # alias :pagemaster to wax:pages for backwards compatibility
16
+ task :pagemaster do
17
+ t = Rake::Task['wax:pages']
18
+ desc t.full_comment if t.full_comment
19
+ args = ARGV.drop(1).each { |a| task a.to_sym }
20
+ t.invoke(*args)
21
+ end
22
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'build lunr search index (with default UI if UI=true)'
7
+ task :search do
8
+ args = ARGV.drop(1).each { |a| task a.to_sym }
9
+ raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:search').magenta if args.empty?
10
+
11
+ site = WaxTasks::Site.new
12
+ args.each { |a| site.generate_static_search a }
13
+ end
14
+
15
+ # alias lunr to search for backwards compatibility
16
+ task :lunr do
17
+ t = Rake::Task['wax:search']
18
+ desc t.full_comment if t.full_comment
19
+ args = ARGV.drop(1).each { |a| task a.to_sym }
20
+ t.invoke(*args)
21
+ end
22
+ end
data/lib/wax_tasks.rb CHANGED
@@ -1,43 +1,36 @@
1
- require_relative 'wax_tasks/branch'
1
+ # frozen_string_literal: true
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # stdlib
7
+ require 'csv'
8
+ require 'fileutils'
9
+ require 'json'
10
+
11
+ # 3rd party
12
+ require 'rainbow'
13
+ require 'safe_yaml'
14
+
15
+ # relative
16
+ require_relative 'wax_tasks/asset'
2
17
  require_relative 'wax_tasks/collection'
18
+ require_relative 'wax_tasks/config'
3
19
  require_relative 'wax_tasks/error'
4
- require_relative 'wax_tasks/image_collection'
5
- require_relative 'wax_tasks/local_branch'
6
- require_relative 'wax_tasks/lunr/index'
7
- require_relative 'wax_tasks/pagemaster_collection'
8
- require_relative 'wax_tasks/task_runner'
9
- require_relative 'wax_tasks/travis_branch'
20
+ require_relative 'wax_tasks/index'
21
+ require_relative 'wax_tasks/item'
22
+ require_relative 'wax_tasks/record'
23
+ require_relative 'wax_tasks/site'
10
24
  require_relative 'wax_tasks/utils'
11
25
 
12
- # The WaxTasks module powers the Rake tasks in `./tasks`, including:
13
- #
14
- # wax:pagemaster :: generate collection md pages from csv, json, or yaml file
15
- # wax:lunr :: build lunr search index (with default UI if UI=true)
16
- # wax:derivatives:simple :: generate simple image derivatives from local image files
17
- # wax:derivatves:iiif :: generate iiif derivatives from local image files
18
- # wax:jspackage :: write a simple package.json for monitoring js dependencies
19
- # wax:push :: push compiled Jekyll site to git branch
20
- # wax:test :: run htmlproofer, rspec if .rspec file exists
21
26
  #
22
- # Tasks are run by a WaxTasks::TaskRunner object which is resposible
23
- # for reading in site config from `_config.yml`
24
27
  module WaxTasks
25
- # ----------
26
- # CONSTANTS
27
- # ----------
28
-
29
- # @return [String] The path to load Jekyll site config
30
- DEFAULT_CONFIG = '_config.yml'.freeze
31
-
32
- # @return [String] The path to write default LunrUI
33
- LUNR_UI_PATH = 'js/lunr-ui.js'.freeze
34
-
35
- # @return [String] The path to the compiled Jekyll site
36
- SITE_DIR = '_site'.freeze
37
-
38
- # @return [String] Default image variant/derivative widths to generate
39
- DEFAULT_IMAGE_VARIANTS = { thumbnail: 250, full: 1140 }.freeze
40
-
41
- # @return [String] The path where image derivatives should be generated
42
- DEFAULT_DERIVATIVE_DIR = 'img/derivatives'.freeze
28
+ DEFAULT_CONFIG_FILE = './_config.yml'
29
+ #
30
+ #
31
+ def self.config_from_file(file = nil)
32
+ Utils.validate_yaml(file || DEFAULT_CONFIG_FILE)
33
+ rescue StandardError => e
34
+ raise WaxTasks::Error::InvalidConfig, "Cannot open config file '#{file}'.\n #{e}"
35
+ end
43
36
  end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ module WaxTasks
5
+ Derivative = Struct.new(:path, :label, :img)
6
+ attr_reader :id, :path
7
+
8
+ #
9
+ class Asset
10
+ def initialize(path, pid, variants)
11
+ @path = path
12
+ @pid = pid
13
+ @id = asset_id
14
+ @variants = variants
15
+ end
16
+
17
+ #
18
+ #
19
+ def asset_id
20
+ id = File.basename(@path, '.*')
21
+ id.prepend "#{@pid}_" unless id == @pid
22
+ id
23
+ end
24
+
25
+ #
26
+ #
27
+ def simple_derivatives
28
+ @variants.map do |label, width|
29
+ img = MiniMagick::Image.open(@path)
30
+ raise WaxTasks::Error::InvalidConfig, "Requested variant width '#{width}' is larger than original image width." if width > img.width
31
+
32
+ img.resize width
33
+ img.format 'jpg'
34
+ Derivative.new("#{@id}/#{label}.jpg", label, img)
35
+ end
36
+ end
37
+
38
+ #
39
+ #
40
+ def to_iiif_image_record(is_only, index, base_opts)
41
+ opts = base_opts.clone
42
+ opts[:is_primary] = index.zero?
43
+ opts[:section_label] = "Page #{index + 1}" unless is_only
44
+ opts[:path] = @path
45
+ opts[:manifest_id] = @pid
46
+ opts[:id] = @id
47
+
48
+ WaxIiif::ImageRecord.new(opts)
49
+ end
50
+ end
51
+ end
@@ -1,92 +1,29 @@
1
- module WaxTasks
2
- # Parent class representing a Jekyll collection
3
- # that cannot be created directly. Only child classes
4
- # (IiifCollection, LunrCollection, PagemasterCollection)
5
- # can be initialized.
6
- class Collection
7
- attr_accessor :name, :site
8
- private_class_method :new
1
+ # frozen_string_literal: true
9
2
 
10
- # This method ensures child classes can be instantiated though
11
- # Collection.new cannot be.
12
- def self.inherited(*)
13
- public_class_method :new
14
- end
3
+ require_relative 'collection/images'
4
+ require_relative 'collection/metadata'
15
5
 
16
- # Creates a new collection with name @name given site configuration @site
17
- #
18
- # @param name [String] name of the collection in site:collections
19
- # @param site [Hash] site config
20
- def initialize(name, site)
21
- @name = name
22
- @site = site
23
- @config = self.config
24
- end
25
-
26
- # Finds the collection config within the site config
27
- #
28
- # @return [Hash] the config for the collection
29
- def config
30
- @site[:collections].fetch(@name)
31
- rescue StandardError => e
32
- raise Error::InvalidCollection, "Cannot load collection config for #{@name}.\n#{e}"
33
- end
6
+ module WaxTasks
7
+ #
8
+ class Collection
9
+ attr_reader :name, :config, :ext, :search_fields,
10
+ :page_source, :metadata_source, :imagedata_source
34
11
 
35
- # Returns the target directory for generated collection pages
36
- #
37
- # @return [String] path
38
- def page_dir
39
- WaxTasks::Utils.root_path(@site[:source_dir], @site[:collections_dir], "_#{@name}")
40
- end
12
+ include Collection::Metadata
13
+ include Collection::Images
41
14
 
42
- # Constructs the path to the data source file
43
15
  #
44
- # @return [String] the path to the data source file
45
- def metadata_source_path
46
- source = @config.dig('metadata', 'source')
47
- raise WaxTasks::Error::MissingSource, "Missing collection source in _config.yml for #{@name}" if source.nil?
48
- WaxTasks::Utils.root_path(@site[:source_dir], '_data', source)
49
- end
50
-
51
- # Ingests the collection source data as an Array of Hashes
52
16
  #
53
- # @param source [String] the path to the CSV, JSON, or YAML source file
54
- # @return [Array] the collection data
55
- def ingest_file(source)
56
- raise Error::MissingSource, "Cannot find #{source}" unless File.exist? source
57
-
58
- data = case File.extname(source)
59
- when '.csv'
60
- WaxTasks::Utils.validate_csv(source)
61
- when '.json'
62
- WaxTasks::Utils.validate_json(source)
63
- when /\.ya?ml/
64
- WaxTasks::Utils.validate_yaml(source)
65
- else
66
- raise Error::InvalidSource, "Can't load #{File.extname(source)} files. Culprit: #{source}"
67
- end
68
-
69
- WaxTasks::Utils.assert_pids(data)
70
- WaxTasks::Utils.assert_unique(data)
71
- end
72
-
73
- # @return [Nil]
74
- def overwrite_metadata
75
- src = self.metadata_source_path
76
- puts "Writing image derivative info #{src}.".cyan
77
- case File.extname(src)
78
- when '.csv'
79
- keys = @metadata.map(&:keys).inject(&:|)
80
- csv_string = keys.to_csv
81
- @metadata.each { |h| csv_string += h.values_at(*keys).to_csv }
82
- File.open(src, 'w') { |f| f.write(csv_string) }
83
- when '.json'
84
- File.open(src, 'w') { |f| f.write(JSON.pretty_generate(@metadata)) }
85
- when /\.ya?ml/
86
- File.open(src, 'w') { |f| f.write(@metadata.to_yaml) }
87
- else
88
- raise Error::InvalidSource
89
- end
17
+ def initialize(name, config, source, collections_dir, ext)
18
+ @name = name
19
+ @config = config
20
+ @page_extension = ext
21
+ @site_source = source
22
+ @page_source = Utils.safe_join source, collections_dir, "_#{name}"
23
+ @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source')
24
+ @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source')
25
+ @search_fields = %w[pid label thumbnail permalink collection]
26
+ @image_variants = image_variants
90
27
  end
91
28
  end
92
29
  end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_magick'
4
+ require 'progress_bar'
5
+ require 'progress_bar/core_ext/enumerable_with_progress'
6
+ require 'wax_iiif'
7
+
8
+ #
9
+ module WaxTasks
10
+ #
11
+ class Collection
12
+ #
13
+ module Images
14
+ #
15
+ #
16
+ def image_variants
17
+ default_variants = { 'thumbnail' => 250, 'full' => 1140 }
18
+ custom_variants = @config.dig('images', 'variants') || {}
19
+ default_variants.merge custom_variants
20
+ end
21
+
22
+ #
23
+ #
24
+ def items_from_imagedata
25
+ raise Error::MissingSource, "Cannot find image data source '#{@imagedata_source}'" unless Dir.exist? @imagedata_source
26
+
27
+ pre_process_pdfs
28
+ records = records_from_metadata
29
+ Dir.glob(Utils.safe_join(@imagedata_source, '*')).map do |path|
30
+ item = WaxTasks::Item.new(path, @image_variants)
31
+ next unless item.valid?
32
+
33
+ item.record = records.find { |r| r.pid == item.pid }
34
+ item.iiif_config = @config.dig 'images', 'iiif'
35
+ warn Rainbow("\nWarning:\nCould not find record in #{@metadata_source} for image item #{path}.\n").orange if item.record.nil?
36
+ item
37
+ end.compact
38
+ end
39
+
40
+ #
41
+ #
42
+ def pre_process_pdfs
43
+ Dir.glob(Utils.safe_join(@imagedata_source, '*.pdf')).each do |path|
44
+ target_dir = path.gsub '.pdf', ''
45
+ next unless Dir.glob("#{target_dir}/*").empty?
46
+
47
+ puts Rainbow("\nPreprocessing #{path} into image files. This may take a minute.\n").cyan
48
+ opts = { output_dir: File.dirname(target_dir) }
49
+ WaxIiif::Utilities::PdfSplitter.split(path, opts)
50
+ end
51
+ end
52
+
53
+ #
54
+ #
55
+ def write_simple_derivatives(dir)
56
+ puts Rainbow("Generating simple image derivatives for collection '#{@name}'\nThis might take awhile.").cyan
57
+ items_from_imagedata.each_with_progress.map do |item|
58
+ item.simple_derivatives.each do |d|
59
+ path = "#{dir}/#{d.path}"
60
+ FileUtils.mkdir_p File.dirname(path)
61
+ next if File.exist? path
62
+
63
+ d.img.write path
64
+ item.record.set d.label, path if item.record?
65
+ end
66
+ item
67
+ end.flat_map(&:record)
68
+ end
69
+
70
+ #
71
+ #
72
+ def iiif_builder(dir)
73
+ build_opts = {
74
+ base_url: "{{ '/' | absolute_url }}#{dir}",
75
+ output_dir: dir,
76
+ # variants: @image_variants,
77
+ collection_label: @name
78
+ }
79
+ WaxIiif::Builder.new(build_opts)
80
+ end
81
+
82
+ #
83
+ #
84
+ def add_font_matter_to_json_files(dir)
85
+ Dir.glob("#{dir}/**/*.json").each do |f|
86
+ Utils.add_yaml_front_matter_to_file f
87
+ end
88
+ end
89
+
90
+ #
91
+ #
92
+ def add_iiif_results_to_records(records, manifests)
93
+ records.map do |record|
94
+ next nil if record.nil?
95
+
96
+ manifest = manifests.find { |m| m.base_id == record.pid }
97
+ next record if manifest.nil?
98
+
99
+ json = JSON.parse manifest.to_json
100
+ @image_variants.each do |k, _v|
101
+ value = json.dig k
102
+ record.set k, "/#{Utils.content_clean(value)}" unless value.nil?
103
+ end
104
+
105
+ record.set 'manifest', "/#{Utils.content_clean(manifest.id)}"
106
+ record
107
+ end.compact
108
+ end
109
+
110
+ #
111
+ #
112
+ def write_iiif_derivatives(dir)
113
+ items = items_from_imagedata
114
+ iiif_data = items.map(&:iiif_image_records).flatten
115
+ builder = iiif_builder(dir)
116
+
117
+ builder.load iiif_data
118
+
119
+ puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan
120
+ builder.process_data
121
+
122
+ add_font_matter_to_json_files dir
123
+ add_iiif_results_to_records items.map(&:record), builder.manifests
124
+ end
125
+ end
126
+ end
127
+ end