wax_tasks 1.0.0.pre.beta → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6df6c21281405044f2958da14b8eda608a4a265337f684c1371612817468767a
4
- data.tar.gz: f1a7248a2b83fa1cf34cfd15580baecdefdd91601c7328b987d9ac247316e154
3
+ metadata.gz: 1f09ee093e87d8111dc4f0ee522cb33a6f3298fdd82a8f80c2c48d4f2c24baa5
4
+ data.tar.gz: 12bb3dc4a77e92ebbec52f8b9900a8e6d2349592fc5fdff70a875111e11cc76e
5
5
  SHA512:
6
- metadata.gz: 2a2c5464d13a57208f0c79bbf44a4355b6fad5efd3d2e511ad30ce7a0b4075321868a906a70b680b76ced742d5394fb758a746aff42f0601ea9137ad774dd448
7
- data.tar.gz: 87e84935bcda6fcc92bab5a5fecc7b8fec29d19e3348b6b1f0404dd3b6e47595b778a1b7a25562e0ce7b5c187c700be7c9c20a728bc7adbf98284bba26e7b691
6
+ metadata.gz: 53bee380df63833cf919d3fa2b890617c7dea35bc350dd31fd1cbfd331032a17edf479565b841168f1db65a0ddae29fa14ab32870677ef9a1b3f33d9a0d5e27a
7
+ data.tar.gz: dcd8227c7274a19473a62ea9f5507e813ca70985f1fca21cb1d9955b0e6d300225165b81028fa8aeae07c186da84f8b2c6abb21cd5273cf18a1cd1c01084185f
data/Gemfile CHANGED
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
  gemspec
3
5
 
4
6
  # dev/test utilities
5
- gem 'diane', require: false
6
- gem 'rubocop', '0.59.0', require: false
7
+ gem 'rubocop', require: false
7
8
  gem 'simplecov', require: false
8
9
  gem 'yard', require: false
@@ -1,13 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'wax_tasks'
2
4
 
3
5
  namespace :wax do
4
6
  namespace :derivatives do
5
7
  desc 'generate iiif derivatives from local image files'
6
8
  task :iiif do
7
- arguments = ARGV.drop(1).each { |a| task a.to_sym }
8
- raise WaxTasks::Error::MissingArguments, "You must specify a collection after 'wax:derivatives:iiif'" if arguments.empty?
9
- task_runner = WaxTasks::TaskRunner.new
10
- task_runner.derivatives_iiif(arguments)
9
+ args = ARGV.drop(1).each { |a| task a.to_sym }
10
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:iiif'").magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_derivatives(a, 'iiif') }
11
14
  end
12
15
  end
13
16
 
@@ -1,13 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'wax_tasks'
2
4
 
3
5
  namespace :wax do
4
6
  namespace :derivatives do
5
7
  desc 'generate iiif derivatives from local image files'
6
8
  task :simple do
7
- arguments = ARGV.drop(1).each { |a| task a.to_sym }
8
- raise WaxTasks::Error::MissingArguments, "You must specify a collection after 'wax:derivatives:simple'" if arguments.empty?
9
- task_runner = WaxTasks::TaskRunner.new
10
- task_runner.derivatives_simple(arguments)
9
+ args = ARGV.drop(1).each { |a| task a.to_sym }
10
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:simple'").magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_derivatives(a, 'simple') }
11
14
  end
12
15
  end
13
16
  end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  desc 'generate collection md pages from yaml or csv data source'
  task :pages do
    # Words following the task name on the command line are collection names;
    # an empty task is declared for each of them before they are validated.
    collection_names = ARGV.drop(1)
    collection_names.each { |name| task name.to_sym }

    if collection_names.empty?
      message = Rainbow('You must specify a collection after wax:pages').magenta
      raise WaxTasks::Error::MissingArguments, message
    end

    site = WaxTasks::Site.new
    collection_names.each { |name| site.generate_pages name }
  end

  # alias :pagemaster to wax:pages for backwards compatibility
  task :pagemaster do
    pages_task = Rake::Task['wax:pages']
    description = pages_task.full_comment
    desc description if description

    # Same ARGV handling as wax:pages, then hand over to the real task.
    extra_words = ARGV.drop(1)
    extra_words.each { |word| task word.to_sym }
    pages_task.invoke(*extra_words)
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
namespace :wax do
  desc 'build lunr search index (with default UI if UI=true)'
  task :search do
    # Words following the task name on the command line are collection names;
    # an empty task is declared for each of them before they are validated.
    collection_names = ARGV.drop(1)
    collection_names.each { |name| task name.to_sym }

    if collection_names.empty?
      message = Rainbow('You must specify a collection after wax:search').magenta
      raise WaxTasks::Error::MissingArguments, message
    end

    site = WaxTasks::Site.new
    collection_names.each { |name| site.generate_static_search name }
  end

  # alias lunr to search for backwards compatibility
  task :lunr do
    search_task = Rake::Task['wax:search']
    description = search_task.full_comment
    desc description if description

    # Same ARGV handling as wax:search, then hand over to the real task.
    extra_words = ARGV.drop(1)
    extra_words.each { |word| task word.to_sym }
    search_task.invoke(*extra_words)
  end
end
data/lib/wax_tasks.rb CHANGED
@@ -1,43 +1,36 @@
1
- require_relative 'wax_tasks/branch'
1
+ # frozen_string_literal: true
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # stdlib
7
+ require 'csv'
8
+ require 'fileutils'
9
+ require 'json'
10
+
11
+ # 3rd party
12
+ require 'rainbow'
13
+ require 'safe_yaml'
14
+
15
+ # relative
16
+ require_relative 'wax_tasks/asset'
2
17
  require_relative 'wax_tasks/collection'
18
+ require_relative 'wax_tasks/config'
3
19
  require_relative 'wax_tasks/error'
4
- require_relative 'wax_tasks/image_collection'
5
- require_relative 'wax_tasks/local_branch'
6
- require_relative 'wax_tasks/lunr/index'
7
- require_relative 'wax_tasks/pagemaster_collection'
8
- require_relative 'wax_tasks/task_runner'
9
- require_relative 'wax_tasks/travis_branch'
20
+ require_relative 'wax_tasks/index'
21
+ require_relative 'wax_tasks/item'
22
+ require_relative 'wax_tasks/record'
23
+ require_relative 'wax_tasks/site'
10
24
  require_relative 'wax_tasks/utils'
11
25
 
12
- # The WaxTasks module powers the Rake tasks in `./tasks`, including:
13
- #
14
- # wax:pagemaster :: generate collection md pages from csv, json, or yaml file
15
- # wax:lunr :: build lunr search index (with default UI if UI=true)
16
- # wax:derivatives:simple :: generate simple image derivatives from local image files
17
- # wax:derivatves:iiif :: generate iiif derivatives from local image files
18
- # wax:jspackage :: write a simple package.json for monitoring js dependencies
19
- # wax:push :: push compiled Jekyll site to git branch
20
- # wax:test :: run htmlproofer, rspec if .rspec file exists
21
26
  #
22
- # Tasks are run by a WaxTasks::TaskRunner object which is resposible
23
- # for reading in site config from `_config.yml`
24
27
  module WaxTasks
25
- # ----------
26
- # CONSTANTS
27
- # ----------
28
-
29
- # @return [String] The path to load Jekyll site config
30
- DEFAULT_CONFIG = '_config.yml'.freeze
31
-
32
- # @return [String] The path to write default LunrUI
33
- LUNR_UI_PATH = 'js/lunr-ui.js'.freeze
34
-
35
- # @return [String] The path to the compiled Jekyll site
36
- SITE_DIR = '_site'.freeze
37
-
38
- # @return [String] Default image variant/derivative widths to generate
39
- DEFAULT_IMAGE_VARIANTS = { thumbnail: 250, full: 1140 }.freeze
40
-
41
- # @return [String] The path where image derivatives should be generated
42
- DEFAULT_DERIVATIVE_DIR = 'img/derivatives'.freeze
28
+ DEFAULT_CONFIG_FILE = './_config.yml'
29
+ #
30
+ #
31
# Loads the site configuration through Utils.validate_yaml.
#
# @param file [String, nil] path to the config file; DEFAULT_CONFIG_FILE is
#   used when nil
# @return whatever Utils.validate_yaml returns for that path
# @raise [WaxTasks::Error::InvalidConfig] when loading raises any StandardError
def self.config_from_file(file = nil)
  # Resolve the path once so the error message names the file that was
  # actually tried, including when the default is in effect.
  path = file || DEFAULT_CONFIG_FILE
  Utils.validate_yaml(path)
rescue StandardError => e
  raise WaxTasks::Error::InvalidConfig, "Cannot open config file '#{path}'.\n #{e}"
end
43
36
  end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module WaxTasks
  # One generated image variant: its relative output path, the variant label
  # and the in-memory image object.
  Derivative = Struct.new(:path, :label, :img)

  # A single image file belonging to an item identified by a pid.
  class Asset
    # The readers are declared on the class itself; declared in the enclosing
    # module body they would not be available on Asset instances.
    attr_reader :id, :path

    # @param path [String] path to the source image file
    # @param pid the identifier of the item the image belongs to
    # @param variants [Hash] variant label => width
    def initialize(path, pid, variants)
      @path = path
      @pid = pid
      @id = asset_id
      @variants = variants
    end

    # Builds the asset id from the file's basename (extension removed),
    # prefixed with "<pid>_" unless the basename already equals the pid.
    #
    # @return [String]
    def asset_id
      basename = File.basename(@path, '.*')
      basename == @pid ? basename : "#{@pid}_#{basename}"
    end

    # Opens the source image once per variant, resizes it to the variant's
    # width and converts it to JPEG. Nothing is written to disk here.
    #
    # @return [Array<Derivative>] one entry per variant, with the path
    #   "<id>/<label>.jpg"
    # @raise [WaxTasks::Error::InvalidConfig] when a variant is wider than the
    #   source image
    def simple_derivatives
      @variants.map do |label, width|
        img = MiniMagick::Image.open(@path)
        raise WaxTasks::Error::InvalidConfig, "Requested variant width '#{width}' is larger than original image width." if width > img.width

        img.resize width
        img.format 'jpg'
        Derivative.new("#{@id}/#{label}.jpg", label, img)
      end
    end

    # Builds a WaxIiif::ImageRecord for this asset.
    #
    # @param is_only [Boolean] when true, no "Page N" section label is set
    # @param index [Integer] zero-based position of the asset; index 0 is
    #   flagged as primary
    # @param base_opts [Hash] options shared by all assets; cloned, not mutated
    # @return [WaxIiif::ImageRecord]
    def to_iiif_image_record(is_only, index, base_opts)
      opts = base_opts.clone
      opts[:is_primary] = index.zero?
      opts[:section_label] = "Page #{index + 1}" unless is_only
      opts[:path] = @path
      opts[:manifest_id] = @pid
      opts[:id] = @id

      WaxIiif::ImageRecord.new(opts)
    end
  end
end
@@ -1,92 +1,29 @@
1
- module WaxTasks
2
- # Parent class representing a Jekyll collection
3
- # that cannot be created directly. Only child classes
4
- # (IiifCollection, LunrCollection, PagemasterCollection)
5
- # can be initialized.
6
- class Collection
7
- attr_accessor :name, :site
8
- private_class_method :new
1
+ # frozen_string_literal: true
9
2
 
10
- # This method ensures child classes can be instantiated though
11
- # Collection.new cannot be.
12
- def self.inherited(*)
13
- public_class_method :new
14
- end
3
+ require_relative 'collection/images'
4
+ require_relative 'collection/metadata'
15
5
 
16
- # Creates a new collection with name @name given site configuration @site
17
- #
18
- # @param name [String] name of the collection in site:collections
19
- # @param site [Hash] site config
20
- def initialize(name, site)
21
- @name = name
22
- @site = site
23
- @config = self.config
24
- end
25
-
26
- # Finds the collection config within the site config
27
- #
28
- # @return [Hash] the config for the collection
29
- def config
30
- @site[:collections].fetch(@name)
31
- rescue StandardError => e
32
- raise Error::InvalidCollection, "Cannot load collection config for #{@name}.\n#{e}"
33
- end
6
+ module WaxTasks
7
+ #
8
+ class Collection
9
+ attr_reader :name, :config, :ext, :search_fields,
10
+ :page_source, :metadata_source, :imagedata_source
34
11
 
35
- # Returns the target directory for generated collection pages
36
- #
37
- # @return [String] path
38
- def page_dir
39
- WaxTasks::Utils.root_path(@site[:source_dir], @site[:collections_dir], "_#{@name}")
40
- end
12
+ include Collection::Metadata
13
+ include Collection::Images
41
14
 
42
- # Constructs the path to the data source file
43
15
  #
44
- # @return [String] the path to the data source file
45
- def metadata_source_path
46
- source = @config.dig('metadata', 'source')
47
- raise WaxTasks::Error::MissingSource, "Missing collection source in _config.yml for #{@name}" if source.nil?
48
- WaxTasks::Utils.root_path(@site[:source_dir], '_data', source)
49
- end
50
-
51
- # Ingests the collection source data as an Array of Hashes
52
16
  #
53
- # @param source [String] the path to the CSV, JSON, or YAML source file
54
- # @return [Array] the collection data
55
- def ingest_file(source)
56
- raise Error::MissingSource, "Cannot find #{source}" unless File.exist? source
57
-
58
- data = case File.extname(source)
59
- when '.csv'
60
- WaxTasks::Utils.validate_csv(source)
61
- when '.json'
62
- WaxTasks::Utils.validate_json(source)
63
- when /\.ya?ml/
64
- WaxTasks::Utils.validate_yaml(source)
65
- else
66
- raise Error::InvalidSource, "Can't load #{File.extname(source)} files. Culprit: #{source}"
67
- end
68
-
69
- WaxTasks::Utils.assert_pids(data)
70
- WaxTasks::Utils.assert_unique(data)
71
- end
72
-
73
- # @return [Nil]
74
- def overwrite_metadata
75
- src = self.metadata_source_path
76
- puts "Writing image derivative info #{src}.".cyan
77
- case File.extname(src)
78
- when '.csv'
79
- keys = @metadata.map(&:keys).inject(&:|)
80
- csv_string = keys.to_csv
81
- @metadata.each { |h| csv_string += h.values_at(*keys).to_csv }
82
- File.open(src, 'w') { |f| f.write(csv_string) }
83
- when '.json'
84
- File.open(src, 'w') { |f| f.write(JSON.pretty_generate(@metadata)) }
85
- when /\.ya?ml/
86
- File.open(src, 'w') { |f| f.write(@metadata.to_yaml) }
87
- else
88
- raise Error::InvalidSource
89
- end
17
# @param name [String] collection name; pages live in "_<name>"
# @param config [Hash] this collection's configuration
# @param source [String] site source directory
# @param collections_dir [String] directory that holds the collection folders
# @param ext [String] page extension
def initialize(name, config, source, collections_dir, ext)
  @name = name
  @config = config
  # The class exposes `attr_reader :ext`, so the value is stored under @ext as
  # well as @page_extension; otherwise the reader would return nil.
  @ext = ext
  @page_extension = ext
  @site_source = source
  @page_source = Utils.safe_join source, collections_dir, "_#{name}"
  @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source')
  @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source')
  @search_fields = %w[pid label thumbnail permalink collection]
  @image_variants = image_variants
end
91
28
  end
92
29
  end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_magick'
4
+ require 'progress_bar'
5
+ require 'progress_bar/core_ext/enumerable_with_progress'
6
+ require 'wax_iiif'
7
+
8
+ #
9
module WaxTasks
  #
  class Collection
    # Image handling for a collection: discovers items in the image source
    # directory and writes simple or IIIF derivatives for them.
    #
    # Reads instance variables set by the including class: @config, @name,
    # @imagedata_source, @metadata_source and @image_variants.
    module Images
      # Variant label => width. The defaults ('thumbnail' => 250,
      # 'full' => 1140) are overlaid with any `images: variants:` entries from
      # the collection config.
      #
      # @return [Hash]
      def image_variants
        default_variants = { 'thumbnail' => 250, 'full' => 1140 }
        custom_variants = @config.dig('images', 'variants') || {}
        default_variants.merge custom_variants
      end

      # Builds one WaxTasks::Item per entry in the image source directory,
      # dropping entries the item reports as invalid. Each item is paired with
      # the metadata record that has the same pid; a warning is printed when
      # there is none.
      #
      # @return [Array<WaxTasks::Item>]
      # @raise [Error::MissingSource] when the image source directory is missing
      def items_from_imagedata
        raise Error::MissingSource, "Cannot find image data source '#{@imagedata_source}'" unless Dir.exist? @imagedata_source

        pre_process_pdfs
        records = records_from_metadata
        Dir.glob(Utils.safe_join(@imagedata_source, '*')).map do |path|
          item = WaxTasks::Item.new(path, @image_variants)
          next unless item.valid?

          item.record = records.find { |r| r.pid == item.pid }
          item.iiif_config = @config.dig 'images', 'iiif'
          warn Rainbow("\nWarning:\nCould not find record in #{@metadata_source} for image item #{path}.\n").orange if item.record.nil?
          item
        end.compact
      end

      # Splits every PDF in the image source directory into image files,
      # skipping PDFs whose sibling directory (the PDF path without its
      # extension) already has content.
      def pre_process_pdfs
        Dir.glob(Utils.safe_join(@imagedata_source, '*.pdf')).each do |path|
          # Strip only the trailing extension; a global substitution would also
          # mangle any ".pdf" occurring earlier in the path (e.g. in a
          # directory name).
          target_dir = path.sub(/\.pdf\z/i, '')
          next unless Dir.glob("#{target_dir}/*").empty?

          puts Rainbow("\nPreprocessing #{path} into image files. This may take a minute.\n").cyan
          opts = { output_dir: File.dirname(target_dir) }
          WaxIiif::Utilities::PdfSplitter.split(path, opts)
        end
      end

      # Writes each item's simple derivatives under +dir+ and stores the
      # written path on the item's record under the variant label. Files that
      # already exist are left untouched (and their record is not updated).
      #
      # @param dir [String] output directory
      # @return [Array] the records of the processed items; items without a
      #   record are omitted, matching what the IIIF path returns
      def write_simple_derivatives(dir)
        puts Rainbow("Generating simple image derivatives for collection '#{@name}'\nThis might take awhile.").cyan
        processed = items_from_imagedata.each_with_progress.map do |item|
          item.simple_derivatives.each do |d|
            path = "#{dir}/#{d.path}"
            FileUtils.mkdir_p File.dirname(path)
            next if File.exist? path

            d.img.write path
            item.record.set d.label, path if item.record?
          end
          item
        end
        # Items without a metadata record only trigger a warning upstream, so
        # their nil records are dropped here rather than returned.
        processed.flat_map(&:record).compact
      end

      # Creates the WaxIiif builder that writes into +dir+.
      #
      # @param dir [String] output directory, also appended to the base URL
      # @return [WaxIiif::Builder]
      def iiif_builder(dir)
        build_opts = {
          base_url: "{{ '/' | absolute_url }}#{dir}",
          output_dir: dir,
          # variants: @image_variants,
          collection_label: @name
        }
        WaxIiif::Builder.new(build_opts)
      end

      # Adds YAML front matter to every .json file below +dir+.
      # (The method name says "font"; it is kept as-is for existing callers.)
      #
      # @param dir [String]
      def add_font_matter_to_json_files(dir)
        Dir.glob("#{dir}/**/*.json").each do |f|
          Utils.add_yaml_front_matter_to_file f
        end
      end

      # Copies IIIF results onto the matching records: for each configured
      # variant label found in the manifest JSON, and for the manifest id, the
      # cleaned value prefixed with "/" is set on the record.
      #
      # @param records [Array] records, possibly containing nil
      # @param manifests [Array] manifests matched to records by base_id == pid
      # @return [Array] the non-nil records; records without a manifest are
      #   returned unchanged
      def add_iiif_results_to_records(records, manifests)
        records.map do |record|
          next nil if record.nil?

          manifest = manifests.find { |m| m.base_id == record.pid }
          next record if manifest.nil?

          json = JSON.parse manifest.to_json
          @image_variants.each do |k, _v|
            value = json.dig k
            record.set k, "/#{Utils.content_clean(value)}" unless value.nil?
          end

          record.set 'manifest', "/#{Utils.content_clean(manifest.id)}"
          record
        end.compact
      end

      # Generates IIIF derivatives for all items into +dir+, adds front matter
      # to the generated JSON files and returns the updated records.
      #
      # @param dir [String] output directory
      # @return [Array] records updated by add_iiif_results_to_records
      def write_iiif_derivatives(dir)
        items = items_from_imagedata
        iiif_data = items.map(&:iiif_image_records).flatten
        builder = iiif_builder(dir)

        builder.load iiif_data

        puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan
        builder.process_data

        add_font_matter_to_json_files dir
        add_iiif_results_to_records items.map(&:record), builder.manifests
      end
    end
  end
end