wax_tasks 1.0.0.pre.beta → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +35 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.md +17 -0
  4. data/.gitignore +7 -0
  5. data/.rspec +3 -0
  6. data/.rubocop.yml +16 -0
  7. data/.travis.yml +27 -0
  8. data/CHANGELOG.md +42 -0
  9. data/CODE_OF_CONDUCT.md +74 -0
  10. data/Gemfile +5 -2
  11. data/LICENSE +21 -0
  12. data/README.md +171 -0
  13. data/lib/tasks/clobber.rake +16 -0
  14. data/lib/tasks/derivatives_iiif.rake +9 -4
  15. data/lib/tasks/derivatives_simple.rake +8 -4
  16. data/lib/tasks/pages.rake +23 -0
  17. data/lib/tasks/search.rake +23 -0
  18. data/lib/wax_tasks.rb +30 -36
  19. data/lib/wax_tasks/asset.rb +57 -0
  20. data/lib/wax_tasks/collection.rb +42 -73
  21. data/lib/wax_tasks/collection/images.rb +126 -0
  22. data/lib/wax_tasks/collection/metadata.rb +101 -0
  23. data/lib/wax_tasks/config.rb +79 -0
  24. data/lib/wax_tasks/error.rb +17 -31
  25. data/lib/wax_tasks/index.rb +45 -0
  26. data/lib/wax_tasks/item.rb +116 -0
  27. data/lib/wax_tasks/record.rb +69 -0
  28. data/lib/wax_tasks/site.rb +86 -0
  29. data/lib/wax_tasks/utils.rb +58 -107
  30. data/lib/wax_tasks/version.rb +5 -0
  31. data/spec/setup.rb +1 -1
  32. data/spec/spec_helper.rb +14 -9
  33. data/wax_tasks.gemspec +33 -0
  34. metadata +52 -44
  35. data/lib/tasks/jspackage.rake +0 -17
  36. data/lib/tasks/lunr.rake +0 -9
  37. data/lib/tasks/pagemaster.rake +0 -11
  38. data/lib/tasks/push.rake +0 -12
  39. data/lib/tasks/test.rake +0 -18
  40. data/lib/wax_tasks/branch.rb +0 -70
  41. data/lib/wax_tasks/iiif/derivatives.rb +0 -86
  42. data/lib/wax_tasks/iiif/manifest.rb +0 -26
  43. data/lib/wax_tasks/image_collection.rb +0 -137
  44. data/lib/wax_tasks/local_branch.rb +0 -21
  45. data/lib/wax_tasks/lunr/index.rb +0 -82
  46. data/lib/wax_tasks/lunr/page_set.rb +0 -57
  47. data/lib/wax_tasks/pagemaster_collection.rb +0 -60
  48. data/lib/wax_tasks/task_runner.rb +0 -148
  49. data/lib/wax_tasks/travis_branch.rb +0 -28
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'destroy wax-generated collection files, including pages, derivatives, and search index(es)'
7
+ task :clobber do # collection names are read from the command line arguments that follow the first one
8
+ args = ARGV.drop(1).each { |a| task a.to_sym } # also registers an empty rake task per argument, presumably so rake does not abort on them -- TODO confirm
9
+ args.reject! { |a| a.start_with? '-' } # ignore option-style arguments that start with a dash
10
+
11
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:clobber'").magenta if args.empty?
12
+
13
+ site = WaxTasks::Site.new
14
+ args.each { |a| site.clobber a } # one Site instance clobbers each named collection in turn
15
+ end
16
+ end
@@ -1,13 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'wax_tasks'
2
4
 
3
5
  namespace :wax do
4
6
  namespace :derivatives do
5
7
  desc 'generate iiif derivatives from local image files'
6
8
  task :iiif do
7
- arguments = ARGV.drop(1).each { |a| task a.to_sym }
8
- raise WaxTasks::Error::MissingArguments, "You must specify a collection after 'wax:derivatives:iiif'" if arguments.empty?
9
- task_runner = WaxTasks::TaskRunner.new
10
- task_runner.derivatives_iiif(arguments)
9
+ args = ARGV.drop(1).each { |a| task a.to_sym } # also registers an empty rake task per argument, presumably so rake does not abort on them -- TODO confirm
10
+ args.reject! { |a| a.start_with? '-' } # ignore option-style arguments that start with a dash
11
+
12
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:iiif'").magenta if args.empty?
13
+
14
+ site = WaxTasks::Site.new
15
+ args.each { |a| site.generate_derivatives(a, 'iiif') } # second argument selects the iiif derivative type
11
16
  end
12
17
  end
13
18
 
@@ -1,13 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'wax_tasks'
2
4
 
3
5
  namespace :wax do
4
6
  namespace :derivatives do
5
7
  desc 'generate iiif derivatives from local image files' # NOTE(review): text says iiif although the task below is the simple one; looks copy-pasted -- confirm
6
8
  task :simple do
7
- arguments = ARGV.drop(1).each { |a| task a.to_sym }
8
- raise WaxTasks::Error::MissingArguments, "You must specify a collection after 'wax:derivatives:simple'" if arguments.empty?
9
- task_runner = WaxTasks::TaskRunner.new
10
- task_runner.derivatives_simple(arguments)
9
+ args = ARGV.drop(1).each { |a| task a.to_sym } # also registers an empty rake task per argument, presumably so rake does not abort on them -- TODO confirm
10
+ args.reject! { |a| a.start_with? '-' } # ignore option-style arguments that start with a dash
11
+ raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:derivatives:simple'").magenta if args.empty?
12
+
13
+ site = WaxTasks::Site.new
14
+ args.each { |a| site.generate_derivatives(a, 'simple') } # second argument selects the simple derivative type
11
15
  end
12
16
  end
13
17
  end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'generate collection md pages from yaml or csv data source'
7
+ task :pages do
8
+ args = ARGV.drop(1).each { |a| task a.to_sym } # also registers an empty rake task per argument, presumably so rake does not abort on them -- TODO confirm
9
+ args.reject! { |a| a.start_with? '-' } # ignore option-style arguments that start with a dash
10
+ raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:pages').magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_pages a } # one Site instance generates pages for each named collection
14
+ end
15
+
16
+ # alias :pagemaster to wax:pages for backwards compatibility
17
+ task :pagemaster do
18
+ t = Rake::Task['wax:pages'] # look up the task this alias forwards to
19
+ desc t.full_comment if t.full_comment # NOTE(review): desc runs inside the task action, so it only executes when the alias is invoked -- confirm it sets the description as intended
20
+ args = ARGV.drop(1).each { |a| task a.to_sym }
21
+ t.invoke(*args) # arguments are forwarded as rake task arguments; wax:pages itself reads ARGV again
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wax_tasks'
4
+
5
+ namespace :wax do
6
+ desc 'build lunr search index (with default UI if UI=true)'
7
+ task :search do
8
+ args = ARGV.drop(1).each { |a| task a.to_sym } # also registers an empty rake task per argument, presumably so rake does not abort on them -- TODO confirm
9
+ args.reject! { |a| a.start_with? '-' } # ignore option-style arguments that start with a dash
10
+ raise WaxTasks::Error::MissingArguments, Rainbow('You must specify a collection after wax:search').magenta if args.empty?
11
+
12
+ site = WaxTasks::Site.new
13
+ args.each { |a| site.generate_static_search a } # one Site instance builds the search index for each named collection
14
+ end
15
+
16
+ # alias lunr to search for backwards compatibility
17
+ task :lunr do
18
+ t = Rake::Task['wax:search'] # look up the task this alias forwards to
19
+ desc t.full_comment if t.full_comment # NOTE(review): desc runs inside the task action, so it only executes when the alias is invoked -- confirm it sets the description as intended
20
+ args = ARGV.drop(1).each { |a| task a.to_sym }
21
+ t.invoke(*args) # arguments are forwarded as rake task arguments; wax:search itself reads ARGV again
22
+ end
23
+ end
@@ -1,43 +1,37 @@
1
- require_relative 'wax_tasks/branch'
1
+ # frozen_string_literal: true
2
+
3
+ # rubygems
4
+ require 'rubygems'
5
+
6
+ # stdlib
7
+ require 'csv'
8
+ require 'fileutils'
9
+ require 'json'
10
+
11
+ # 3rd party
12
+ require 'rainbow'
13
+ require 'safe_yaml'
14
+
15
+ # relative
16
+ require_relative 'wax_tasks/asset'
2
17
  require_relative 'wax_tasks/collection'
18
+ require_relative 'wax_tasks/config'
3
19
  require_relative 'wax_tasks/error'
4
- require_relative 'wax_tasks/image_collection'
5
- require_relative 'wax_tasks/local_branch'
6
- require_relative 'wax_tasks/lunr/index'
7
- require_relative 'wax_tasks/pagemaster_collection'
8
- require_relative 'wax_tasks/task_runner'
9
- require_relative 'wax_tasks/travis_branch'
20
+ require_relative 'wax_tasks/index'
21
+ require_relative 'wax_tasks/item'
22
+ require_relative 'wax_tasks/record'
23
+ require_relative 'wax_tasks/site'
10
24
  require_relative 'wax_tasks/utils'
25
+ require_relative 'wax_tasks/version'
11
26
 
12
- # The WaxTasks module powers the Rake tasks in `./tasks`, including:
13
- #
14
- # wax:pagemaster :: generate collection md pages from csv, json, or yaml file
15
- # wax:lunr :: build lunr search index (with default UI if UI=true)
16
- # wax:derivatives:simple :: generate simple image derivatives from local image files
17
- # wax:derivatves:iiif :: generate iiif derivatives from local image files
18
- # wax:jspackage :: write a simple package.json for monitoring js dependencies
19
- # wax:push :: push compiled Jekyll site to git branch
20
- # wax:test :: run htmlproofer, rspec if .rspec file exists
21
27
  #
22
- # Tasks are run by a WaxTasks::TaskRunner object which is resposible
23
- # for reading in site config from `_config.yml`
24
28
  module WaxTasks
25
- # ----------
26
- # CONSTANTS
27
- # ----------
28
-
29
- # @return [String] The path to load Jekyll site config
30
- DEFAULT_CONFIG = '_config.yml'.freeze
31
-
32
- # @return [String] The path to write default LunrUI
33
- LUNR_UI_PATH = 'js/lunr-ui.js'.freeze
34
-
35
- # @return [String] The path to the compiled Jekyll site
36
- SITE_DIR = '_site'.freeze
37
-
38
- # @return [String] Default image variant/derivative widths to generate
39
- DEFAULT_IMAGE_VARIANTS = { thumbnail: 250, full: 1140 }.freeze
40
-
41
- # @return [String] The path where image derivatives should be generated
42
- DEFAULT_DERIVATIVE_DIR = 'img/derivatives'.freeze
29
+ DEFAULT_CONFIG_FILE = './_config.yml'
30
+ # Passes the given path, or DEFAULT_CONFIG_FILE when none is given, to Utils.validate_yaml and returns its result.
31
+ # Any StandardError raised while doing so is re-raised as WaxTasks::Error::InvalidConfig with the original message appended.
32
+ def self.config_from_file(file = nil)
33
+ Utils.validate_yaml(file || DEFAULT_CONFIG_FILE)
34
+ rescue StandardError => e
35
+ raise WaxTasks::Error::InvalidConfig, "Cannot open config file '#{file}'.\n #{e}" # NOTE(review): file is nil here when the default path was used, so the message shows an empty name -- confirm
36
+ end
43
37
  end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Top-level namespace of the wax_tasks library.
4
+ module WaxTasks
5
+ Derivative = Struct.new(:path, :label, :img) # value object returned by Asset#simple_derivatives
6
+
7
+ # One image file, held together with the pid of the item it belongs to and the variants (label to width pairs) to derive from it.
8
+ class Asset
9
+ attr_reader :id, :path
10
+
11
+ def initialize(path, pid, variants)
12
+ @path = path
13
+ @pid = pid
14
+ @id = asset_id # computed from @path and @pid, so it has to follow both assignments
15
+ @variants = variants
16
+ end
17
+
18
+ # Derives the asset id from the file basename without its extension.
19
+ # The pid and an underscore are prepended unless the basename already equals the pid.
20
+ def asset_id
21
+ id = File.basename @path, '.*'
22
+ id.prepend "#{@pid}_" unless id == @pid
23
+ id
24
+ end
25
+
26
+ # Opens the file with MiniMagick once per variant and returns one Derivative (path, label, img) per variant, each converted to jpg.
27
+ # The image is resized to the variant width unless that width exceeds the image width; in that case a warning is printed and no resize happens.
28
+ def simple_derivatives
29
+ @variants.map do |label, width|
30
+ img = MiniMagick::Image.open @path
31
+ if width > img.width
32
+ warn Rainbow("Tried to create derivative #{width}px wide, but asset #{@id} for item #{@pid} only has a width of #{img.width}px.").yellow
33
+ else
34
+ img.resize width
35
+ end
36
+
37
+ img.format 'jpg'
38
+ Derivative.new("#{@id}/#{label}.jpg", label, img)
39
+ end
40
+ end
41
+
42
+ # Builds a WaxIiif::ImageRecord from a copy of base_opts plus the path, id, pid (as manifest_id) and variants of this asset.
43
+ # The asset at index zero is marked primary; a Page N section label (N is index plus one) is set unless is_only is truthy.
44
+ def to_iiif_image_record(is_only, index, base_opts)
45
+ opts = base_opts.clone
46
+
47
+ opts[:is_primary] = index.zero?
48
+ opts[:section_label] = "Page #{index + 1}" unless is_only
49
+ opts[:path] = @path
50
+ opts[:manifest_id] = @pid
51
+ opts[:id] = @id
52
+ opts[:variants] = @variants
53
+
54
+ WaxIiif::ImageRecord.new(opts)
55
+ end
56
+ end
57
+ end
@@ -1,91 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'collection/images'
4
+ require_relative 'collection/metadata'
5
+
1
6
  module WaxTasks
2
- # Parent class representing a Jekyll collection
3
- # that cannot be created directly. Only child classes
4
- # (IiifCollection, LunrCollection, PagemasterCollection)
5
- # can be initialized.
7
+ #
6
8
  class Collection
7
- attr_accessor :name, :site
8
- private_class_method :new
9
+ attr_reader :name, :config, :ext, :search_fields,
10
+ :page_source, :metadata_source, :imagedata_source,
11
+ :iiif_derivative_source, :simple_derivative_source
9
12
 
10
- # This method ensures child classes can be instantiated though
11
- # Collection.new cannot be.
12
- def self.inherited(*)
13
- public_class_method :new
14
- end
13
+ include Collection::Metadata
14
+ include Collection::Images
15
15
 
16
- # Creates a new collection with name @name given site configuration @site
17
- #
18
- # @param name [String] name of the collection in site:collections
19
- # @param site [Hash] site config
20
- def initialize(name, site)
21
- @name = name
22
- @site = site
23
- @config = self.config
24
- end
16
+ IMAGE_DERIVATIVE_DIRECTORY = 'img/derivatives'
17
+ DEFAULT_VARIANTS = { 'thumbnail' => 250, 'fullwidth' => 1140 }.freeze
25
18
 
26
- # Finds the collection config within the site config
27
19
  #
28
- # @return [Hash] the config for the collection
29
- def config
30
- @site[:collections].fetch(@name)
31
- rescue StandardError => e
32
- raise Error::InvalidCollection, "Cannot load collection config for #{@name}.\n#{e}"
33
- end
34
-
35
- # Returns the target directory for generated collection pages
36
20
  #
37
- # @return [String] path
38
- def page_dir
39
- WaxTasks::Utils.root_path(@site[:source_dir], @site[:collections_dir], "_#{@name}")
21
+ def initialize(name, config, source, collections_dir, ext) # stores the name and config and derives every path from the site source directory
22
+ @name = name
23
+ @config = config
24
+ @page_extension = ext # NOTE(review): attr_reader :ext reads @ext, which is not assigned in this method, so the reader presumably returns nil -- confirm
25
+ @site_source = source
26
+ @page_source = Utils.safe_join source, collections_dir, "_#{@name}" # pages directory: collections_dir plus the collection name prefixed with an underscore
27
+ @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source') # metadata source from the config, resolved under _data
28
+ @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source') # image source from the config, resolved under _data
29
+ @iiif_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif' # built from the site source only, not from the collection name
30
+ @simple_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'simple' # built from the site source only, not from the collection name
31
+ @search_fields = %w[pid label thumbnail permalink collection]
32
+ @image_variants = image_variants # DEFAULT_VARIANTS merged with any variants configured for this collection
40
33
  end
41
34
 
42
- # Constructs the path to the data source file
43
35
  #
44
- # @return [String] the path to the data source file
45
- def metadata_source_path
46
- source = @config.dig('metadata', 'source')
47
- raise WaxTasks::Error::MissingSource, "Missing collection source in _config.yml for #{@name}" if source.nil?
48
- WaxTasks::Utils.root_path(@site[:source_dir], '_data', source)
36
+ # Returns DEFAULT_VARIANTS merged with the images/variants hash of the collection config (an empty hash when absent); configured entries win on duplicate keys.
37
+ def image_variants
38
+ vars = @config.dig('images', 'variants') || {}
39
+ DEFAULT_VARIANTS.merge vars
49
40
  end
50
41
 
51
- # Ingests the collection source data as an Array of Hashes
52
42
  #
53
- # @param source [String] the path to the CSV, JSON, or YAML source file
54
- # @return [Array] the collection data
55
- def ingest_file(source)
56
- raise Error::MissingSource, "Cannot find #{source}" unless File.exist? source
57
-
58
- data = case File.extname(source)
59
- when '.csv'
60
- WaxTasks::Utils.validate_csv(source)
61
- when '.json'
62
- WaxTasks::Utils.validate_json(source)
63
- when /\.ya?ml/
64
- WaxTasks::Utils.validate_yaml(source)
65
- else
66
- raise Error::InvalidSource, "Can't load #{File.extname(source)} files. Culprit: #{source}"
67
- end
68
-
69
- WaxTasks::Utils.assert_pids(data)
70
- WaxTasks::Utils.assert_unique(data)
43
+ # Removes the page directory of the collection after printing a notice; does nothing when the directory does not exist.
44
+ def clobber_pages
45
+ return unless Dir.exist? @page_source
46
+ puts Rainbow("Removing pages from #{@page_source}").cyan
47
+ FileUtils.remove_dir @page_source, true # second argument is the force flag of FileUtils.remove_dir
71
48
  end
72
49
 
73
- # @return [Nil]
74
- def overwrite_metadata
75
- src = self.metadata_source_path
76
- puts "Writing image derivative info #{src}.".cyan
77
- case File.extname(src)
78
- when '.csv'
79
- keys = @metadata.map(&:keys).inject(&:|)
80
- csv_string = keys.to_csv
81
- @metadata.each { |h| csv_string += h.values_at(*keys).to_csv }
82
- File.open(src, 'w') { |f| f.write(csv_string) }
83
- when '.json'
84
- File.open(src, 'w') { |f| f.write(JSON.pretty_generate(@metadata)) }
85
- when /\.ya?ml/
86
- File.open(src, 'w') { |f| f.write(@metadata.to_yaml) }
87
- else
88
- raise Error::InvalidSource
50
+ # Removes the iiif and simple derivative directories that exist, printing a notice for each one removed.
51
+ # NOTE(review): both paths are built in initialize from the site source and IMAGE_DERIVATIVE_DIRECTORY only, not from the collection name -- confirm that removing the derivatives of every collection is intended.
52
+ def clobber_derivatives
53
+ [@iiif_derivative_source, @simple_derivative_source].each do |dir|
54
+ if Dir.exist? dir
55
+ puts Rainbow("Removing derivatives from #{dir}").cyan
56
+ FileUtils.remove_dir dir, true
57
+ end
89
58
  end
90
59
  end
91
60
  end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mini_magick'
4
+ require 'progress_bar'
5
+ require 'wax_iiif'
6
+
7
+ # Top-level namespace of the wax_tasks library.
8
+ module WaxTasks
9
+ # Reopens Collection so that the Images mixin can be nested under it.
10
+ class Collection
11
+ # Image handling for a collection: building items from the image data source and writing simple and IIIF derivatives.
12
+ module Images
13
+ # Builds one Item per entry directly under the image data source directory, after splitting any pdfs into image files.
14
+ # Pdf entries and entries whose item is not valid are left out; every kept item receives the metadata record with the same pid (or nil) and the images/iiif config. Raises Error::MissingSource when the directory does not exist.
15
+ def items_from_imagedata
16
+ raise Error::MissingSource, "Cannot find image data source '#{@imagedata_source}'" unless Dir.exist? @imagedata_source
17
+
18
+ pre_process_pdfs
19
+ records = records_from_metadata
20
+ Dir.glob(Utils.safe_join(@imagedata_source, '*')).map do |path|
21
+ item = WaxTasks::Item.new(path, @image_variants)
22
+ next if item.type == '.pdf'
23
+ next puts Rainbow("Skipping #{path} because type #{item.type} is not an accepted format").yellow unless item.valid?
24
+
25
+ item.record = records.find { |r| r.pid == item.pid }
26
+ item.iiif_config = @config.dig 'images', 'iiif'
27
+ warn Rainbow("\nCould not find record in #{@metadata_source} for image item #{path}.\n").orange if item.record.nil?
28
+ item
29
+ end.compact
30
+ end
31
+
32
+ # Splits every pdf directly under the image data source into image files with WaxIiif::Utilities::PdfSplitter, skipping a pdf when the directory named after it already contains entries.
33
+ # NOTE(review): gsub removes every .pdf occurrence in the path, not only the trailing extension -- confirm that paths never contain it elsewhere.
34
+ def pre_process_pdfs
35
+ Dir.glob(Utils.safe_join(@imagedata_source, '*.pdf')).each do |path|
36
+ target_dir = path.gsub '.pdf', ''
37
+ next unless Dir.glob("#{target_dir}/*").empty?
38
+
39
+ puts Rainbow("\nPreprocessing #{path} into image files. This may take a minute.\n").cyan
40
+
41
+ opts = { output_dir: File.dirname(target_dir) }
42
+ WaxIiif::Utilities::PdfSplitter.split(path, opts)
43
+ end
44
+ end
45
+
46
+ # Writes every simple derivative image that does not exist yet under the simple derivative directory while updating a progress bar, and returns the non-nil records of the items.
47
+ # NOTE(review): items_from_imagedata runs twice here (once for the bar length, once for the loop), and a record is only updated for files written in this run -- confirm both are intended.
48
+ def write_simple_derivatives
49
+ puts Rainbow("Generating simple image derivatives for collection '#{@name}'\nThis might take awhile.").cyan
50
+
51
+ bar = ProgressBar.new(items_from_imagedata.length)
52
+ bar.write
53
+ items_from_imagedata.map do |item|
54
+ item.simple_derivatives.each do |d|
55
+ path = "#{@simple_derivative_source}/#{d.path}"
56
+ FileUtils.mkdir_p File.dirname(path)
57
+ next if File.exist? path
58
+
59
+ d.img.write path
60
+ item.record.set d.label, path if item.record?
61
+ end
62
+ bar.increment!
63
+ bar.write
64
+ item
65
+ end.flat_map(&:record).compact
66
+ end
67
+
68
+ # Returns a WaxIiif::Builder that outputs to dir and is labelled with the collection name.
69
+ # A full entry is removed from a copy of the image variants before they are handed over; base_url is presumably a Liquid expression left for the site build to resolve -- TODO confirm.
70
+ def iiif_builder(dir)
71
+ build_opts = {
72
+ base_url: "{{ '/' | absolute_url }}#{dir}",
73
+ output_dir: dir,
74
+ collection_label: @name,
75
+ variants: @image_variants.dup.tap { |h| h.delete 'full' }
76
+ }
77
+ WaxIiif::Builder.new build_opts
78
+ end
79
+
80
+ # Runs Utils.add_yaml_front_matter_to_file on every json file found recursively under dir.
81
+ # NOTE(review): the method name says font where front is presumably meant; renaming it would affect callers.
82
+ def add_font_matter_to_json_files(dir)
83
+ Dir.glob("#{dir}/**/*.json").each do |f|
84
+ Utils.add_yaml_front_matter_to_file f
85
+ end
86
+ end
87
+
88
+ # For every record that has a manifest whose base_id equals the record pid, stores the non-empty manifest values found under the variant labels and the manifest id on the record, each cleaned with Utils.content_clean and prefixed with a slash.
89
+ # Records without a matching manifest are returned unchanged; nil records are dropped from the result.
90
+ def add_iiif_results_to_records(records, manifests)
91
+ records.map do |record|
92
+ next nil if record.nil?
93
+
94
+ manifest = manifests.find { |m| m.base_id == record.pid }
95
+ next record if manifest.nil?
96
+
97
+ json = JSON.parse manifest.to_json
98
+ @image_variants.each do |k, _v|
99
+ value = json.fetch k, ''
100
+ record.set k, "/#{Utils.content_clean(value)}" unless value.empty?
101
+ end
102
+
103
+ record.set 'manifest', "/#{Utils.content_clean(manifest.id)}"
104
+ record
105
+ end.compact
106
+ end
107
+
108
+ # Loads the iiif image records of all items into a builder that targets the iiif derivative directory and processes them.
109
+ # Afterwards front matter is added to the json files under that directory, and the non-nil item records are returned with the iiif results added.
110
+ def write_iiif_derivatives
111
+ items = items_from_imagedata
112
+ iiif_data = items.map(&:iiif_image_records).flatten
113
+ builder = iiif_builder @iiif_derivative_source
114
+
115
+ builder.load iiif_data
116
+
117
+ puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan
118
+ builder.process_data
119
+ records = items.map(&:record).compact
120
+
121
+ add_font_matter_to_json_files @iiif_derivative_source
122
+ add_iiif_results_to_records records, builder.manifests
123
+ end
124
+ end
125
+ end
126
+ end