maintenance_tasks 1.8.2 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b11e5edbefa677caf704bd6af59a14185ab89ff29da35d966253b78143a76af1
4
- data.tar.gz: a103be6c53d5d6dae55d63f451d293abe1c0a4e763cb1c2bd9e3dcfea3a6bc72
3
+ metadata.gz: 0a050fbc2d9dbc3eab88b027ee68163f09cae1472df9e236a7151f5fa1f4464b
4
+ data.tar.gz: c6cf0f55b868b7810c7361669d4383d0fd159dc337b270396e3123a795e75d24
5
5
  SHA512:
6
- metadata.gz: 3415d87b545e09fc65494cecb03a7cea9a5c84838c66c209a129f8017efc611eee7521852c2460ada50c389b2885bb95dcd2bfe807ea8a84693e9b86e2123409
7
- data.tar.gz: 19fce32dfc506afe512ce6ebc2ddfd0a2fd195b61e4611c4a3febae5ad4bfe09d39ac47e902a1c21f830de9e3c5f953379d8011919ba0a381dc6010599eb289e
6
+ metadata.gz: 903da6a69576ffcf022a09248fd7945ec816150505d36c65d96f70cfeec969cb3315e820317c5bff8c882d4e0c09ad33758f9221b2d4d2cf08292a0415219c3a
7
+ data.tar.gz: 811d79fa5d58252649faea6ed0bc82b6f8fa471d77ae02535cf73fb873a9e663deb893fc81d2528863f68194123db7f7dc55c1a9dc299143da9586fccb4570a6
data/README.md CHANGED
@@ -118,6 +118,33 @@ The files uploaded to your Active Storage service provider will be renamed
118
118
  to include an ISO8601 timestamp and the Task name in snake case format.
119
119
  The CSV is expected to have a trailing newline at the end of the file.
120
120
 
121
+ #### Batch CSV Tasks
122
+
123
+ Tasks can process CSVs in batches. Add the `in_batches` option to your task's
124
+ `csv_collection` macro:
125
+
126
+ ```ruby
127
+ # app/tasks/maintenance/batch_import_posts_task.rb
128
+
129
+ module Maintenance
130
+ class BatchImportPostsTask < MaintenanceTasks::Task
131
+ csv_collection(in_batches: 50)
132
+
133
+ def process(batch_of_rows)
134
+ Post.insert_all(batch_of_rows.map(&:to_h))
135
+ end
136
+ end
137
+ end
138
+ ```
139
+
140
+ As with a regular CSV task, ensure you've implemented the following method:
141
+
142
+ * `process`: do the work of your Task on a batch (array of `CSV::Row` objects).
143
+
144
+ Note that `#count` is calculated automatically based on the number of batches in
145
+ your collection, and your Task's progress will be displayed in terms of batches
146
+ (not the total number of rows in your CSV).
147
+
121
148
  ### Processing Batch Collections
122
149
 
123
150
  The Maintenance Tasks gem supports processing Active Records in batches. This
@@ -57,6 +57,11 @@ module MaintenanceTasks
57
57
  )
58
58
  when Array
59
59
  enumerator_builder.build_array_enumerator(collection, cursor: cursor)
60
+ when BatchCsvCollectionBuilder::BatchCsv
61
+ JobIteration::CsvEnumerator.new(collection.csv).batches(
62
+ batch_size: collection.batch_size,
63
+ cursor: cursor,
64
+ )
60
65
  when CSV
61
66
  JobIteration::CsvEnumerator.new(collection).rows(cursor: cursor)
62
67
  else
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module MaintenanceTasks
6
+ # Strategy for building a Task that processes CSV files in batches.
7
+ #
8
+ # @api private
9
+ class BatchCsvCollectionBuilder < CsvCollectionBuilder
10
+ BatchCsv = Struct.new(:csv, :batch_size, keyword_init: true)
11
+
12
+ # Initialize a BatchCsvCollectionBuilder with a batch size.
13
+ #
14
+ # @param batch_size [Integer] the number of CSV rows in a batch.
15
+ def initialize(batch_size)
16
+ @batch_size = batch_size
17
+ super()
18
+ end
19
+
20
+ # Defines the collection to be iterated over, based on the provided CSV.
21
+ # Includes the CSV and the batch size.
22
+ def collection(task)
23
+ BatchCsv.new(
24
+ csv: CSV.new(task.csv_content, headers: true),
25
+ batch_size: @batch_size
26
+ )
27
+ end
28
+
29
+ # The number of batches to be processed, assuming a trailing newline is at
30
+ # the end of the CSV file. Note that this number is an approximation based
31
+ # on the number of newlines: the header row's newline is counted too, so
32
+ # the result can be one batch too high (NOTE(review): confirm intent).
33
+ #
34
+ # @return [Integer] the approximate number of batches to process.
35
+ def count(task)
36
+ (task.csv_content.count("\n") + @batch_size - 1) / @batch_size
37
+ end
38
+ end
39
+ end
@@ -53,16 +53,23 @@ module MaintenanceTasks
53
53
 
54
54
  # Make this Task a task that handles CSV.
55
55
  #
56
+ # @param in_batches [Integer] optionally, supply a batch size if the CSV
57
+ # should be processed in batches.
58
+ #
56
59
  # An input to upload a CSV will be added in the form to start a Run. The
57
60
  # collection and count methods are implemented.
58
- def csv_collection
61
+ def csv_collection(in_batches: nil)
59
62
  unless defined?(ActiveStorage)
60
63
  raise NotImplementedError, "Active Storage needs to be installed\n"\
61
64
  "To resolve this issue run: bin/rails active_storage:install"
62
65
  end
63
66
 
64
- self.collection_builder_strategy =
65
- MaintenanceTasks::CsvCollectionBuilder.new
67
+ if in_batches
68
+ self.collection_builder_strategy =
69
+ BatchCsvCollectionBuilder.new(in_batches)
70
+ else
71
+ self.collection_builder_strategy = CsvCollectionBuilder.new
72
+ end
66
73
  end
67
74
 
68
75
  # Make this a Task that calls #process once, instead of iterating over
@@ -3,11 +3,11 @@
3
3
  <h6 class="title is-6">Arguments:</h6>
4
4
  <table class="table">
5
5
  <tbody>
6
- <% run.arguments.each do |key, value| %>
6
+ <% run.arguments.transform_values(&:to_s).each do |key, value| %>
7
7
  <tr>
8
8
  <td class="is-family-monospace"><%= key %></td>
9
9
  <td>
10
- <% next if value.nil? || value.empty? %>
10
+ <% next if value.empty? %>
11
11
  <% if value.include?("\n") %>
12
12
  <pre><%= value %></pre>
13
13
  <% else %>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maintenance_tasks
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.2
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify Engineering
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-10 00:00:00.000000000 Z
11
+ date: 2022-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: actionpack
@@ -97,6 +97,7 @@ files:
97
97
  - app/jobs/concerns/maintenance_tasks/task_job_concern.rb
98
98
  - app/jobs/maintenance_tasks/task_job.rb
99
99
  - app/models/maintenance_tasks/application_record.rb
100
+ - app/models/maintenance_tasks/batch_csv_collection_builder.rb
100
101
  - app/models/maintenance_tasks/csv_collection_builder.rb
101
102
  - app/models/maintenance_tasks/no_collection_builder.rb
102
103
  - app/models/maintenance_tasks/null_collection_builder.rb
@@ -150,7 +151,7 @@ homepage: https://github.com/Shopify/maintenance_tasks
150
151
  licenses:
151
152
  - MIT
152
153
  metadata:
153
- source_code_uri: https://github.com/Shopify/maintenance_tasks/tree/v1.8.2
154
+ source_code_uri: https://github.com/Shopify/maintenance_tasks/tree/v1.9.0
154
155
  allowed_push_host: https://rubygems.org
155
156
  post_install_message:
156
157
  rdoc_options: []