sidekiq-iteration 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +265 -0
- data/guides/best-practices.md +71 -0
- data/guides/custom-enumerator.md +98 -0
- data/guides/iteration-how-it-works.md +71 -0
- data/guides/throttling.md +42 -0
- data/lib/sidekiq-iteration.rb +3 -0
- data/lib/sidekiq_iteration/active_record_batch_enumerator.rb +127 -0
- data/lib/sidekiq_iteration/active_record_cursor.rb +89 -0
- data/lib/sidekiq_iteration/active_record_enumerator.rb +69 -0
- data/lib/sidekiq_iteration/csv_enumerator.rb +85 -0
- data/lib/sidekiq_iteration/enumerators.rb +187 -0
- data/lib/sidekiq_iteration/iteration.rb +267 -0
- data/lib/sidekiq_iteration/job_retry_patch.rb +30 -0
- data/lib/sidekiq_iteration/nested_enumerator.rb +39 -0
- data/lib/sidekiq_iteration/throttling.rb +45 -0
- data/lib/sidekiq_iteration/version.rb +5 -0
- data/lib/sidekiq_iteration.rb +40 -0
- metadata +80 -0
@@ -0,0 +1,127 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SidekiqIteration
|
4
|
+
# Batch Enumerator based on ActiveRecord Relation.
|
5
|
+
# @private
|
6
|
+
class ActiveRecordBatchEnumerator
  include Enumerable

  # strftime pattern applied to Time values before they are stored in the cursor.
  SQL_DATETIME_WITH_NSEC = "%Y-%m-%d %H:%M:%S.%N"

  # @param relation [ActiveRecord::Relation] relation to iterate; must not carry ORDER BY or LIMIT
  # @param columns [Array<String, Symbol>, String, Symbol, nil] column(s) that order the iteration
  #   and make up the cursor; defaults to the fully-qualified primary key
  # @param batch_size [Integer] maximum number of rows selected per batch
  # @param cursor [Object, Array<Object>, nil] column value(s) to resume after
  # @raise [ArgumentError] when no column is given, when the relation joins a table but a column
  #   is not fully-qualified, or when the relation already has an ORDER BY or LIMIT
  def initialize(relation, columns: nil, batch_size: 100, cursor: nil)
    @primary_key = "#{relation.table_name}.#{relation.primary_key}"
    # Array() lets a single String/Symbol be passed as well as an Array of columns.
    @columns = columns.nil? ? [@primary_key] : Array(columns).map(&:to_s)
    raise ArgumentError, "Must specify at least one column" if @columns.empty?

    # The primary key may be given either fully-qualified or as a bare column name.
    @primary_key_index = @columns.index(@primary_key) || @columns.index(relation.primary_key)
    # The primary key is always plucked, because batches are re-selected by primary key.
    @pluck_columns = @primary_key_index ? @columns : @columns + [@primary_key]
    @batch_size = batch_size
    @cursor = Array.wrap(cursor)
    @initial_cursor = @cursor

    if relation.joins_values.present? && !@columns.all?(/\./)
      raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
    end

    if relation.arel.orders.present? || relation.arel.taken.present?
      raise ArgumentError,
        "The relation cannot use ORDER BY or LIMIT due to the way how iteration with a cursor is designed. " \
        "You can use other ways to limit the number of rows, e.g. a WHERE condition on the primary key column."
    end

    @base_relation = relation.reorder(@columns.join(", "))
  end

  # Yields each batch as a relation together with the cursor value reached by that batch.
  #
  # @yieldparam relation [ActiveRecord::Relation] the base relation restricted to the batch's primary keys
  # @yieldparam cursor [Object, Array<Object>] a bare value for a single column, an Array otherwise
  # @return [Enumerator] when no block is given; its size is computed lazily by {#size}
  def each
    return to_enum { size } unless block_given?

    while (relation = next_batch)
      yield relation, cursor_value
    end
  end

  # @return [Integer] number of batches needed to cover the base relation
  def size
    (@base_relation.count(:all) + @batch_size - 1) / @batch_size # ceiling division
  end

  private

  # Selects the next batch and advances the cursor.
  #
  # @return [ActiveRecord::Relation, nil] nil once no rows are left; the cursor is then
  #   reset to the value the enumerator was created with
  def next_batch
    relation = @base_relation.limit(@batch_size)
    filter = conditions
    relation = relation.where(*filter) if filter.any?

    cursor_values, ids = relation.uncached { pluck_columns(relation) }

    cursor = cursor_values.last
    unless cursor.present?
      @cursor = @initial_cursor
      return
    end

    # The primary key was plucked, but original cursor did not include it, so we should remove it
    cursor.pop unless @primary_key_index
    @cursor = Array.wrap(cursor)

    # Yields relations by selecting the primary keys of records in the batch.
    # Post.where(published: nil) results in an enumerator of relations like:
    # Post.where(published: nil, ids: batch_of_ids)
    @base_relation.where(@primary_key => ids)
  end

  # Plucks the cursor columns (plus the primary key) for the given relation.
  #
  # @return [Array(Array, Array)] the plucked cursor rows and the matching primary key values
  def pluck_columns(relation)
    column_values = relation.pluck(*@pluck_columns)
    # With a single plucked column (only the primary key) pluck returns a flat list,
    # which serves as both the cursor values and the ids.
    return [column_values, column_values] if @pluck_columns.size == 1

    # When the primary key was appended by us, it is the last plucked column.
    primary_key_index = @primary_key_index || -1
    primary_key_values = column_values.map { |values| values[primary_key_index] }

    serialize_column_values!(column_values)
    [column_values, primary_key_values]
  end

  # @return [Object, Array<Object>] the current cursor, unwrapped when it holds exactly one value
  def cursor_value
    @cursor.size == 1 ? @cursor.first : @cursor
  end

  # Builds the arguments for +where+ that select rows located after the current cursor.
  #
  # @return [Array] SQL fragment followed by its bind values, or an empty Array when there is no cursor
  def conditions
    column_index = @cursor.size - 1
    column = @columns[column_index]
    # A cursor covering every column is exclusive (>); a partial cursor is inclusive (>=).
    where_clause = @columns.size == @cursor.size ? "#{column} > ?" : "#{column} >= ?"

    while column_index > 0
      column_index -= 1
      column = @columns[column_index]
      where_clause = "#{column} > ? OR (#{column} = ? AND (#{where_clause}))"
    end

    # Every column binds its value twice except the innermost one, hence the trailing pop.
    # With an empty cursor this pops the clause itself and leaves an empty Array.
    ret = @cursor.reduce([where_clause]) { |params, value| params << value << value }
    ret.pop
    ret
  end

  # Replaces Time values in the plucked rows, in place, with their string form.
  def serialize_column_values!(column_values)
    column_values.map! { |values| values.map! { |value| column_value(value) } }
  end

  # @return [Object] +value+ formatted with SQL_DATETIME_WITH_NSEC when it is a Time, otherwise unchanged
  def column_value(value)
    value.is_a?(Time) ? value.strftime(SQL_DATETIME_WITH_NSEC) : value
  end
end
|
127
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SidekiqIteration
|
4
|
+
# @private
|
5
|
+
class ActiveRecordCursor
  include Comparable

  attr_reader :position, :reached_end

  # @param relation [ActiveRecord::Relation] relation to iterate; must not carry ORDER BY or LIMIT
  # @param columns [Array<String, Symbol>, String, Symbol, nil] column(s) that order the iteration;
  #   defaults to the fully-qualified primary key
  # @param position [Object, Array<Object>, nil] column value(s) to resume after
  # @raise [ArgumentError] when no column is given, when the position contains nil, when the relation
  #   joins a table but a column is not fully-qualified, or when the relation has an ORDER BY or LIMIT
  def initialize(relation, columns = nil, position = nil)
    columns ||= "#{relation.table_name}.#{relation.primary_key}"
    @columns = Array.wrap(columns)
    raise ArgumentError, "Must specify at least one column" if @columns.empty?

    self.position = Array.wrap(position)

    if relation.joins_values.present? && !@columns.all?(/\./)
      raise ArgumentError, "You need to specify fully-qualified columns if you join a table"
    end

    if relation.arel.orders.present? || relation.arel.taken.present?
      raise ArgumentError,
        "The relation cannot use ORDER BY or LIMIT due to the way how iteration with a cursor is designed. " \
        "You can use other ways to limit the number of rows, e.g. a WHERE condition on the primary key column."
    end

    @base_relation = relation.reorder(@columns.join(", "))
    @reached_end = false
  end

  # Orders cursors by position; a cursor that reached the end sorts after one that did not.
  #
  # @param other [Object]
  # @return [Integer, nil] nil when +other+ is not a cursor, so that the Comparable-provided
  #   +==+ answers false for foreign objects instead of raising NoMethodError
  def <=>(other)
    return unless other.is_a?(self.class)

    if reached_end == other.reached_end
      position <=> other.position
    else
      reached_end ? 1 : -1
    end
  end

  # @param position [Array<Object>] one value per cursor column (may be shorter or empty)
  # @raise [ArgumentError] when any value is nil
  def position=(position)
    raise ArgumentError, "Cursor position cannot contain nil values" if position.any?(&:nil?)

    @position = position
  end

  # Loads the next batch of records and moves the position to the last loaded record.
  #
  # @param batch_size [Integer] maximum number of records to load
  # @return [Array, nil] the loaded records, or nil when the end was already reached
  #   or no records were found
  def next_batch(batch_size)
    return if @reached_end

    relation = @base_relation.limit(batch_size)

    if (conditions = self.conditions).any?
      relation = relation.where(*conditions)
    end

    records = relation.uncached { relation.to_a }

    update_from_record(records.last) if records.any?
    # A short batch means there is nothing left to load afterwards.
    @reached_end = records.size < batch_size

    records if records.any?
  end

  private

  # Builds the arguments for +where+ that select rows located after the current position.
  #
  # @return [Array] SQL fragment followed by its bind values, or an empty Array when the position is empty
  def conditions
    i = @position.size - 1
    column = @columns[i]
    # A position covering every column is exclusive (>); a partial position is inclusive (>=).
    conditions = @columns.size == @position.size ? "#{column} > ?" : "#{column} >= ?"

    while i > 0
      i -= 1
      column = @columns[i]
      conditions = "#{column} > ? OR (#{column} = ? AND (#{conditions}))"
    end

    # Every column binds its value twice except the innermost one, hence the trailing pop.
    # With an empty position this pops the clause itself and leaves an empty Array.
    ret = @position.reduce([conditions]) { |params, value| params << value << value }
    ret.pop
    ret
  end

  # Sets the position from the record's values for the cursor columns. Each column name
  # is stripped of its table prefix and then called as a method on the record.
  def update_from_record(record)
    self.position = @columns.map do |column|
      method = column.to_s.split(".").last
      record.send(method)
    end
  end
end
|
89
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "active_record_cursor"
|
4
|
+
|
5
|
+
module SidekiqIteration
|
6
|
+
# Builds Enumerator based on ActiveRecord Relation. Supports enumerating on rows and batches.
|
7
|
+
# @private
|
8
|
+
class ActiveRecordEnumerator
  # strftime pattern applied to datetime column values before they are used as a cursor.
  SQL_DATETIME_WITH_NSEC = "%Y-%m-%d %H:%M:%S.%N"

  # @param relation [ActiveRecord::Relation] relation to iterate
  # @param columns [Array<String, Symbol>, String, Symbol, nil] column(s) that order the iteration
  #   and make up the cursor; defaults to the fully-qualified primary key
  # @param batch_size [Integer] number of records loaded per query
  # @param cursor [Object, Array<Object>, nil] column value(s) to resume after
  # @raise [ArgumentError] when +relation+ is not an ActiveRecord::Relation
  def initialize(relation, columns: nil, batch_size: 100, cursor: nil)
    unless relation.is_a?(ActiveRecord::Relation)
      raise ArgumentError, "relation must be an ActiveRecord::Relation"
    end

    @relation = relation
    @batch_size = batch_size
    @columns = Array(columns || "#{relation.table_name}.#{relation.primary_key}")
    @cursor = cursor
  end

  # @return [Enumerator] yields every record together with its own cursor value;
  #   the enumerator's size is the record count
  def records
    Enumerator.new(-> { size }) do |yielder|
      batches.each do |batch, _|
        batch.each { |record| yielder.yield(record, cursor_value(record)) }
      end
    end
  end

  # @return [Enumerator] yields every batch (an Array of records) together with the cursor
  #   value of its last record; the enumerator's size is the number of batches
  def batches
    cursor = ActiveRecordCursor.new(@relation, @columns, @cursor)
    # The size is the batch count (not the record count), so that it matches
    # the number of elements this enumerator yields.
    Enumerator.new(-> { batches_size }) do |yielder|
      while (records = cursor.next_batch(@batch_size))
        yielder.yield(records, cursor_value(records.last)) if records.any?
      end
    end
  end

  # @return [Integer] number of records in the relation
  def size
    @relation.count(:all)
  end

  private

  # @return [Integer] number of batches needed to cover the relation
  def batches_size
    (size + @batch_size - 1) / @batch_size # ceiling division
  end

  # @return [Object, Array<Object>] the record's values for the cursor columns,
  #   unwrapped when there is exactly one column
  def cursor_value(record)
    positions = @columns.map do |column|
      # Column names may be table-qualified; the attribute is the part after the last dot.
      attribute_name = column.to_s.split(".").last
      column_value(record, attribute_name)
    end

    positions.size == 1 ? positions.first : positions
  end

  # Reads the attribute and formats it with SQL_DATETIME_WITH_NSEC when the column
  # type is :datetime. NOTE(review): a nil datetime value would raise NoMethodError here.
  def column_value(record, attribute)
    value = record.read_attribute(attribute.to_sym)
    case record.class.columns_hash.fetch(attribute).type
    when :datetime
      value.strftime(SQL_DATETIME_WITH_NSEC)
    else
      value
    end
  end
end
|
69
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SidekiqIteration
|
4
|
+
# @private
|
5
|
+
# CsvEnumerator makes it possible to write an Iteration job
|
6
|
+
# that uses CSV file as a collection to iterate.
|
7
|
+
#
|
8
|
+
# @example Enumerator to iterate on rows
|
9
|
+
# def build_enumerator(cursor:)
|
10
|
+
# csv = CSV.open('tmp/files', converters: :integer, headers: true)
|
11
|
+
# csv_enumerator(csv, cursor: cursor)
|
12
|
+
# end
|
13
|
+
#
|
14
|
+
# def each_iteration(row)
|
15
|
+
# ...
|
16
|
+
# end
|
17
|
+
#
|
18
|
+
# @example Enumerator to iterate on batches of rows
|
19
|
+
# def build_enumerator(cursor:)
|
20
|
+
# csv = CSV.open('tmp/files', converters: :integer, headers: true)
|
21
|
+
# csv_batches_enumerator(csv, cursor: cursor)
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# def each_iteration(batch_of_rows)
|
25
|
+
# ...
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
class CsvEnumerator
  # Constructs CsvEnumerator instance based on a CSV file.
  #
  # @param [CSV] csv An instance of CSV (or of a CSV subclass)
  # @return [SidekiqIteration::CsvEnumerator]
  # @raise [ArgumentError] when +csv+ is not a CSV object
  #
  # @example
  #   csv = CSV.open('tmp/files', converters: :integer, headers: true)
  #   SidekiqIteration::CsvEnumerator.new(csv).rows(cursor: cursor)
  #
  def initialize(csv)
    raise ArgumentError, "CsvEnumerator.new takes CSV object" unless csv.is_a?(CSV)

    @csv = csv
  end

  # Constructs an enumerator on CSV rows. Each element is a pair of the row and its
  # zero-based index; the index is the cursor value.
  #
  # @param cursor [Integer, nil] index of the last processed row, nil to start from the first row
  # @return [Enumerator] Enumerator instance
  #
  def rows(cursor:)
    @csv.lazy
      .each_with_index
      .drop(count_of_processed_rows(cursor))
      .to_enum { count_of_rows_in_file }
  end

  # Constructs an enumerator on batches of CSV rows. Each element is a pair of the batch
  # (an Array of rows) and its zero-based batch index; the index is the cursor value.
  #
  # @param cursor [Integer, nil] index of the last processed batch, nil to start from the first batch
  # @param batch_size [Integer] number of rows per batch
  # @return [Enumerator] Enumerator instance
  #
  def batches(cursor:, batch_size: 100)
    @csv.lazy
      .each_slice(batch_size)
      .with_index
      .drop(count_of_processed_rows(cursor))
      .to_enum { (count_of_rows_in_file.to_f / batch_size).ceil }
  end

  private

  # Counts the data rows of the underlying file, used as the enumerators' size.
  # Lines are counted physically, so fields with embedded newlines inflate the count.
  #
  # @return [Integer, nil] nil when the CSV does not report a path
  def count_of_rows_in_file
    filepath = @csv.path
    return unless filepath

    # Counted in Ruby rather than by shelling out to `wc`, so that paths containing
    # spaces or shell metacharacters are handled safely.
    count = File.foreach(filepath).count
    count -= 1 if @csv.headers
    # An empty file with headers enabled must not yield a negative size.
    [count, 0].max
  end

  # @return [Integer] how many leading elements to skip: everything up to and including +cursor+
  def count_of_processed_rows(cursor)
    cursor ? cursor + 1 : 0
  end
end
|
85
|
+
end
|
@@ -0,0 +1,187 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "active_record_enumerator"
|
4
|
+
require_relative "active_record_batch_enumerator"
|
5
|
+
require_relative "csv_enumerator"
|
6
|
+
require_relative "nested_enumerator"
|
7
|
+
|
8
|
+
module SidekiqIteration
|
9
|
+
module Enumerators
  # Builds Enumerator object from a given array, using +cursor+ as an offset.
  # Each element is yielded together with its index, which is the cursor value.
  #
  # @param array [Array]
  # @param cursor [Integer] offset to start iteration from
  # @raise [ArgumentError] when +array+ is not an Array or contains Active Record objects
  #
  # @example
  #   array_enumerator(['build', 'enumerator', 'from', 'any', 'array'], cursor: cursor)
  #
  def array_enumerator(array, cursor:)
    raise ArgumentError, "array must be an Array" unless array.is_a?(Array)

    if defined?(ActiveRecord) && array.any?(ActiveRecord::Base)
      raise ArgumentError, "array cannot contain ActiveRecord objects"
    end

    # Skip everything up to and including the element the cursor points at.
    skipped = 0
    skipped = cursor + 1 if cursor

    indexed = array.each_with_index
    indexed.drop(skipped).to_enum { array.size }
  end

  # Builds Enumerator from Active Record Relation. Each Enumerator tick moves the cursor one row forward.
  #
  # @param scope [ActiveRecord::Relation] scope to iterate
  # @param cursor [Object] offset to start iteration from, usually an id
  # @option options :columns [Array<String, Symbol>] used to build the actual query for iteration,
  #   defaults to primary key
  # @option options :batch_size [Integer] (100) size of the batch
  #
  # The +columns:+ argument is used to build the actual query for iteration. It defaults to the primary key:
  #
  #   1) SELECT * FROM users ORDER BY id LIMIT 100
  #
  # When iteration is resumed, the +cursor:+ and +columns:+ values are used to continue from the point
  # where iteration stopped:
  #
  #   2) SELECT * FROM users WHERE id > $CURSOR ORDER BY id LIMIT 100
  #
  # +columns:+ can also take more than one column. In that case, +cursor+ will contain serialized values
  # of all columns at the point where iteration stopped.
  #
  # Consider this example with +columns: [:created_at, :id]+. Here is the query used on the first iteration:
  #
  #   1) SELECT * FROM "products" ORDER BY created_at, id LIMIT 100
  #
  # And the query on the next iteration:
  #
  #   2) SELECT * FROM "products"
  #        WHERE (created_at > '$LAST_CREATED_AT_CURSOR'
  #          OR (created_at = '$LAST_CREATED_AT_CURSOR' AND (id > '$LAST_ID_CURSOR')))
  #        ORDER BY created_at, id LIMIT 100
  #
  # As a result of this query pattern, if the values in these columns change for the records in scope during
  # iteration, they may be skipped or yielded multiple times depending on the nature of the update and the
  # cursor's value. If the value gets updated to a greater value than the cursor's value, it will get yielded
  # again. Similarly, if the value gets updated to a lesser value than the cursor's value, it will get skipped.
  #
  # @example
  #   def build_enumerator(cursor:)
  #     active_record_records_enumerator(User.all, cursor: cursor)
  #   end
  #
  #   def each_iteration(user)
  #     user.notify_about_something
  #   end
  #
  def active_record_records_enumerator(scope, cursor:, **options)
    builder = ActiveRecordEnumerator.new(scope, cursor: cursor, **options)
    builder.records
  end

  # Builds Enumerator from Active Record Relation and enumerates on batches of records.
  # Each Enumerator tick moves the cursor +batch_size+ rows forward.
  # @see #active_record_records_enumerator
  #
  # @example
  #   def build_enumerator(product_id, cursor:)
  #     active_record_batches_enumerator(
  #       Comment.where(product_id: product_id).select(:id),
  #       cursor: cursor,
  #       batch_size: 100
  #     )
  #   end
  #
  #   def each_iteration(batch_of_comments, product_id)
  #     comment_ids = batch_of_comments.map(&:id)
  #     CommentService.call(comment_ids: comment_ids)
  #   end
  #
  def active_record_batches_enumerator(scope, cursor:, **options)
    builder = ActiveRecordEnumerator.new(scope, cursor: cursor, **options)
    builder.batches
  end

  # Builds Enumerator from Active Record Relation and enumerates on batches, yielding Active Record Relations.
  # @see #active_record_records_enumerator
  #
  # @example
  #   def build_enumerator(product_id, cursor:)
  #     active_record_relations_enumerator(
  #       Product.find(product_id).comments,
  #       cursor: cursor,
  #       batch_size: 100,
  #     )
  #   end
  #
  #   def each_iteration(batch_of_comments, product_id)
  #     # batch_of_comments will be a Comment::ActiveRecord_Relation
  #     batch_of_comments.update_all(deleted: true)
  #   end
  #
  def active_record_relations_enumerator(scope, cursor:, **options)
    builder = ActiveRecordBatchEnumerator.new(scope, cursor: cursor, **options)
    builder.each
  end

  # Builds Enumerator from a CSV file.
  #
  # @param csv [CSV] an instance of CSV object
  # @param cursor [Integer] offset to start iteration from
  #
  # @example
  #   def build_enumerator(import_id, cursor:)
  #     import = Import.find(import_id)
  #     csv_enumerator(import.csv, cursor: cursor)
  #   end
  #
  #   def each_iteration(csv_row)
  #     # insert csv_row to database
  #   end
  #
  def csv_enumerator(csv, cursor:)
    builder = CsvEnumerator.new(csv)
    builder.rows(cursor: cursor)
  end

  # Builds Enumerator from a CSV file and enumerates on batches of records.
  #
  # @param csv [CSV] an instance of CSV object
  # @param cursor [Integer] offset to start iteration from
  # @option options :batch_size [Integer] (100) size of the batch
  #
  # @example
  #   def build_enumerator(import_id, cursor:)
  #     import = Import.find(import_id)
  #     csv_batches_enumerator(import.csv, cursor: cursor)
  #   end
  #
  #   def each_iteration(batch_of_csv_rows)
  #     # ...
  #   end
  #
  def csv_batches_enumerator(csv, cursor:, **options)
    builder = CsvEnumerator.new(csv)
    builder.batches(cursor: cursor, **options)
  end

  # Builds Enumerator for nested iteration.
  #
  # @param enums [Array<Proc>] an Array of Procs, each should return an Enumerator.
  #   Each proc from enums should accept the yielded items from the parent enumerators and the `cursor` as its arguments.
  #   Each proc's `cursor` argument is its part from the `build_enumerator`'s `cursor` array.
  # @param cursor [Array<Object>] array of offsets for each of the enums to start iteration from
  #
  # @example
  #   def build_enumerator(cursor:)
  #     nested_enumerator(
  #       [
  #         ->(cursor) { active_record_records_enumerator(Shop.all, cursor: cursor) },
  #         ->(shop, cursor) { active_record_records_enumerator(shop.products, cursor: cursor) },
  #         ->(_shop, product, cursor) { active_record_relations_enumerator(product.product_variants, cursor: cursor) }
  #       ],
  #       cursor: cursor
  #     )
  #   end
  #
  #   def each_iteration(product_variants_relation)
  #     # do something
  #   end
  #
  def nested_enumerator(enums, cursor:)
    builder = NestedEnumerator.new(enums, cursor: cursor)
    builder.each
  end
end
|
187
|
+
end
|