eco-helpers 3.0.17 → 3.0.19
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -1
- data/eco-helpers.gemspec +3 -3
- data/lib/eco/api/common/loaders/parser.rb +10 -0
- data/lib/eco/api/common/people/default_parsers/csv_parser.rb +21 -208
- data/lib/eco/api/common/people/default_parsers/helpers/expected_headers.rb +206 -0
- data/lib/eco/api/common/people/default_parsers/helpers/null_parsing.rb +36 -0
- data/lib/eco/api/common/people/default_parsers/helpers.rb +15 -0
- data/lib/eco/api/common/people/default_parsers/json_parser.rb +56 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +13 -14
- data/lib/eco/api/common/people/default_parsers.rb +2 -0
- data/lib/eco/api/common/people/entry_factory.rb +15 -4
- data/lib/eco/api/session/batch/launcher/mode_size.rb +65 -0
- data/lib/eco/api/session/batch/launcher/retry.rb +3 -3
- data/lib/eco/api/session/batch/launcher/status_handling.rb +4 -2
- data/lib/eco/api/session/batch/launcher.rb +42 -37
- data/lib/eco/api/session.rb +2 -0
- data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb +26 -0
- data/lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb +10 -0
- data/lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb +17 -0
- data/lib/eco/api/usecases/default/utils/cli/split_json_cli.rb +15 -0
- data/lib/eco/api/usecases/default/utils/group_csv_case.rb +213 -0
- data/lib/eco/api/usecases/default/utils/json_to_csv_case.rb +71 -0
- data/lib/eco/api/usecases/default/utils/sort_csv_case.rb +127 -0
- data/lib/eco/api/usecases/default/utils/split_json_case.rb +224 -0
- data/lib/eco/api/usecases/default/utils.rb +4 -0
- data/lib/eco/version.rb +1 -1
- metadata +22 -11
- data/lib/eco/api/session/batch/launcher/mode.rb +0 -23
- data/lib/eco/api/session/batch/launcher/size.rb +0 -40
data/lib/eco/api/common/people/default_parsers/xls_parser.rb
CHANGED
@@ -2,12 +2,9 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
   attribute :xls

   attr_accessor :already_required
-  attr_reader :file

-  def parser(file, _deps)
-    @file = file
-    rows.tap do |rws|
-      @file = nil
+  def parser(filename, _deps)
+    rows(file: filename).tap do |rws|
       rws.each do |row|
         to_string!(row)
       end
@@ -22,13 +19,14 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo

   def to_string!(row)
     row.transform_values! do |val|
-      next
+      next unless val
       next val if val.is_a?(String)
+
       val.to_s
     end
   end

-  def
+  def expected_headers
     log(:warn) {
       "Headers detection is using your fields_map.json file (native behaviour)"
     }
@@ -39,30 +37,31 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
     0
   end

-  def workbook
+  def workbook(file)
     require_reading_libs!
     Roo::Spreadsheet.open(file)
   end

-  def spreadheet(name_or_index = sheet_name)
-    workbook.sheet(name_or_index)
+  def spreadheet(name_or_index = sheet_name, file:)
+    workbook(file).sheet(name_or_index)
   end

-  def rows(target =
-    spreadheet.parse(header_search: target, clean: true)
+  def rows(target = expected_headers, file:)
+    spreadheet(file: file).parse(header_search: target, clean: true)
   rescue Roo::HeaderRowNotFoundError => e
     missing = JSON.parse(e.message)

     log(:warn) {
-      "The input file is missing these headers: #{missing}"
+      "The input file is missing these expected headers: #{missing}"
     }

     present = target - missing
-    rows(present)
+    rows(present, file: file)
   end

   def require_reading_libs!
     return if already_required
+
     require 'roo'
     require 'roo-xls'
     self.already_required = true
data/lib/eco/api/common/people/default_parsers.rb
CHANGED
@@ -12,6 +12,7 @@ module Eco
   end
 end

+require_relative 'default_parsers/helpers'
 require_relative 'default_parsers/select_parser'
 require_relative 'default_parsers/boolean_parser'
 require_relative 'default_parsers/numeric_parser'
@@ -22,4 +23,5 @@ require_relative 'default_parsers/freemium_parser'
 require_relative 'default_parsers/policy_groups_parser'
 require_relative 'default_parsers/login_providers_parser'
 require_relative 'default_parsers/csv_parser'
+require_relative 'default_parsers/json_parser'
 require_relative 'default_parsers/xls_parser'
data/lib/eco/api/common/people/entry_factory.rb
CHANGED
@@ -28,6 +28,7 @@ module Eco
       # to translate external names into internal ones and _vice versa_.
       def initialize(e, schema:, person_parser: nil, default_parser: nil, attr_map: nil)
         super(e)
+
         msg = "Constructor needs a PersonSchema. Given: #{schema.class}"
         fatal msg unless schema.is_a?(Ecoportal::API::V1::PersonSchema)

@@ -133,9 +134,10 @@ module Eco
           out.concat(curr)
         end
       end
-
+
+      # Get content only when it's not :xls, nor :json
       # note: even if content was provided, file takes precedence
-      if (format
+      if get_content?(format) && file # rubocop:disable Style/IfUnlessModifier
         content = get_file_content(file, encoding: encoding)
       end

@@ -166,8 +168,10 @@ module Eco
         end
       end.tap do |out_array|
         start_from_two = (format == :csv) || format == :xls
-
-
+        first_idx = start_from_two ? 2 : 1
+
+        out_array.each.with_index(first_idx) do |entry_hash, idx|
+          entry_hash["idx"] = idx
           entry_hash["source_file"] = file
         end
       end
@@ -222,6 +226,13 @@ module Eco

       private

+      def get_content?(format)
+        return false if format == :xls
+        return false if format == :json
+
+        true
+      end
+
       def abort(message)
         log(:error) { message }
         exit(1)
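A quick sketch of what the new predicate gates (return values follow the method above; the rationale in the comments is inferred from the surrounding diff, not stated by it):

# Illustrative only — mirrors EntryFactory#get_content? as added above.
get_content?(:xls)  # => false (spreadsheets are parsed from the file path)
get_content?(:json) # => false (handled by the new json_parser)
get_content?(:csv)  # => true  (raw content is read via get_file_content)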
data/lib/eco/api/session/batch/launcher/mode_size.rb
ADDED
@@ -0,0 +1,65 @@
+module Eco
+  module API
+    class Session
+      class Batch
+        module Launcher
+          module ModeSize
+            include Eco::API::Session::Batch::Launcher::Options
+
+            DEFAULT_BATCH_SIZE = 50
+            DEFAULT_JOB_SIZE = 100
+
+            def batch_size(opts = options)
+              return job_mode_size if job_mode?(opts)
+
+              batch_mode_size
+            end
+
+            private
+
+            # Swaps to batch endpoint on specific errors
+            def batch_mode_on(*error_types, options: self.options, allow_job_mode: true, &block)
+              in_job_mode = allow_job_mode && job_mode?(options)
+
+              yield(in_job_mode, batch_size(options))
+            rescue *error_types
+              raise unless in_job_mode
+
+              yield(false, batch_mode_size)
+            end
+
+            # MODE
+
+            # @return [Symbol] the batch mode to run
+            def batch_mode(opts = options)
+              opts.dig(:workflow, :batch, :mode) || :batch
+            end
+
+            # @return [Boolean] are we running in `:job` mode?
+            def job_mode?(opts = options)
+              batch_mode(opts) == :job
+            end
+
+            # SIZE
+
+            def job_mode_size
+              options.dig(:workflow, :batch, :job, :size).then do |size|
+                next self.class::DEFAULT_JOB_SIZE unless size
+
+                size
+              end
+            end
+
+            def batch_mode_size
+              options.dig(:workflow, :batch, :size).then do |size|
+                next self.class::DEFAULT_BATCH_SIZE unless size
+
+                [size, 100].min
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
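A hedged sketch of how the `ModeSize` lookups resolve, with an illustrative options hash shaped after the `dig(:workflow, :batch, ...)` calls above (the values themselves are made up):

# Illustrative only — key paths mirror the dig calls above.
opts = {workflow: {batch: {mode: :job, size: 500, job: {size: 250}}}}

batch_mode(opts) # => :job (falls back to :batch when the key is absent)
job_mode?(opts)  # => true
# With these as the instance `options`: batch_size => 250 (job_mode_size);
# in :batch mode it would be [500, 100].min => 100 (batch_mode_size caps at 100).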
data/lib/eco/api/session/batch/launcher/retry.rb
CHANGED
@@ -16,9 +16,9 @@ module Eco

         private

-        def offer_retry_on(
+        def offer_retry_on(*error_types, retries_left: 3, &block)
           yield
-        rescue
+        rescue *error_types => err
           raise err.class, err.message, cause: nil unless retries_left.positive?

           explanation = "#{err}\n"
@@ -29,7 +29,7 @@ module Eco
           raise unless response.upcase.start_with?("Y")

           puts "\nOkay... let's retry!"
-          offer_retry_on(
+          offer_retry_on(*error_types, retries_left: retries_left - 1, &block)
         end
       end
     end
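A minimal usage sketch of the reworked helper (the error class comes from the RETRY_ON constant the launcher adds below; `launch_request` is a hypothetical block body):

# Illustrative only: retry the block on the given error types, asking the
# user (up to retries_left more times) whether to retry.
offer_retry_on(Ecoportal::API::Errors::TimeOut, retries_left: 2) do
  launch_request # hypothetical call that may raise TimeOut
end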
data/lib/eco/api/session/batch/launcher/status_handling.rb
CHANGED
@@ -6,14 +6,16 @@ module Eco
        module StatusHandling
          private

-          def tap_status(enviro:, queue:, method:, status: nil
+          def tap_status(enviro:, queue:, method:, status: nil)
            status ||= Eco::API::Session::Batch::Status.new(
              enviro,
              queue: queue,
              method: method
            )

-            status.tap
+            status.tap do
+              yield(status) if block_given?
+            end
          end
        end
      end
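The rewrite makes the block optional while still returning the `Batch::Status`. A hedged usage sketch (the argument values are placeholders):

# Illustrative only: the status object is yielded when a block is given,
# and returned either way.
status = tap_status(enviro: enviro, queue: people, method: :update) do |st|
  st[person] = response # e.g. the bookkeeping done in launch_batch
end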
data/lib/eco/api/session/batch/launcher.rb
CHANGED
@@ -1,7 +1,6 @@
 require_relative 'launcher/valid_methods'
 require_relative 'launcher/options'
-require_relative 'launcher/mode'
-require_relative 'launcher/size'
+require_relative 'launcher/mode_size'
 require_relative 'launcher/benchmarking'
 require_relative 'launcher/status_handling'
 require_relative 'launcher/retry'
@@ -24,12 +23,17 @@ module Eco
        end

        include Options
-        include Mode
-        include Size
+        include ModeSize
        include Benchmarking
        include StatusHandling
        include Retry

+        TIMEOUT_RETRIES = 2
+        RETRY_ON = [
+          Ecoportal::API::Errors::TimeOut,
+          Ecoportal::API::Errors::StartTimeOut
+        ].freeze
+
        private

        def batch_from(
@@ -48,7 +52,6 @@ module Eco
          launch_batch(
            data,
            method: method,
-            per_page: params[:per_page] || batch_size(options),
            people_api: people_api,
            silent: silent,
            options: options
@@ -59,54 +62,56 @@ module Eco
          data,
          method:,
          status: nil,
-          job_mode: true,
+          job_mode: true,
          options: self.options,
-          per_page: batch_size(options),
          people_api: api&.people,
          silent: false
        )
-          iteration = 1
-          done = 0
-          iterations = (data.length.to_f / per_page).ceil

          tap_status(status: status, enviro: enviro, queue: data, method: method) do |overall_status|
            pending_for_server_error = data.to_a[0..]

-
+            batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |as_job_mode, per_page|
+              iteration = 0
+              done = 0
+              iterations = (data.length.to_f / per_page).ceil

-
-            msg = "starting batch '#{method}' iteration #{iteration}/#{iterations}, "
-            msg << "with #{slice.length} entries of #{data.length} -- #{done} done"
-            msg << (" " * 20)
-            log(:info) { msg } unless silent
+              start_time = Time.now

-
+              data.each_slice(per_page) do |slice|
+                iteration += 1

-
-
-
-
-              faltal("Request with no response") unless response
+                msg = "starting batch '#{method}' iteration #{iteration}/#{iterations}, "
+                msg << "with #{slice.length} entries of #{data.length} -- #{done} done"
+                msg << (" " * 20)
+                log(:info) { msg } unless silent

-
+                start_slice = Time.now

-
-
-
-
-
-              end
+                offer_retry_on(*RETRY_ON, retries_left: TIMEOUT_RETRIES) do
+                  people_api.batch(job_mode: as_job_mode) do |batch|
+                    slice.each do |person|
+                      batch.public_send(method, person) do |response|
+                        faltal("Request with no response") unless response

-
+                        next if server_error?(response)

-
-
-
-
-
+                        pending_for_server_error.delete(person)
+                        overall_status[person] = response
+                      end
+                    end
+                  end # end batch
+                end
+
+                done += slice.length

-
-
+                msg = " ... iteration #{iteration}/#{iterations} done "
+                msg << "in #{str_per_sec(start_slice, slice.length)} "
+                msg << "(average: #{str_per_sec(start_time, done)})"
+                msg << (" " * 20)
+                log(:info) { msg } unless silent
+              end # next slice
+            end

            # temporary working around (due to back-end problems with batch/jobs)
            unless pending_for_server_error.empty?
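Read together with `ModeSize#batch_mode_on` and `Retry#offer_retry_on`, the control flow of the reworked `launch_batch` is roughly (a hedged summary of the code above, not library documentation):

# Illustrative pseudo-flow of the rescue path above:
batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |as_job_mode, per_page|
  # 1st pass: as_job_mode == true (when in :job mode), per_page == job size.
  # If TimeOut/StartTimeOut still escapes offer_retry_on's user-driven
  # retries, batch_mode_on re-yields with as_job_mode == false and
  # per_page == batch_mode_size (falling back to the plain batch endpoint).
end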
data/lib/eco/api/session.rb
CHANGED
@@ -132,7 +132,9 @@ module Eco
      # If `schema` is `nil` or not provided it uses the currently associated to the `session`
      def entry_factory(schema: nil)
        schema = to_schema(schema) || self.schema
+
        return @entry_factories[schema&.id] if @entry_factories.key?(schema&.id)
+
        unless @entry_factories.empty?
          @entry_factories[schema&.id] = @entry_factories.values.first.newFactory(schema: schema)
          return @entry_factories[schema&.id]
data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb
ADDED
@@ -0,0 +1,26 @@
+class Eco::API::UseCases::Default::Utils::GroupCsv
+  class Cli < Eco::API::UseCases::Cli
+    str_desc = 'Groups the csv rows by a pivot field. '
+    str_desc << 'It assumes the sorting field is sorted '
+    str_desc << '(same values should be consecutive)'
+
+    desc str_desc
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: {name: file}})
+      end
+    end
+
+    add_option("-start-at", "Get only the last N-start_at rows") do |options|
+      count = SCR.get_arg("-start-at", with_param: true)
+      options.deep_merge!(input: {file: {start_at: count}})
+    end
+
+    add_option('-by', 'The column that should be used to group') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {group_by_field: file})
+      end
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb
ADDED
@@ -0,0 +1,10 @@
+class Eco::API::UseCases::Default::Utils::JsonToCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc "Transforms an input JSON file into a CSV one."
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb
ADDED
@@ -0,0 +1,17 @@
+class Eco::API::UseCases::Default::Utils::SortCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Sorts the CSV by column -by'
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: file})
+      end
+    end
+
+    add_option('-by', 'The column that should be used to sorting') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {sort_by: file})
+      end
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/cli/split_json_cli.rb
ADDED
@@ -0,0 +1,15 @@
+class Eco::API::UseCases::Default::Utils::SplitJson
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Splits a json input file into multiple files'
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+
+    add_option("-max-items", "The max count of items of the output files") do |options|
+      count = SCR.get_arg("-max-items", with_param: true)
+      options.deep_merge!(output: {file: {max_items: count}})
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/group_csv_case.rb
ADDED
@@ -0,0 +1,213 @@
+# This script assumes that for the `GROUP_BY_FIELD` rows are consecutive.
+# @note you might run first the `sort-csv` case.
+# @note you must inherit from this case and define the constants.
+#
+#   GROUP_BY_FIELD = 'target_csv_field'.freeze
+#   GROUPED_FIELDS = [
+#     'joined_field_1',
+#     'joined_field_2',
+#     'joined_field_3',
+#   ].freeze
+#
+class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
+  name 'group-csv'
+  type :other
+
+  require_relative 'cli/group_csv_cli'
+
+  def main(*_args)
+    if simulate?
+      count = Eco::CSV.count(input_file)
+      log(:info) { "CSV '#{input_file}' has #{count} rows." }
+    else
+      generate_file
+    end
+  end
+
+  private
+
+  def generate_file # rubocop:disable Metrics/AbcSize
+    row_count = 0
+    in_index = nil
+
+    CSV.open(output_filename, 'wb') do |out_csv|
+      first = true
+
+      puts "\n"
+
+      streamed_input.for_each(start_at_idx: start_at) do |row, idx|
+        if first
+          first = false
+          headers!(row)
+          out_csv << headers
+          require_group_by_field!(row, file: input_file)
+        end
+
+        in_index = idx
+        next unless !block_given? || yield(row, idx)
+
+        next unless pivotable?(row, idx)
+        next unless (last_group = pivot_row(row))
+
+        row_count += 1
+
+        if (row_count % 500).zero?
+          print "... Done #{row_count} rows \r"
+          $stdout.flush
+        end
+
+        out_csv << last_group.values_at(*headers)
+      end
+
+      # finalize
+      if (lrow = pivot_row)
+        row_count += 1
+        out_csv << lrow.values_at(*headers)
+      end
+    ensure
+      msg = "Generated file '#{output_filename}' "
+      msg << "with #{row_count} rows (out of #{in_index})."
+
+      log(:info) { msg } unless simulate?
+    end
+  end
+
+  # It tracks the current grouped row
+  # @return [Nil, Hash] the last grouped row when `row` doesn't belong
+  #   or `nil` otherwise
+  def pivot_row(row = nil)
+    @group ||= {}
+    return @group unless row
+
+    pivot_value = row[group_by_field]
+
+    unless (last_pivot = @group[group_by_field])
+      last_pivot = @group[group_by_field] = pivot_value
+    end
+
+    last = @group
+    @group = {group_by_field => pivot_value} unless pivot_value == last_pivot
+
+    headers_rest.each do |field|
+      curr_values = row[field].to_s.split('|').compact.uniq
+      pivot_values = @group[field].to_s.split('|').compact.uniq
+      @group[field] = (pivot_values | curr_values).join('|')
+    end
+
+    last unless last == @group
+  end
+
+  attr_reader :group
+  attr_reader :headers, :headers_rest
+
+  def headers!(row)
+    return if headers?
+
+    @headers_rest = grouped_fields & row.headers
+    @headers_rest -= [group_by_field]
+    @headers = [group_by_field, *headers_rest]
+  end
+
+  def headers?
+    instance_variable_defined?(:@headers)
+  end
+
+  def pivotable?(row, idx)
+    return true unless row[group_by_field].to_s.strip.empty?
+
+    msg = "Row #{idx} doesn't have value for pivot field '#{group_by_field}'"
+    msg << ". Skipping (discared) ..."
+    log(:warn) { msg }
+    false
+  end
+
+  def streamed_input
+    @streamed_input ||= Eco::CSV::Stream.new(input_file)
+  end
+
+  def input_file
+    options.dig(:input, :file, :name)
+  end
+
+  def start_at
+    return nil unless (num = options.dig(:input, :file, :start_at))
+
+    num = num.to_i
+    num = nil if num.zero?
+    num
+  end
+
+  def output_filename
+    return nil unless input_name
+
+    File.join(input_dir, "#{input_name}_grouped#{input_ext}")
+  end
+
+  def input_name
+    @input_name ||= File.basename(input_basename, input_ext)
+  end
+
+  def input_ext
+    @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
+      ".#{name}"
+    end
+  end
+
+  def input_basename
+    @input_basename ||= File.basename(input_full_filename)
+  end
+
+  def input_dir
+    @input_dir = File.dirname(input_full_filename)
+  end
+
+  def input_full_filename
+    @input_full_filename ||= File.expand_path(input_file)
+  end
+
+  def require_group_by_field!(row, file:)
+    return true if row.key?(group_by_field)
+
+    msg = "Pivot field '#{group_by_field}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def group_by_field
+    return @group_by_field if instance_variable_defined?(:@group_by_field)
+
+    return (@group_by_field = opts_group_by) if opts_group_by
+
+    unless self.class.const_defined?(:GROUP_BY_FIELD)
+      msg = "(#{self.class}) You must define GROUP_BY_FIELD constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @group_by_field = self.class::GROUP_BY_FIELD
+  end
+
+  def grouped_fields
+    return @grouped_fields if instance_variable_defined?(:@grouped_fields)
+
+    unless self.class.const_defined?(:GROUPED_FIELDS)
+      msg = "(#{self.class}) You must define GROUPED_FIELDS constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @grouped_fields ||= [self.class::GROUPED_FIELDS].flatten.compact.tap do |flds|
+      next unless flds.empty?
+
+      log(:warn) {
+        msg = "There were no fields to be grouped/joined. "
+        msg << "This is equivalent to launch a unique operation."
+        msg
+      }
+    end
+  end
+
+  def opts_group_by
+    options.dig(:input, :group_by_field)
+  end
+end
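Since the case requires inheriting and defining the constants (per its own header comment), a minimal sketch of a custom case could look like this (class name, case name and field names are hypothetical):

# Hypothetical subclass — illustrative only.
class MyOrgGroupCsv < Eco::API::UseCases::Default::Utils::GroupCsv
  name 'my-org-group-csv'
  type :other

  GROUP_BY_FIELD = 'employee_id'.freeze
  GROUPED_FIELDS = %w[role site supervisor_id].freeze
end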