eco-helpers 3.0.18 → 3.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -3
- data/eco-helpers.gemspec +3 -3
- data/lib/eco/api/common/loaders/parser.rb +10 -0
- data/lib/eco/api/common/people/default_parsers/csv_parser.rb +21 -208
- data/lib/eco/api/common/people/default_parsers/helpers/expected_headers.rb +206 -0
- data/lib/eco/api/common/people/default_parsers/helpers/null_parsing.rb +36 -0
- data/lib/eco/api/common/people/default_parsers/helpers.rb +15 -0
- data/lib/eco/api/common/people/default_parsers/json_parser.rb +56 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +13 -14
- data/lib/eco/api/common/people/default_parsers.rb +2 -0
- data/lib/eco/api/common/people/entry_factory.rb +15 -4
- data/lib/eco/api/session/batch/launcher/status_handling.rb +4 -2
- data/lib/eco/api/session/batch/launcher.rb +3 -3
- data/lib/eco/api/session.rb +2 -0
- data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb +26 -0
- data/lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb +10 -0
- data/lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb +17 -0
- data/lib/eco/api/usecases/default/utils/cli/split_json_cli.rb +15 -0
- data/lib/eco/api/usecases/default/utils/group_csv_case.rb +213 -0
- data/lib/eco/api/usecases/default/utils/json_to_csv_case.rb +71 -0
- data/lib/eco/api/usecases/default/utils/sort_csv_case.rb +127 -0
- data/lib/eco/api/usecases/default/utils/split_json_case.rb +224 -0
- data/lib/eco/api/usecases/default/utils.rb +4 -0
- data/lib/eco/version.rb +1 -1
- metadata +21 -9
data/lib/eco/api/common/people/default_parsers/xls_parser.rb
CHANGED

```diff
@@ -2,12 +2,9 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
   attribute :xls

   attr_accessor :already_required
-  attr_reader :file

-  def parser(
-
-    rows.tap do |rws|
-      @file = nil
+  def parser(filename, _deps)
+    rows(file: filename).tap do |rws|
       rws.each do |row|
         to_string!(row)
       end
@@ -22,13 +19,14 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo

   def to_string!(row)
     row.transform_values! do |val|
-      next
+      next unless val
       next val if val.is_a?(String)
+
       val.to_s
     end
   end

-  def
+  def expected_headers
     log(:warn) {
       "Headers detection is using your fields_map.json file (native behaviour)"
     }
@@ -39,30 +37,31 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
     0
   end

-  def workbook
+  def workbook(file)
     require_reading_libs!
     Roo::Spreadsheet.open(file)
   end

-  def spreadheet(name_or_index = sheet_name)
-    workbook.sheet(name_or_index)
+  def spreadheet(name_or_index = sheet_name, file:)
+    workbook(file).sheet(name_or_index)
   end

-  def rows(target =
-    spreadheet.parse(header_search: target, clean: true)
+  def rows(target = expected_headers, file:)
+    spreadheet(file: file).parse(header_search: target, clean: true)
   rescue Roo::HeaderRowNotFoundError => e
     missing = JSON.parse(e.message)

     log(:warn) {
-      "The input file is missing these headers: #{missing}"
+      "The input file is missing these expected headers: #{missing}"
     }

     present = target - missing
-    rows(present)
+    rows(present, file: file)
   end

   def require_reading_libs!
     return if already_required
+
     require 'roo'
     require 'roo-xls'
     self.already_required = true
```
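The hunk above replaces the cached `@file` instance variable with an explicit `file:` keyword argument threaded through `rows` → `spreadheet` → `workbook`. A minimal sketch of that refactoring pattern, with hypothetical class names (not the gem's API) and `CSV` standing in for Roo:

```ruby
require 'csv' # stand-in for Roo; only the call-threading pattern matters here

# Before (sketch): the file is cached in instance state, so the helpers are
# order-dependent and the reader is unsafe to reuse mid-parse.
class StatefulReader
  def parse(file)
    @file = file
    rows
  ensure
    @file = nil
  end

  private

  def rows
    CSV.read(@file, headers: true)
  end
end

# After (sketch): the file travels with each call; no instance state to reset.
class StatelessReader
  def parse(file)
    rows(file: file)
  end

  private

  def rows(file:)
    CSV.read(file, headers: true)
  end
end
```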
data/lib/eco/api/common/people/default_parsers.rb
CHANGED

```diff
@@ -12,6 +12,7 @@ module Eco
     end
   end

+require_relative 'default_parsers/helpers'
 require_relative 'default_parsers/select_parser'
 require_relative 'default_parsers/boolean_parser'
 require_relative 'default_parsers/numeric_parser'
@@ -22,4 +23,5 @@ require_relative 'default_parsers/freemium_parser'
 require_relative 'default_parsers/policy_groups_parser'
 require_relative 'default_parsers/login_providers_parser'
 require_relative 'default_parsers/csv_parser'
+require_relative 'default_parsers/json_parser'
 require_relative 'default_parsers/xls_parser'
```
data/lib/eco/api/common/people/entry_factory.rb
CHANGED

```diff
@@ -28,6 +28,7 @@ module Eco
     # to translate external names into internal ones and _vice versa_.
     def initialize(e, schema:, person_parser: nil, default_parser: nil, attr_map: nil)
       super(e)
+
       msg = "Constructor needs a PersonSchema. Given: #{schema.class}"
       fatal msg unless schema.is_a?(Ecoportal::API::V1::PersonSchema)

@@ -133,9 +134,10 @@ module Eco
           out.concat(curr)
         end
       end
-
+
+      # Get content only when it's not :xls, nor :json
       # note: even if content was provided, file takes precedence
-      if (format
+      if get_content?(format) && file # rubocop:disable Style/IfUnlessModifier
         content = get_file_content(file, encoding: encoding)
       end

@@ -166,8 +168,10 @@ module Eco
         end
       end.tap do |out_array|
         start_from_two = (format == :csv) || format == :xls
-
-
+        first_idx = start_from_two ? 2 : 1
+
+        out_array.each.with_index(first_idx) do |entry_hash, idx|
+          entry_hash["idx"] = idx
           entry_hash["source_file"] = file
         end
       end
@@ -222,6 +226,13 @@ module Eco

     private

+    def get_content?(format)
+      return false if format == :xls
+      return false if format == :json
+
+      true
+    end
+
     def abort(message)
       log(:error) { message }
       exit(1)
```
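The indexing change above numbers entries from 2 for tabular sources: row 1 of a CSV/XLS file is the header, so starting at 2 makes `idx` match the row a user sees in a spreadsheet, while other formats (e.g. JSON) start at 1. A minimal standalone illustration of the arithmetic:

```ruby
entries = [{ 'name' => 'Ana' }, { 'name' => 'Bob' }]

format    = :csv
first_idx = [:csv, :xls].include?(format) ? 2 : 1

entries.each.with_index(first_idx) do |entry, idx|
  entry['idx'] = idx # Ana => 2, Bob => 3, matching visible spreadsheet rows
end
```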
data/lib/eco/api/session/batch/launcher/status_handling.rb
CHANGED

```diff
@@ -6,14 +6,16 @@ module Eco
       module StatusHandling
         private

-        def tap_status(enviro:, queue:, method:, status: nil
+        def tap_status(enviro:, queue:, method:, status: nil)
           status ||= Eco::API::Session::Batch::Status.new(
             enviro,
             queue: queue,
             method: method
           )

-          status.tap
+          status.tap do
+            yield(status) if block_given?
+          end
         end
       end
     end
```
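`tap` returns its receiver regardless of what the block does, which is what lets `tap_status` both hand the status to a caller-supplied block and return it. A small standalone sketch of that idiom (plain hash instead of the gem's `Batch::Status`):

```ruby
def tap_status(status)
  status.tap do
    yield(status) if block_given?
  end
end

status = { method: :update, queue: [] }

result = tap_status(status) { |st| st[:queue] << :person_1 }
result.equal?(status) # => true; the block ran and the status came back

tap_status(status)    # => status; blockless callers are also fine
```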
data/lib/eco/api/session/batch/launcher.rb
CHANGED

```diff
@@ -71,7 +71,7 @@ module Eco
         tap_status(status: status, enviro: enviro, queue: data, method: method) do |overall_status|
           pending_for_server_error = data.to_a[0..]

-          batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |
+          batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |as_job_mode, per_page|
             iteration = 0
             done = 0
             iterations = (data.length.to_f / per_page).ceil
@@ -79,7 +79,7 @@ module Eco
             start_time = Time.now

             data.each_slice(per_page) do |slice|
-              iteration
+              iteration += 1

               msg = "starting batch '#{method}' iteration #{iteration}/#{iterations}, "
               msg << "with #{slice.length} entries of #{data.length} -- #{done} done"
@@ -89,7 +89,7 @@ module Eco
               start_slice = Time.now

               offer_retry_on(*RETRY_ON, retries_left: TIMEOUT_RETRIES) do
-                people_api.batch(job_mode:
+                people_api.batch(job_mode: as_job_mode) do |batch|
                   slice.each do |person|
                     batch.public_send(method, person) do |response|
                       faltal("Request with no response") unless response
```
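The pagination arithmetic in this hunk is worth seeing in isolation: the iteration total is the ceiling of `data.length / per_page`, and the counter increments once per slice (the `iteration += 1` fix above). A runnable sketch with illustrative values:

```ruby
data     = (1..23).to_a
per_page = 10

iterations = (data.length.to_f / per_page).ceil # => 3
iteration  = 0
done       = 0

data.each_slice(per_page) do |slice|
  iteration += 1
  puts "starting iteration #{iteration}/#{iterations}, " \
       "with #{slice.length} entries of #{data.length} -- #{done} done"
  done += slice.length
end
```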
data/lib/eco/api/session.rb
CHANGED

```diff
@@ -132,7 +132,9 @@ module Eco
       # If `schema` is `nil` or not provided it uses the currently associated to the `session`
       def entry_factory(schema: nil)
         schema = to_schema(schema) || self.schema
+
         return @entry_factories[schema&.id] if @entry_factories.key?(schema&.id)
+
         unless @entry_factories.empty?
           @entry_factories[schema&.id] = @entry_factories.values.first.newFactory(schema: schema)
           return @entry_factories[schema&.id]
```
data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb
ADDED

```diff
@@ -0,0 +1,26 @@
+class Eco::API::UseCases::Default::Utils::GroupCsv
+  class Cli < Eco::API::UseCases::Cli
+    str_desc = 'Groups the csv rows by a pivot field. '
+    str_desc << 'It assumes the sorting field is sorted '
+    str_desc << '(same values should be consecutive)'
+
+    desc str_desc
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: {name: file}})
+      end
+    end
+
+    add_option("-start-at", "Get only the last N-start_at rows") do |options|
+      count = SCR.get_arg("-start-at", with_param: true)
+      options.deep_merge!(input: {file: {start_at: count}})
+    end
+
+    add_option('-by', 'The column that should be used to group') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {group_by_field: file})
+      end
+    end
+  end
+end
```
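For orientation, here is the approximate shape the callback and both option handlers build up in `options` (assuming `deep_merge!` nests rather than overwrites; the values are hypothetical, not from the source):

```ruby
# Hypothetical invocation: group-csv people.csv -start-at 100 -by employee_id
options = {
  input: {
    file: {
      name:     'people.csv', # from the positional file argument
      start_at: '100'         # from -start-at (kept as a string here)
    },
    group_by_field: 'employee_id' # from -by
  }
}
```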
data/lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb
ADDED

```diff
@@ -0,0 +1,10 @@
+class Eco::API::UseCases::Default::Utils::JsonToCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc "Transforms an input JSON file into a CSV one."
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+  end
+end
```
data/lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb
ADDED

```diff
@@ -0,0 +1,17 @@
+class Eco::API::UseCases::Default::Utils::SortCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Sorts the CSV by column -by'
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: file})
+      end
+    end
+
+    add_option('-by', 'The column that should be used to sorting') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {sort_by: file})
+      end
+    end
+  end
+end
```
data/lib/eco/api/usecases/default/utils/cli/split_json_cli.rb
ADDED

```diff
@@ -0,0 +1,15 @@
+class Eco::API::UseCases::Default::Utils::SplitJson
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Splits a json input file into multiple files'
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+
+    add_option("-max-items", "The max count of items of the output files") do |options|
+      count = SCR.get_arg("-max-items", with_param: true)
+      options.deep_merge!(output: {file: {max_items: count}})
+    end
+  end
+end
```
data/lib/eco/api/usecases/default/utils/group_csv_case.rb
ADDED

```diff
@@ -0,0 +1,213 @@
+# This script assumes that for the `GROUP_BY_FIELD` rows are consecutive.
+# @note you might run first the `sort-csv` case.
+# @note you must inherit from this case and define the constants.
+#
+#   GROUP_BY_FIELD = 'target_csv_field'.freeze
+#   GROUPED_FIELDS = [
+#     'joined_field_1',
+#     'joined_field_2',
+#     'joined_field_3',
+#   ].freeze
+#
+class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
+  name 'group-csv'
+  type :other
+
+  require_relative 'cli/group_csv_cli'
+
+  def main(*_args)
+    if simulate?
+      count = Eco::CSV.count(input_file)
+      log(:info) { "CSV '#{input_file}' has #{count} rows." }
+    else
+      generate_file
+    end
+  end
+
+  private
+
+  def generate_file # rubocop:disable Metrics/AbcSize
+    row_count = 0
+    in_index = nil
+
+    CSV.open(output_filename, 'wb') do |out_csv|
+      first = true
+
+      puts "\n"
+
+      streamed_input.for_each(start_at_idx: start_at) do |row, idx|
+        if first
+          first = false
+          headers!(row)
+          out_csv << headers
+          require_group_by_field!(row, file: input_file)
+        end
+
+        in_index = idx
+        next unless !block_given? || yield(row, idx)
+
+        next unless pivotable?(row, idx)
+        next unless (last_group = pivot_row(row))
+
+        row_count += 1
+
+        if (row_count % 500).zero?
+          print "... Done #{row_count} rows \r"
+          $stdout.flush
+        end
+
+        out_csv << last_group.values_at(*headers)
+      end
+
+      # finalize
+      if (lrow = pivot_row)
+        row_count += 1
+        out_csv << lrow.values_at(*headers)
+      end
+    ensure
+      msg = "Generated file '#{output_filename}' "
+      msg << "with #{row_count} rows (out of #{in_index})."
+
+      log(:info) { msg } unless simulate?
+    end
+  end
+
+  # It tracks the current grouped row
+  # @return [Nil, Hash] the last grouped row when `row` doesn't belong
+  #   or `nil` otherwise
+  def pivot_row(row = nil)
+    @group ||= {}
+    return @group unless row
+
+    pivot_value = row[group_by_field]
+
+    unless (last_pivot = @group[group_by_field])
+      last_pivot = @group[group_by_field] = pivot_value
+    end
+
+    last = @group
+    @group = {group_by_field => pivot_value} unless pivot_value == last_pivot
+
+    headers_rest.each do |field|
+      curr_values = row[field].to_s.split('|').compact.uniq
+      pivot_values = @group[field].to_s.split('|').compact.uniq
+      @group[field] = (pivot_values | curr_values).join('|')
+    end
+
+    last unless last == @group
+  end
+
+  attr_reader :group
+  attr_reader :headers, :headers_rest
+
+  def headers!(row)
+    return if headers?
+
+    @headers_rest = grouped_fields & row.headers
+    @headers_rest -= [group_by_field]
+    @headers = [group_by_field, *headers_rest]
+  end
+
+  def headers?
+    instance_variable_defined?(:@headers)
+  end
+
+  def pivotable?(row, idx)
+    return true unless row[group_by_field].to_s.strip.empty?
+
+    msg = "Row #{idx} doesn't have value for pivot field '#{group_by_field}'"
+    msg << ". Skipping (discared) ..."
+    log(:warn) { msg }
+    false
+  end
+
+  def streamed_input
+    @streamed_input ||= Eco::CSV::Stream.new(input_file)
+  end
+
+  def input_file
+    options.dig(:input, :file, :name)
+  end
+
+  def start_at
+    return nil unless (num = options.dig(:input, :file, :start_at))
+
+    num = num.to_i
+    num = nil if num.zero?
+    num
+  end
+
+  def output_filename
+    return nil unless input_name
+
+    File.join(input_dir, "#{input_name}_grouped#{input_ext}")
+  end
+
+  def input_name
+    @input_name ||= File.basename(input_basename, input_ext)
+  end
+
+  def input_ext
+    @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
+      ".#{name}"
+    end
+  end
+
+  def input_basename
+    @input_basename ||= File.basename(input_full_filename)
+  end
+
+  def input_dir
+    @input_dir = File.dirname(input_full_filename)
+  end
+
+  def input_full_filename
+    @input_full_filename ||= File.expand_path(input_file)
+  end
+
+  def require_group_by_field!(row, file:)
+    return true if row.key?(group_by_field)
+
+    msg = "Pivot field '#{group_by_field}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def group_by_field
+    return @group_by_field if instance_variable_defined?(:@group_by_field)
+
+    return (@group_by_field = opts_group_by) if opts_group_by
+
+    unless self.class.const_defined?(:GROUP_BY_FIELD)
+      msg = "(#{self.class}) You must define GROUP_BY_FIELD constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @group_by_field = self.class::GROUP_BY_FIELD
+  end
+
+  def grouped_fields
+    return @grouped_fields if instance_variable_defined?(:@grouped_fields)
+
+    unless self.class.const_defined?(:GROUPED_FIELDS)
+      msg = "(#{self.class}) You must define GROUPED_FIELDS constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @grouped_fields ||= [self.class::GROUPED_FIELDS].flatten.compact.tap do |flds|
+      next unless flds.empty?
+
+      log(:warn) {
+        msg = "There were no fields to be grouped/joined. "
+        msg << "This is equivalent to launch a unique operation."
+        msg
+      }
+    end
+  end
+
+  def opts_group_by
+    options.dig(:input, :group_by_field)
+  end
+end
```
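A minimal standalone sketch of the grouping technique this case implements (stdlib `CSV` only; `PIVOT`/`JOINED` are illustrative names): consecutive rows sharing a pivot value collapse into one row, and each grouped column accumulates its distinct values joined by `|`, which is why the input must be pre-sorted on the pivot.

```ruby
require 'csv'

PIVOT  = 'id'
JOINED = ['role'].freeze

rows = CSV.parse(<<~CSV, headers: true)
  id,role
  1,admin
  1,editor
  2,viewer
CSV

grouped = []
rows.each do |row|
  last = grouped.last
  if last && last[PIVOT] == row[PIVOT]
    # Same consecutive pivot value: merge the joined columns, unique-by-union.
    JOINED.each do |f|
      last[f] = (last[f].to_s.split('|') | row[f].to_s.split('|')).join('|')
    end
  else
    grouped << row.to_h # new pivot value: start a fresh group
  end
end

grouped # => [{"id"=>"1", "role"=>"admin|editor"}, {"id"=>"2", "role"=>"viewer"}]
```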
data/lib/eco/api/usecases/default/utils/json_to_csv_case.rb
ADDED

```diff
@@ -0,0 +1,71 @@
+class Eco::API::UseCases::Default::Utils::JsonToCsv < Eco::API::Common::Loaders::UseCase
+  require_relative 'cli/json_to_csv_cli'
+
+  name 'json-to-csv'
+  type :other
+
+  def main(*_args)
+    return if simulate?
+
+    CSV.open(out_filename, 'w') do |csv|
+      csv << all_keys
+      data.each do |item|
+        csv << item.values_at(*all_keys)
+      end
+    ensure
+      log(:info) {
+        "Generated output file: '#{File.expand_path(out_filename)}'."
+      }
+    end
+  end
+
+  private
+
+  def all_keys
+    @all_keys ||= data.each_with_object([]) do |item, head|
+      head.concat(item.keys - head)
+    end
+  end
+
+  def data
+    @data ||= parse_json_file.tap do |dt|
+      ensure_array!(dt)
+
+      log(:info) {
+        "Loaded #{dt.count} items (from file '#{File.basename(input_file)}')"
+      }
+
+      exit 0 if simulate?
+    end
+  end
+
+  def out_filename
+    @out_filename ||= ''.then do
+      input_basename = File.basename(input_file)
+      base_name = File.basename(input_basename, '.json')
+      "#{base_name}.csv"
+    end
+  end
+
+  def input_file
+    options.dig(:source, :file)
+  end
+
+  def ensure_array!(data)
+    return if data.is_a?(Array)
+
+    msg = "Expecting JSON file to contain an Array. Given: #{data.class}"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def parse_json_file(filename = input_file)
+    fd = File.open(filename)
+    JSON.load fd # rubocop:disable Security/JSONLoad
+  rescue JSON::ParserError => err
+    log(:error) { "Parsing error on file '#{filename}'" }
+    raise err
+  ensure
+    fd&.close
+  end
+end
```
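The `all_keys` helper derives the CSV header as the ordered union of every item's keys, so items with heterogeneous keys still land in the right columns: `values_at` turns missing keys into empty cells. A small standalone illustration of the same idea:

```ruby
require 'csv'

data = [
  { 'id' => 1, 'name' => 'Ana' },
  { 'id' => 2, 'email' => 'bob@example.com' } # different keys than the first item
]

# Ordered union of keys across all items.
all_keys = data.each_with_object([]) do |item, head|
  head.concat(item.keys - head)
end
# => ["id", "name", "email"]

csv = CSV.generate do |out|
  out << all_keys
  data.each { |item| out << item.values_at(*all_keys) } # nil => empty cell
end

puts csv
# id,name,email
# 1,Ana,
# 2,,bob@example.com
```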
data/lib/eco/api/usecases/default/utils/sort_csv_case.rb
ADDED

```diff
@@ -0,0 +1,127 @@
+class Eco::API::UseCases::Default::Utils::SortCsv < Eco::API::Custom::UseCase
+  name 'sort-csv'
+  type :other
+
+  require_relative 'cli/sort_csv_cli'
+
+  def main(*_args)
+    if simulate?
+      count = Eco::CSV.count(input_file)
+      log(:info) { "CSV '#{input_file}' has #{count} rows." }
+    else
+      group_input_rows
+      generate_file
+    end
+  end
+
+  private
+
+  attr_reader :headers, :headers_rest
+
+  def group_input_rows
+    idx = 0
+    first = true
+
+    Eco::CSV.foreach(input_file, headers: true, skip_blanks: true) do |row|
+      idx += 1
+
+      if first
+        first = false
+        @output_headers = row.headers
+        require_sort_field!(row, file: input_file)
+      end
+
+      pivot_value = row[sort_field]
+      (row_groups[pivot_value] ||= []) << row
+
+      if (idx % 500).zero?
+        print "... Tracked #{idx} rows \r"
+        $stdout.flush
+      end
+    end
+  ensure
+    log(:info) { "Tracked #{idx} rows"}
+  end
+
+  def generate_file
+    idx = 0
+
+    CSV.open(output_filename, 'wb') do |csv|
+      csv << @output_headers
+
+      row_groups.keys.sort.each do |key|
+        row_groups[key].each do |row|
+          csv << row.values_at(*@output_headers)
+
+          idx += 1
+          if (idx % 500).zero?
+            print "... Sorted #{idx} rows \r"
+            $stdout.flush
+          end
+        end
+      end
+    end
+  ensure
+    msg = "Generated file '#{output_filename}' with #{idx} rows."
+    log(:info) { msg } unless simulate?
+  end
+
+  def row_groups
+    @row_groups ||= {}
+  end
+
+  def output_filename
+    return nil unless input_name
+
+    File.join(input_dir, "#{input_name}_sorted#{input_ext}")
+  end
+
+  def input_name
+    @input_name ||= File.basename(input_basename, input_ext)
+  end
+
+  def input_ext
+    @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
+      ".#{name}"
+    end
+  end
+
+  def input_basename
+    @input_basename ||= File.basename(input_full_filename)
+  end
+
+  def input_dir
+    @input_dir = File.dirname(input_full_filename)
+  end
+
+  def input_full_filename
+    @input_full_filename ||= File.expand_path(input_file)
+  end
+
+  def input_file
+    options.dig(:input, :file)
+  end
+
+  def require_sort_field!(row, file:)
+    return true if row.key?(sort_field)
+
+    msg = "Sort field '#{sort_field}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def sort_field
+    @sort_field ||= opts_sort_by.tap do |pivot|
+      next if pivot
+
+      msg = "The pivot field should be specified with -by option"
+
+      log(:error) { msg }
+      raise msg
+    end
+  end
+
+  def opts_sort_by
+    options.dig(:input, :sort_by)
+  end
+end
```
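A standalone sketch of the sort strategy used above (illustrative names): rows are bucketed in memory by the pivot value, then emitted in sorted key order, which gives a stable sort since each bucket preserves the input's relative order. Unlike the streaming `group-csv` case, this holds every row in memory, trading footprint for a single read pass.

```ruby
require 'csv'

SORT_BY = 'id'

rows = CSV.parse(<<~CSV, headers: true)
  id,name
  2,Bob
  1,Ana
  2,Bea
CSV

# Bucket rows by pivot value (preserves relative order within a bucket).
row_groups = {}
rows.each { |row| (row_groups[row[SORT_BY]] ||= []) << row }

# Emit buckets in sorted key order: a stable sort by SORT_BY.
# Note the keys sort as strings here, as in the case above.
sorted = row_groups.keys.sort.flat_map { |key| row_groups[key] }
sorted.map { |r| r['name'] } # => ["Ana", "Bob", "Bea"]
```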