eco-helpers 3.2.12 → 3.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -1
- data/lib/eco/api/usecases/default/utils/add_page_id_case.rb +273 -0
- data/lib/eco/api/usecases/default/utils/cli/add_page_id_cli.rb +29 -0
- data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb +5 -0
- data/lib/eco/api/usecases/default/utils/cli/merge_csv_cli.rb +27 -0
- data/lib/eco/api/usecases/default/utils/cli/track_files_cli.rb +16 -0
- data/lib/eco/api/usecases/default/utils/group_csv_case/file_handler.rb +62 -0
- data/lib/eco/api/usecases/default/utils/group_csv_case.rb +79 -33
- data/lib/eco/api/usecases/default/utils/merge_csv_case.rb +313 -0
- data/lib/eco/api/usecases/default/utils/split_csv_case.rb +6 -1
- data/lib/eco/api/usecases/default/utils/track_files_case.rb +179 -0
- data/lib/eco/api/usecases/default/utils.rb +3 -0
- data/lib/eco/api/usecases/graphql/helpers/location/command/result.rb +2 -2
- data/lib/eco/api/usecases/graphql/helpers/location/command/results.rb +2 -1
- data/lib/eco/api/usecases/graphql/helpers/location/tags_remap/tags_map.rb +5 -1
- data/lib/eco/api/usecases/graphql/helpers/location/tags_remap/tags_set.rb +6 -3
- data/lib/eco/api/usecases/graphql/helpers/location/tags_remap.rb +3 -2
- data/lib/eco/api/usecases/graphql/samples/location/command/dsl.rb +16 -6
- data/lib/eco/api/usecases/graphql/samples/location/command/service/tree_update.rb +2 -1
- data/lib/eco/csv/split.rb +47 -19
- data/lib/eco/csv/stream.rb +51 -1
- data/lib/eco/csv.rb +6 -3
- data/lib/eco/version.rb +1 -1
- metadata +10 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d67a16095de2e32c2c627214b0254df6d2685e0591ab6295082736e52494c4d3
|
|
4
|
+
data.tar.gz: 60835a688189d8feda9bdc6198bbdb0cfaa9e9f95e5c7521f36cedbec706c1b0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0c1ded6a88ad0c6394e96cb511fddb5c5ac29635307affc1577d5eeb210f01ad8dd78edf78e449b9bca765a8754aa31083abb72beb60746ed21741e523878e6c
|
|
7
|
+
data.tar.gz: a18f9c81c2430ba8251bdfc34e6e4e1d3da0fd3cbe4647226942469d8da1f71e00aa7e21e3162d9d89d492f98e800642c0132edc30a305515df67764c397de91
|
data/CHANGELOG.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
## [3.2.
|
|
5
|
+
## [3.2.15] - 2026-05-xx
|
|
6
6
|
|
|
7
7
|
### Added
|
|
8
8
|
|
|
@@ -10,6 +10,34 @@ All notable changes to this project will be documented in this file.
|
|
|
10
10
|
|
|
11
11
|
### Fixed
|
|
12
12
|
|
|
13
|
+
## [3.2.14] - 2026-05-22
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- `track-files` case
|
|
18
|
+
- `add-page-id` case
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
|
|
22
|
+
- **improvement**: added `-format` argument to `-group-csv` to output a `jsonl` **custom** file.
|
|
23
|
+
|
|
24
|
+
## [3.2.13] - 2026-04-15
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
|
|
28
|
+
- `-split-csv` case
|
|
29
|
+
- Allow custom split criteria via `splitter` named argument.
|
|
30
|
+
- `-merge-csv` case
|
|
31
|
+
|
|
32
|
+
### Changed
|
|
33
|
+
|
|
34
|
+
- improved `Stream` with methods `eof?` and `shift`
|
|
35
|
+
|
|
36
|
+
### Fixed
|
|
37
|
+
|
|
38
|
+
- Locations remap on RS update
|
|
39
|
+
- `-group-csv`: correct rows count
|
|
40
|
+
|
|
13
41
|
## [3.2.12] - 2026-01-19
|
|
14
42
|
|
|
15
43
|
### Added
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
# Use case that writes a copy of the input CSV with a page id column in front,
# where the page id of each row is looked up by the value of its pivot column
# in a table loaded from the maps CSV (`options[:input][:maps]`).
#
# Rows without a pivot value, or whose pivot value has no mapping, are logged
# as warnings and left out of the output file.
#
# @note you might add a `filter` method
#
#   def filter
#     @filter ||= proc do |row, _r_idx|
#       next true unless (ref_id = row[pivot_field(row)])
#       next false if excluded_ref_id?(ref_id)
#
#       true
#     end
#   end
#
class Eco::API::UseCases::Default::Utils::AddPageId < Eco::API::Custom::UseCase
  name 'add-page-id'
  type :other

  require_relative 'cli/add_page_id_cli'

  # Candidate names for the pivot column; exactly one must be present in a file.
  PIVOT_FIELD = [
    'ref_id'
  ].freeze

  # Default name of the page id column.
  PAGE_ID          = 'page_id'.freeze
  # Pivot values reported as excluded by `excluded_ref_id?`.
  EXCLUDED_REF_IDS = %w[].freeze

  # Entry point. When simulating it only logs the row count of the input file;
  # otherwise it generates the mapped file.
  def main(*_args)
    if simulate?
      count = Eco::CSV.count(input_file)
      log(:info) { "CSV '#{input_file}' has #{count} rows." }
    else
      generate_file(&filter)
    end
  end

  private

  attr_reader :headers, :headers_rest

  # Row filter handed to `generate_file` as its block.
  # @return [Proc, nil] `nil` (the default) means that no row is filtered out.
  def filter
    nil
  end

  # @return [Boolean] whether `ref_id` is listed in `EXCLUDED_REF_IDS`.
  def excluded_ref_id?(ref_id)
    self.class::EXCLUDED_REF_IDS.include?(ref_id)
  end

  # Streams the input file into the output file, row by row.
  # @yield [row, idx] optional filter; a falsey result skips the row.
  # @yieldparam row the current CSV row.
  # @yieldparam idx [Integer] 0-based position of the row in the input file.
  def generate_file # rubocop:disable Metrics/AbcSize
    idx           = -1
    row_count     = 0
    headers_added = false

    CSV.open(output_filename, 'wb') do |csv|
      puts "\n"

      Eco::CSV.foreach(input_file, headers: true, skip_blanks: true) do |row|
        idx += 1

        next unless !block_given? || yield(row, idx)

        # The output header is written with the first row that passes the filter.
        unless headers_added
          headers!(row)
          require_pivot_field!(row, file: input_file)

          csv << headers
          headers_added = true
        end

        unless (pivot_value = row[pivot_field])
          msg  = "Row #{idx} doesn't have value for pivot field '#{pivot_field}'"
          msg << ". Skipping (discarded) ..."
          log(:warn) { msg }
          next
        end

        unless (page_id = input_maps[pivot_value])
          warn_unknown_mapping_reference!(pivot_value)
          next
        end

        row_count += 1

        if (row_count % 500).zero?
          print "... Mapped #{row_count} rows \r"
          $stdout.flush
        end

        values     = [page_id, pivot_value]
        oth_values = row.values_at(*headers_rest)
        values.concat(oth_values) unless headers_rest.empty?

        csv << values
      end
    end
  ensure
    # `idx` is the 0-based index of the last row read, so the total is `idx + 1`.
    msg = "Generated file '#{output_filename}' with #{row_count} rows (out of #{idx + 1})."
    log(:info) { msg } unless simulate?
  end

  # Logs a warning the first time a given `ref_id` can't be mapped.
  def warn_unknown_mapping_reference!(ref_id)
    return if unknown.include?(ref_id)

    unknown << ref_id
    msg  = "Could not map '#{pivot_field}' '#{ref_id}' to a '#{page_id_field}'"
    msg << ". Skipping (discarded) ..."

    log(:warn) { msg }
  end

  # @return [Array] pivot values already reported as unmapped.
  def unknown
    @unknown ||= []
  end

  # Computes (once) the output headers: page id, pivot, then the remaining input columns.
  def headers!(row)
    return if instance_variable_defined?(:@headers)

    @headers_rest = row.headers - base_out_header(row)
    @headers      = [*base_out_header, *headers_rest]
  end

  # @return [Array<String>] the two leading columns of the output file.
  def base_out_header(row = nil)
    @base_out_header ||= [page_id_field, pivot_field(row)] # space: :output
  end

  # Builds (once) the lookup table `pivot value => page id` from the maps file.
  # @return [Hash]
  def input_maps
    return @input_maps if instance_variable_defined?(:@input_maps)

    @input_maps = {}
    idx         = 0

    Eco::CSV.foreach(input_maps_file, headers: true) do |row|
      idx += 1

      if (idx % 500).zero?
        print "... Creating mappings table (#{idx} done) \r"
        $stdout.flush
      end

      require_pivot_field!(row, space: :maps, file: input_maps_file)
      require_page_id_field!(row, file: input_maps_file)

      ref_id  = row[pivot_field(space: :maps)]
      page_id = row[page_id_field(space: :maps)]

      @input_maps[ref_id] = page_id
    end

    @input_maps
  end

  # @return [String] the maps file; logs an error and exits with status 1
  #   when it isn't specified or doesn't exist.
  def input_maps_file
    options.dig(:input, :maps).tap do |file|
      next if file && File.exist?(file)

      log(:error) {
        msg  = "You must specify an existing maps file with the option '-maps-file'"
        msg << ".\n * File: '#{file}' does not exist" unless file.nil?
        msg
      }
      exit 1
    end
  end

  # @return [String, nil] `<input dir>/<input name>_mapped<input ext>`.
  def output_filename
    return nil unless input_name

    File.join(
      input_dir,
      "#{input_name}_mapped#{input_ext}"
    )
  end

  # @return [String] basename of the input file without `input_ext`.
  def input_name
    @input_name ||= File.basename(
      input_basename,
      input_ext
    )
  end

  # @return [String] everything from the first dot of the basename onwards
  #   (i.e. `.tar.gz`), or an empty string when the basename has no dot.
  def input_ext
    @input_ext ||= begin
      parts = input_basename.split('.').drop(1)
      # Without this guard a name with no extension would produce a bare '.'
      parts.empty? ? '' : ".#{parts.join('.')}"
    end
  end

  def input_basename
    @input_basename ||= File.basename(input_full_filename)
  end

  def input_dir
    @input_dir ||= File.dirname(input_full_filename)
  end

  def input_full_filename
    @input_full_filename ||= File.expand_path(input_file)
  end

  def input_file
    options.dig(:input, :file)
  end

  # Ensures that `row` has the pivot column of the given `space`.
  # @param file [String] file name used in the error message.
  # @param space [Symbol] `:input` (the source file) or `:maps` (the maps file).
  # @raise [RuntimeError] when the column is missing.
  def require_pivot_field!(row, file:, space: :input)
    return true if row.key?(pivot_field(row, space: space))

    msg = "Pivot field '#{pivot_field(space: space)}' missing in header of file '#{file}'"
    log(:error) { msg }
    raise msg
  end

  # Ensures that `row` (of the maps file) has the page id column.
  # @raise [RuntimeError] when the column is missing.
  def require_page_id_field!(row, file:)
    return true if row.key?(page_id_field(space: :maps))

    msg = "Page ID field '#{page_id_field(space: :maps)}' missing in header of file '#{file}'"
    log(:error) { msg }
    raise msg
  end

  # Resolves (once per `space`) which of the candidate pivot columns is present.
  # @note `row` is only needed on the first call for a given `space`;
  #   later calls return the memoized column name.
  # @raise [RuntimeError] when none or more than one candidate is present in `row`.
  # @return [String] the name of the pivot column.
  def pivot_field(row = nil, space: :input)
    @pivot_field ||= {}
    return @pivot_field[space] if @pivot_field.key?(space)

    candidates = pivot_fields(space: space)
    present    = candidates.select { |name| row.key?(name) }

    unless present.one?
      msg = "Could not find any column named: #{candidates.join(', ')}"
      msg = "Multiple pivot columns: #{present.join(', ')}" if present.any?

      log(:error) { msg }
      raise msg
    end

    @pivot_field[space] = present.first.tap do |col|
      log(:info) { "Using header '#{col}' as pivot column." }
    end
  end

  # Candidate pivot column names for `space`: the `-pivot` option (only for
  # the `:input` space) or else the `PIVOT_FIELD` constant.
  # @raise [RuntimeError] when `PIVOT_FIELD` is needed but not defined.
  # @return [Array<String>]
  def pivot_fields(space: :input)
    @pivot_fields ||= {}
    return @pivot_fields[space] if @pivot_fields.key?(space)

    return (@pivot_fields[space] = [opts_pivot]) if opts_pivot && space == :input

    unless self.class.const_defined?(:PIVOT_FIELD)
      msg = "(#{self.class}) You must define PIVOT_FIELD constant"
      log(:error) { msg }
      raise msg
    end

    @pivot_fields[space] = self.class::PIVOT_FIELD.dup
  end

  # Name of the page id column for `space`: the `-page-id` option (only for
  # the `:output` space) or else the `PAGE_ID` constant.
  # @raise [RuntimeError] when `PAGE_ID` is needed but not defined.
  # @return [String]
  def page_id_field(space: :output)
    @page_id_field ||= {}
    return @page_id_field[space] if @page_id_field.key?(space)

    return (@page_id_field[space] = opts_page_id) if opts_page_id && space == :output

    unless self.class.const_defined?(:PAGE_ID)
      msg = "(#{self.class}) You must define PAGE_ID field constant"
      log(:error) { msg }
      raise msg
    end

    @page_id_field[space] = self.class::PAGE_ID
  end

  def opts_pivot
    options.dig(:input, :pivot_field)
  end

  def opts_page_id
    options.dig(:input, :page_id)
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
class Eco::API::UseCases::Default::Utils::AddPageId
  # Command line definition of the case. It fills in the `options[:input]`
  # keys `:file`, `:maps`, `:pivot_field` and `:page_id`.
  class Cli < Eco::API::UseCases::Cli
    desc 'Adds the page_id column based on mappings onto -pivot'

    # The file given right after the case argument is the input file.
    callback do |_session, options, _usecase|
      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
        options.deep_merge!(input: {file: file})
      end
    end

    add_option('-maps-file', 'Source file with the mappings') do |options|
      if (file = SCR.get_file('-maps-file', required: true, should_exist: true))
        options.deep_merge!(input: {maps: file})
      end
    end

    add_option('-pivot', 'The column that should be used to pivot') do |options|
      if (column = SCR.get_arg("-pivot", with_param: true))
        options.deep_merge!(input: {pivot_field: column})
      end
    end

    add_option('-page-id', 'The column that should be used to dump the id') do |options|
      if (column = SCR.get_arg("-page-id", with_param: true))
        options.deep_merge!(input: {page_id: column})
      end
    end
  end
end
|
|
@@ -22,5 +22,10 @@ class Eco::API::UseCases::Default::Utils::GroupCsv
|
|
|
22
22
|
options.deep_merge!(input: {group_by_field: file})
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
|
+
|
|
26
|
+
# Output format of the generated file; stored under `options[:output][:format]`.
add_option('-format', 'Kind of extract (csv - default | jsonl)') do |options|
  format = SCR.get_arg('-format', with_param: true)
  options.deep_merge!(output: {format: format})
end
|
|
25
30
|
end
|
|
26
31
|
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
class Eco::API::UseCases::Default::Utils::MergeCsv
  # Command line definition of the case. It fills in the `options[:input]`
  # keys `:file`, `:merge_file` and `:merge_by_field`.
  class Cli < Eco::API::UseCases::Cli
    desc [
      'Merges the csv rows by a pivot field. ',
      'It assumes the pivot field is sorted ',
      '(same values should be consecutive)'
    ].join

    # The file given right after the case argument is the original CSV.
    callback do |_session, options, _usecase|
      source = SCR.get_file(cli_name, required: true, should_exist: true)
      options.deep_merge!(input: {file: {name: source}}) if source
    end

    add_option('-merge', 'The CSV file that should be merged onto the original') do |options|
      other = SCR.get_file('-merge', required: true, should_exist: true)
      options.deep_merge!(input: {merge_file: {name: other}}) if other
    end

    add_option('-by', 'The column that should be used to merge') do |options|
      column = SCR.get_arg('-by', with_param: true)
      options.deep_merge!(input: {merge_by_field: column}) if column
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
class Eco::API::UseCases::Default::Utils::TrackFiles
  # Command line definition of the case. It stores the target folder under
  # `options[:input][:folder]` and the S3 subpath under `options[:output][:s3_path]`.
  class Cli < Eco::API::UseCases::Cli
    desc 'Tracks the files of a folder in a CSV'

    # The path given right after the case argument is the folder to track.
    callback do |_session, options, _usecase|
      target = SCR.get_file(cli_name, required: true)
      options.deep_merge!(input: {folder: target}) if target
    end

    add_option("-s3-path", "Relative subpath from the S3 uploads folder.") do |options|
      subpath = SCR.get_arg("-s3-path", with_param: true)
      options.deep_merge!(output: {s3_path: subpath})
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
class Eco::API::UseCases::Default::Utils::GroupCsv
  # Writer over the output file that hides the difference between the
  # supported formats: `:csv` (values are appended as CSV rows) and
  # `:jsonl` (each value is written as one line of text).
  #
  # The file is opened on creation and must be released with `#close`.
  class FileHandler
    attr_reader :filename, :format

    # @param filename [String] path of the file to (over)write.
    # @param format [Symbol] `:csv` or `:jsonl`.
    # @raise [RuntimeError] when `format` is not supported.
    def initialize(filename, format: :csv)
      @filename = filename
      @format   = format

      open_file
    end

    # Appends `value` to the file.
    # @param value for `:csv` whatever `CSV#<<` accepts; for `:jsonl` a
    #   single-line `String` or a `Hash`.
    # @raise [RuntimeError] when the file has already been closed.
    # @raise [ArgumentError] on `:jsonl`, when `value` is a multi-line string
    #   or neither a `String` nor a `Hash`.
    def <<(value)
      raise "File has been closed. Can't write to it: #{filename}" unless file

      case format
      when :csv
        file << value
      when :jsonl
        file.puts serialize(value)
      end
    end

    # Closes the underlying file. It does nothing if it was already closed.
    def close
      return if file.nil?

      file.close.tap do
        @file = nil
      end
    end

    private

    attr_reader :file

    # Turns `value` into the text of a single `jsonl` line.
    # @note deliberately NOT named `to_s`: redefining `Object#to_s` with a
    #   required argument makes interpolating or printing the handler fail.
    # @note `Hash#to_json` needs the `json` library to be loaded
    #   (presumably done elsewhere in the project -- confirm).
    def serialize(value)
      case value
      when String
        # An empty string is a valid (single) line as well.
        return value unless value.include?("\n")

        raise ArgumentError, "As string, value should be a single line. Given: #{value}"
      when Hash
        value.to_json
      else
        raise ArgumentError, "Unsupported type: #{value.class}"
      end
    end

    # Opens the output file with the writer that corresponds to `format`.
    # @note named `open_file` so it doesn't shadow `Kernel#open`.
    def open_file
      @file =
        case format
        when :csv
          CSV.open(filename, 'wb')
        when :jsonl
          File.open(filename, 'wb')
        else
          raise "Unknown output format: #{format}"
        end
    end
  end
end
|
|
@@ -1,36 +1,59 @@
|
|
|
1
1
|
# This script assumes that for the `GROUP_BY_FIELD` rows are consecutive.
|
|
2
2
|
# @note you might run first the `sort-csv` case.
|
|
3
|
+
# @note when using `jsonl` as an output `format`, it doesn't merge fields,
|
|
4
|
+
# but it groups them based on some criteria.
|
|
5
|
+
# - In this case you need to define a `json_builder` method that returns a hash.
|
|
3
6
|
# @note you must inherit from this case and define the constants.
|
|
4
7
|
#
|
|
5
|
-
# GROUP_BY_FIELD = 'target_csv_field'.freeze
|
|
8
|
+
# GROUP_BY_FIELD = 'target_csv_field'.freeze # if `-by` command option isn't used
|
|
6
9
|
# GROUPED_FIELDS = [
|
|
7
10
|
# 'joined_field_1',
|
|
8
11
|
# 'joined_field_2',
|
|
9
12
|
# 'joined_field_3',
|
|
10
13
|
# ].freeze
|
|
11
|
-
#
|
|
14
|
+
# @note that `GROUPED_FIELDS` isn't necessary if `jsonl` is used as an output `format`
|
|
12
15
|
class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
13
16
|
name 'group-csv'
|
|
14
17
|
type :other
|
|
15
18
|
|
|
16
19
|
require_relative 'cli/group_csv_cli'
|
|
20
|
+
require_relative 'group_csv_case/file_handler'
|
|
21
|
+
|
|
22
|
+
OUTPUT_FORMAT = :csv # :csv or :jsonl
|
|
17
23
|
|
|
18
24
|
def main(*_args)
|
|
19
25
|
if simulate?
|
|
20
26
|
count = Eco::CSV.count(input_file)
|
|
21
27
|
log(:info) { "CSV '#{input_file}' has #{count} rows." }
|
|
22
28
|
else
|
|
29
|
+
msg = "You should define a json_builder method when using jsonl as output format"
|
|
30
|
+
raise msg unless respond_to?(:json_builder, true) || output_format != :jsonl
|
|
31
|
+
|
|
23
32
|
generate_file
|
|
24
33
|
end
|
|
25
34
|
end
|
|
26
35
|
|
|
27
36
|
private
|
|
28
37
|
|
|
38
|
+
attr_reader :in_index
|
|
39
|
+
|
|
40
|
+
def with_output_file
|
|
41
|
+
handler = FileHandler.new(output_filename, format: output_format)
|
|
42
|
+
|
|
43
|
+
yield handler
|
|
44
|
+
ensure
|
|
45
|
+
handler&.close
|
|
46
|
+
|
|
47
|
+
msg = "Generated file '#{output_filename}' "
|
|
48
|
+
msg << "with #{row_count} rows (out of #{in_index + 1})."
|
|
49
|
+
|
|
50
|
+
log(:info) { msg } unless simulate?
|
|
51
|
+
end
|
|
52
|
+
|
|
29
53
|
def generate_file # rubocop:disable Metrics/AbcSize
|
|
30
|
-
|
|
31
|
-
in_index = nil
|
|
54
|
+
@in_index = nil
|
|
32
55
|
|
|
33
|
-
|
|
56
|
+
with_output_file do |f_handler|
|
|
34
57
|
first = true
|
|
35
58
|
|
|
36
59
|
puts "\n"
|
|
@@ -39,36 +62,37 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
39
62
|
if first
|
|
40
63
|
first = false
|
|
41
64
|
headers!(row)
|
|
42
|
-
|
|
65
|
+
f_handler << headers if output_format == :csv
|
|
43
66
|
require_group_by_field!(row, file: input_file)
|
|
44
67
|
end
|
|
45
68
|
|
|
46
|
-
in_index = idx
|
|
69
|
+
@in_index = idx
|
|
47
70
|
next unless !block_given? || yield(row, idx)
|
|
48
71
|
|
|
49
72
|
next unless pivotable?(row, idx)
|
|
50
73
|
next unless (last_group = pivot_row(row))
|
|
51
74
|
|
|
52
|
-
row_count
|
|
75
|
+
row_count!
|
|
53
76
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
77
|
+
case output_format
|
|
78
|
+
when :csv
|
|
79
|
+
f_handler << last_group.values_at(*headers)
|
|
80
|
+
when :jsonl
|
|
81
|
+
f_handler << json_builder(last_group)
|
|
57
82
|
end
|
|
58
|
-
|
|
59
|
-
out_csv << last_group.values_at(*headers)
|
|
60
83
|
end
|
|
61
84
|
|
|
62
85
|
# finalize
|
|
63
|
-
if (
|
|
64
|
-
row_count
|
|
65
|
-
|
|
86
|
+
if (l_row = pivot_row)
|
|
87
|
+
row_count!
|
|
88
|
+
|
|
89
|
+
case output_format
|
|
90
|
+
when :csv
|
|
91
|
+
f_handler << l_row.values_at(*headers)
|
|
92
|
+
when :jsonl
|
|
93
|
+
f_handler << json_builder(l_row)
|
|
94
|
+
end
|
|
66
95
|
end
|
|
67
|
-
ensure
|
|
68
|
-
msg = "Generated file '#{output_filename}' "
|
|
69
|
-
msg << "with #{row_count} rows (out of #{in_index})."
|
|
70
|
-
|
|
71
|
-
log(:info) { msg } unless simulate?
|
|
72
96
|
end
|
|
73
97
|
end
|
|
74
98
|
|
|
@@ -82,41 +106,59 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
82
106
|
pivot_value = row[group_by_field]
|
|
83
107
|
|
|
84
108
|
unless (last_pivot = @group[group_by_field])
|
|
109
|
+
# init
|
|
85
110
|
last_pivot = @group[group_by_field] = pivot_value
|
|
86
111
|
end
|
|
87
112
|
|
|
88
113
|
last = @group
|
|
89
114
|
@group = {group_by_field => pivot_value} unless pivot_value == last_pivot
|
|
90
115
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
116
|
+
case output_format
|
|
117
|
+
when :csv
|
|
118
|
+
headers_rest.each do |field|
|
|
119
|
+
curr_values = row[field].to_s.split('|').compact.uniq
|
|
120
|
+
group_values = @group[field].to_s.split('|').compact.uniq
|
|
121
|
+
@group[field] = (group_values | curr_values).join('|')
|
|
122
|
+
end
|
|
123
|
+
when :jsonl
|
|
124
|
+
@group['rows'] ||= []
|
|
125
|
+
@group['rows'] << row.to_h.slice(*headers_rest)
|
|
95
126
|
end
|
|
96
127
|
|
|
97
128
|
last unless last == @group
|
|
98
129
|
end
|
|
99
130
|
|
|
100
|
-
attr_reader :group
|
|
131
|
+
attr_reader :group, :row_count
|
|
101
132
|
attr_reader :headers, :headers_rest
|
|
102
133
|
|
|
103
134
|
def headers!(row)
|
|
104
135
|
return if headers?
|
|
105
136
|
|
|
106
|
-
@
|
|
107
|
-
@headers_rest
|
|
108
|
-
@
|
|
137
|
+
@grouped_fields = row.headers - [group_by_field] if output_format == :jsonl
|
|
138
|
+
@headers_rest = grouped_fields & row.headers
|
|
139
|
+
@headers_rest -= [group_by_field]
|
|
140
|
+
@headers = [group_by_field, *headers_rest]
|
|
109
141
|
end
|
|
110
142
|
|
|
111
143
|
def headers?
|
|
112
144
|
instance_variable_defined?(:@headers)
|
|
113
145
|
end
|
|
114
146
|
|
|
147
|
+
def row_count!
|
|
148
|
+
@row_count ||= 0
|
|
149
|
+
(@row_count += 1).tap do |cnt|
|
|
150
|
+
if (cnt % 500).zero?
|
|
151
|
+
print "... Done #{cnt} rows \r"
|
|
152
|
+
$stdout.flush
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
115
157
|
def pivotable?(row, idx)
|
|
116
158
|
return true unless row[group_by_field].to_s.strip.empty?
|
|
117
159
|
|
|
118
160
|
msg = "Row #{idx} doesn't have value for pivot field '#{group_by_field}'"
|
|
119
|
-
msg << '. Skipping (
|
|
161
|
+
msg << '. Skipping (discarded) ...'
|
|
120
162
|
log(:warn) { msg }
|
|
121
163
|
false
|
|
122
164
|
end
|
|
@@ -130,17 +172,21 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
130
172
|
end
|
|
131
173
|
|
|
132
174
|
def start_at
|
|
133
|
-
return
|
|
175
|
+
return unless (num = options.dig(:input, :file, :start_at))
|
|
134
176
|
|
|
135
177
|
num = num.to_i
|
|
136
178
|
num = nil if num.zero?
|
|
137
179
|
num
|
|
138
180
|
end
|
|
139
181
|
|
|
182
|
+
def output_format
|
|
183
|
+
options.dig(:output, :format)&.to_sym || self.class::OUTPUT_FORMAT
|
|
184
|
+
end
|
|
185
|
+
|
|
140
186
|
def output_filename
|
|
141
|
-
return
|
|
187
|
+
return unless input_name
|
|
142
188
|
|
|
143
|
-
File.join(input_dir, "#{input_name}_grouped
|
|
189
|
+
File.join(input_dir, "#{input_name}_grouped.#{output_format}")
|
|
144
190
|
end
|
|
145
191
|
|
|
146
192
|
def input_name
|