eco-helpers 3.2.13 → 3.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -1
- data/lib/eco/api/usecases/default/utils/add_page_id_case.rb +273 -0
- data/lib/eco/api/usecases/default/utils/cli/add_page_id_cli.rb +29 -0
- data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb +5 -0
- data/lib/eco/api/usecases/default/utils/cli/track_files_cli.rb +16 -0
- data/lib/eco/api/usecases/default/utils/group_csv_case/file_handler.rb +62 -0
- data/lib/eco/api/usecases/default/utils/group_csv_case.rb +64 -22
- data/lib/eco/api/usecases/default/utils/track_files_case.rb +179 -0
- data/lib/eco/api/usecases/default/utils.rb +2 -0
- data/lib/eco/version.rb +1 -1
- metadata +6 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d67a16095de2e32c2c627214b0254df6d2685e0591ab6295082736e52494c4d3
|
|
4
|
+
data.tar.gz: 60835a688189d8feda9bdc6198bbdb0cfaa9e9f95e5c7521f36cedbec706c1b0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0c1ded6a88ad0c6394e96cb511fddb5c5ac29635307affc1577d5eeb210f01ad8dd78edf78e449b9bca765a8754aa31083abb72beb60746ed21741e523878e6c
|
|
7
|
+
data.tar.gz: a18f9c81c2430ba8251bdfc34e6e4e1d3da0fd3cbe4647226942469d8da1f71e00aa7e21e3162d9d89d492f98e800642c0132edc30a305515df67764c397de91
|
data/CHANGELOG.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
## [3.2.
|
|
5
|
+
## [3.2.15] - 2026-05-xx
|
|
6
6
|
|
|
7
7
|
### Added
|
|
8
8
|
|
|
@@ -10,6 +10,17 @@ All notable changes to this project will be documented in this file.
|
|
|
10
10
|
|
|
11
11
|
### Fixed
|
|
12
12
|
|
|
13
|
+
## [3.2.14] - 2026-05-22
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- `track-files` case
|
|
18
|
+
- `add-page-id` case
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
|
|
22
|
+
- **improvement**: added `-format` argument to `-group-csv` to output a `jsonl` **custom** file.
|
|
23
|
+
|
|
13
24
|
## [3.2.13] - 2026-04-15
|
|
14
25
|
|
|
15
26
|
### Added
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
# Adds a page id column to the rows of an input CSV, by looking up the value
# of each row's pivot column in a separate mappings CSV (`-maps-file`).
#
# - The output is written next to the input file, as `<name>_mapped<ext>`.
# - Output columns are: page id, pivot column and then the rest of the input columns.
# - Rows without a pivot value, or whose pivot value has no mapping, are discarded
#   (a warning is logged).
#
# @note you might add a `filter` method that returns a `Proc`. It receives
#   `(row, row_index)` and rows for which it returns a falsey value are discarded.
#
#   def filter
#     @filter ||= proc do |row, _r_idx|
#       next true unless (ref_id = row[pivot_field(row)])
#       next false if excluded_ref_id?(ref_id)
#
#       true
#     end
#   end
#
class Eco::API::UseCases::Default::Utils::AddPageId < Eco::API::Custom::UseCase
  name 'add-page-id'
  type :other

  require_relative 'cli/add_page_id_cli'

  # Candidate names for the pivot column (exactly one must be present in a file).
  PIVOT_FIELD = [
    'ref_id'
  ].freeze

  # Default name of the page id column.
  PAGE_ID          = 'page_id'.freeze
  EXCLUDED_REF_IDS = %w[].freeze

  # In simulate mode only the input rows are counted; otherwise the mapped file is generated.
  def main(*_args)
    if simulate?
      count = Eco::CSV.count(input_file)
      log(:info) { "CSV '#{input_file}' has #{count} rows." }
    else
      generate_file(&filter)
    end
  end

  private

  attr_reader :headers, :headers_rest

  # @return [Proc, nil] optional row filter (`nil` means: keep all rows).
  def filter
    nil
  end

  def excluded_ref_id?(ref_id)
    self.class::EXCLUDED_REF_IDS.include?(ref_id)
  end

  # Streams the input CSV into the output CSV, prepending the mapped page id.
  # @yield [row, idx] optional filter; the row is discarded when it returns falsey.
  def generate_file # rubocop:disable Metrics/AbcSize
    idx           = -1 # zero-based index of the last input row read
    row_count     = 0
    headers_added = false

    CSV.open(output_filename, 'wb') do |csv|
      puts "\n"

      Eco::CSV.foreach(input_file, headers: true, skip_blanks: true) do |row|
        idx += 1

        next unless !block_given? || yield(row, idx)

        # The header is written lazily, with the first row that passes the filter.
        unless headers_added
          headers!(row)
          require_pivot_field!(row, file: input_file)

          csv << headers
          headers_added = true
        end

        unless (pivot_value = row[pivot_field])
          log(:warn) {
            "Row #{idx} doesn't have value for pivot field '#{pivot_field}'. Skipping (discarded) ..."
          }
          next
        end

        unless (page_id = input_maps[pivot_value])
          warn_unknown_mapping_reference!(pivot_value)
          next
        end

        row_count += 1

        if (row_count % 500).zero?
          print "... Mapped #{row_count} rows \r"
          $stdout.flush
        end

        values     = [page_id, pivot_value]
        oth_values = row.values_at(*headers_rest)
        values.concat(oth_values) unless headers_rest.empty?

        csv << values
      end
    end
  ensure
    # `idx` is zero-based, so the number of rows read is `idx + 1`.
    msg = "Generated file '#{output_filename}' with #{row_count} rows (out of #{idx + 1})."
    log(:info) { msg } unless simulate?
  end

  # Warns only once per unmapped reference.
  def warn_unknown_mapping_reference!(ref_id)
    return if unknown.include?(ref_id)

    unknown << ref_id

    log(:warn) {
      "Could not map '#{pivot_field}' '#{ref_id}' to a '#{page_id_field}'. Skipping (discarded) ..."
    }
  end

  # References that have already been reported as unmapped.
  def unknown
    @unknown ||= []
  end

  # Computes (once) the output header from the first processed row.
  def headers!(row)
    return if instance_variable_defined?(:@headers)

    @headers_rest = row.headers - base_out_header(row)
    @headers      = [*base_out_header, *headers_rest]
  end

  def base_out_header(row = nil)
    @base_out_header ||= [page_id_field, pivot_field(row)] # space: :output
  end

  # Loads (once) the mappings file into a `pivot value => page id` Hash.
  def input_maps
    return @input_maps if instance_variable_defined?(:@input_maps)

    # Resolved once: `input_maps_file` checks the file system on every call.
    maps_file = input_maps_file
    maps      = {}
    idx       = 0

    Eco::CSV.foreach(maps_file, headers: true) do |row|
      idx += 1

      if (idx % 500).zero?
        print "... Creating mappings table (#{idx} done) \r"
        $stdout.flush
      end

      require_pivot_field!(row, space: :maps, file: maps_file)
      require_page_id_field!(row, file: maps_file)

      ref_id  = row[pivot_field(space: :maps)]
      page_id = row[page_id_field(space: :maps)]

      maps[ref_id] = page_id
    end

    # Memoized only after a complete load.
    @input_maps = maps
  end

  # @return [String] the mappings file; logs an error and exits when it is missing.
  def input_maps_file
    options.dig(:input, :maps).tap do |file|
      next if file && File.exist?(file)

      log(:error) {
        msg = "You must specify an existing maps file with the option '-maps-file'"
        msg << ".\n * File: '#{file}' does not exist" unless file.nil?
        msg
      }
      exit 1
    end
  end

  def output_filename
    return nil unless input_name

    File.join(
      input_dir,
      "#{input_name}_mapped#{input_ext}"
    )
  end

  # Input file name without its extension(s).
  def input_name
    @input_name ||= File.basename(
      input_basename,
      input_ext
    )
  end

  # Everything after the first dot of the input file name (i.e. `.tar.gz`),
  # or an empty string when the name has no dot.
  def input_ext
    @input_ext ||= begin
      parts = input_basename.split('.').drop(1)
      parts.empty? ? '' : ".#{parts.join('.')}"
    end
  end

  def input_basename
    @input_basename ||= File.basename(input_full_filename)
  end

  def input_dir
    @input_dir ||= File.dirname(input_full_filename)
  end

  def input_full_filename
    @input_full_filename ||= File.expand_path(input_file)
  end

  def input_file
    options.dig(:input, :file)
  end

  # @raise [RuntimeError] when the pivot column is missing in `row`.
  def require_pivot_field!(row, file:, space: :input)
    field = pivot_field(row, space: space)
    return true if row.key?(field)

    msg = "Pivot field '#{field}' missing in header of file '#{file}'"
    log(:error) { msg }
    raise msg
  end

  # @raise [RuntimeError] when the page id column is missing in `row` (mappings file).
  def require_page_id_field!(row, file:)
    return true if row.key?(page_id_field(space: :maps))

    msg = "Page ID field '#{page_id_field(space: :maps)}' missing in header of file '#{file}'"
    log(:error) { msg }
    raise msg
  end

  # Resolves (once per `space`) which of the candidate pivot columns the file uses.
  # @param row [#key?, nil] a row of the file; only needed on the first call per `space`.
  # @param space [Symbol] `:input` (input file) or `:maps` (mappings file).
  # @raise [ArgumentError] when no `row` is given and the column hasn't been resolved yet.
  # @raise [RuntimeError] unless exactly one candidate column is present in `row`.
  def pivot_field(row = nil, space: :input)
    @pivot_field ||= {}
    return @pivot_field[space] if @pivot_field.key?(space)

    if row.nil?
      msg = "A row is required to identify the pivot column (space: #{space})"
      log(:error) { msg }
      raise ArgumentError, msg
    end

    candidates = pivot_fields(space: space)
    found      = candidates.select { |name| row.key?(name) }

    unless found.one?
      msg = "Could not find any column named: #{candidates.join(', ')}"
      msg = "Multiple pivot columns: #{found.join(', ')}" if found.any?

      log(:error) { msg }
      raise msg
    end

    log(:info) { "Using header '#{found.first}' as pivot column." }
    @pivot_field[space] = found.first
  end

  # Candidate pivot column names: the `-pivot` option (input file only),
  # or else the `PIVOT_FIELD` constant.
  def pivot_fields(space: :input)
    @pivot_fields ||= {}
    return @pivot_fields[space] if @pivot_fields.key?(space)

    return (@pivot_fields[space] = [opts_pivot]) if opts_pivot && space == :input

    unless self.class.const_defined?(:PIVOT_FIELD)
      msg = "(#{self.class}) You must define PIVOT_FIELD constant"
      log(:error) { msg }
      raise msg
    end

    @pivot_fields[space] = self.class::PIVOT_FIELD.dup
  end

  # Name of the page id column: the `-page-id` option (output file only),
  # or else the `PAGE_ID` constant.
  def page_id_field(space: :output)
    @page_id_field ||= {}
    return @page_id_field[space] if @page_id_field.key?(space)

    return (@page_id_field[space] = opts_page_id) if opts_page_id && space == :output

    unless self.class.const_defined?(:PAGE_ID)
      msg = "(#{self.class}) You must define PAGE_ID field constant"
      log(:error) { msg }
      raise msg
    end

    @page_id_field[space] = self.class::PAGE_ID
  end

  def opts_pivot
    options.dig(:input, :pivot_field)
  end

  def opts_page_id
    options.dig(:input, :page_id)
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
class Eco::API::UseCases::Default::Utils::AddPageId
  # Command line interface of the `add-page-id` case.
  # It stores the parsed arguments under `options[:input]`.
  class Cli < Eco::API::UseCases::Cli
    desc 'Adds the page_id column based on mappings onto -pivot'

    # The case argument itself carries the input file (`options[:input][:file]`).
    callback do |_session, options, _usecase|
      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
        options.deep_merge!(input: {file: file})
      end
    end

    # `options[:input][:maps]`
    add_option('-maps-file', 'Source file with the mappings') do |options|
      if (file = SCR.get_file('-maps-file', required: true, should_exist: true))
        options.deep_merge!(input: {maps: file})
      end
    end

    # `options[:input][:pivot_field]`
    add_option('-pivot', 'The column that should be used to pivot') do |options|
      if (column = SCR.get_arg("-pivot", with_param: true))
        options.deep_merge!(input: {pivot_field: column})
      end
    end

    # `options[:input][:page_id]`
    add_option('-page-id', 'The column that should be used to dump the id') do |options|
      if (column = SCR.get_arg("-page-id", with_param: true))
        options.deep_merge!(input: {page_id: column})
      end
    end
  end
end
|
|
@@ -22,5 +22,10 @@ class Eco::API::UseCases::Default::Utils::GroupCsv
|
|
|
22
22
|
options.deep_merge!(input: {group_by_field: file})
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
|
+
|
|
26
|
+
add_option('-format', 'Kind of extract (csv - default | jsonl') do |options|
|
|
27
|
+
format = SCR.get_arg('-format', with_param: true)
|
|
28
|
+
options.deep_merge!(output: {format: format})
|
|
29
|
+
end
|
|
25
30
|
end
|
|
26
31
|
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
class Eco::API::UseCases::Default::Utils::TrackFiles
  # Command line interface of the `track-files` case.
  class Cli < Eco::API::UseCases::Cli
    desc 'Tracks the files of a folder in a CSV'

    # The case argument itself carries the folder (`options[:input][:folder]`).
    callback do |_session, options, _usecase|
      folder = SCR.get_file(cli_name, required: true)
      options.deep_merge!(input: {folder: folder}) if folder
    end

    # `options[:output][:s3_path]` (set even when the parameter is missing).
    add_option('-s3-path', 'Relative subpath from the S3 uploads folder.') do |options|
      options.deep_merge!(
        output: {
          s3_path: SCR.get_arg('-s3-path', with_param: true)
        }
      )
    end
  end
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
class Eco::API::UseCases::Default::Utils::GroupCsv
  # Output writer that hides the differences between the supported formats:
  #  - `:csv`: values are appended as rows of a `CSV` file.
  #  - `:jsonl`: values are written one per line (a `Hash` is dumped as JSON).
  #
  # The file is opened on creation and must be explicitly closed (`#close`).
  class FileHandler
    attr_reader :filename, :format

    # @param filename [String] path of the file to be (over)written.
    # @param format [Symbol, String] `:csv` or `:jsonl`.
    # @raise [RuntimeError] when `format` is unknown.
    def initialize(filename, format: :csv)
      @filename = filename
      # Accept a String as well (i.e. when it comes straight from the command line).
      @format   = format.respond_to?(:to_sym) ? format.to_sym : format

      open_file
    end

    # Appends `value` to the file.
    # @param value [Array, String, Hash] a row (`:csv`), or a single line / Hash (`:jsonl`).
    # @raise [RuntimeError] when the file has already been closed.
    # @raise [ArgumentError] when `value` can't be written as one `:jsonl` line.
    def <<(value)
      raise "File has been closed. Can't write to it: #{filename}" unless file

      case format
      when :csv
        file << value
      when :jsonl
        file.puts serialize(value)
      end
    end

    # Closes the file (it does nothing if already closed).
    def close
      return if file.nil?

      file.close.tap do
        @file = nil
      end
    end

    private

    attr_reader :file

    # Turns `value` into a single `jsonl` line.
    # @note this helper is intentionally NOT named `to_s`: redefining `to_s`
    #   with a mandatory argument breaks string interpolation of the handler.
    def serialize(value)
      case value
      when String
        raise ArgumentError, "As string, value should not be empty." if value.empty?

        if value.include?("\n")
          raise ArgumentError, "As string, value should be a single line. Given: #{value}"
        end

        value
      when Hash
        value.to_json
      else
        raise ArgumentError, "Unsupported type: #{value.class}"
      end
    end

    def open_file
      case format
      when :csv
        @file = CSV.open(filename, 'wb')
      when :jsonl
        @file = File.open(filename, 'wb')
      else
        raise "Unknown output format: #{format}"
      end
    end
  end
end
|
|
@@ -1,35 +1,59 @@
|
|
|
1
1
|
# This script assumes that for the `GROUP_BY_FIELD` rows are consecutive.
|
|
2
2
|
# @note you might run first the `sort-csv` case.
|
|
3
|
+
# @note when using `jsonl` as an output `format`, it doesn't merge fields,
|
|
4
|
+
# but it groups them based on some criteria.
|
|
5
|
+
# - In this case you need to define a `json_builder` method that returns a hash.
|
|
3
6
|
# @note you must inherit from this case and define the constants.
|
|
4
7
|
#
|
|
5
|
-
# GROUP_BY_FIELD = 'target_csv_field'.freeze
|
|
8
|
+
# GROUP_BY_FIELD = 'target_csv_field'.freeze # if `-by` command option isn't used
|
|
6
9
|
# GROUPED_FIELDS = [
|
|
7
10
|
# 'joined_field_1',
|
|
8
11
|
# 'joined_field_2',
|
|
9
12
|
# 'joined_field_3',
|
|
10
13
|
# ].freeze
|
|
11
|
-
#
|
|
14
|
+
# @note that `GROUPED_FIELDS` isn't necessary if `jsonl` is used as an output `format`
|
|
12
15
|
class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
13
16
|
name 'group-csv'
|
|
14
17
|
type :other
|
|
15
18
|
|
|
16
19
|
require_relative 'cli/group_csv_cli'
|
|
20
|
+
require_relative 'group_csv_case/file_handler'
|
|
21
|
+
|
|
22
|
+
OUTPUT_FORMAT = :csv # :csv or :jsonl
|
|
17
23
|
|
|
18
24
|
def main(*_args)
|
|
19
25
|
if simulate?
|
|
20
26
|
count = Eco::CSV.count(input_file)
|
|
21
27
|
log(:info) { "CSV '#{input_file}' has #{count} rows." }
|
|
22
28
|
else
|
|
29
|
+
msg = "You should define a json_builder method when using jsonl as output format"
|
|
30
|
+
raise msg unless respond_to?(:json_builder, true) || output_format != :jsonl
|
|
31
|
+
|
|
23
32
|
generate_file
|
|
24
33
|
end
|
|
25
34
|
end
|
|
26
35
|
|
|
27
36
|
private
|
|
28
37
|
|
|
38
|
+
attr_reader :in_index
|
|
39
|
+
|
|
40
|
+
def with_output_file
|
|
41
|
+
handler = FileHandler.new(output_filename, format: output_format)
|
|
42
|
+
|
|
43
|
+
yield handler
|
|
44
|
+
ensure
|
|
45
|
+
handler&.close
|
|
46
|
+
|
|
47
|
+
msg = "Generated file '#{output_filename}' "
|
|
48
|
+
msg << "with #{row_count} rows (out of #{in_index + 1})."
|
|
49
|
+
|
|
50
|
+
log(:info) { msg } unless simulate?
|
|
51
|
+
end
|
|
52
|
+
|
|
29
53
|
def generate_file # rubocop:disable Metrics/AbcSize
|
|
30
|
-
in_index = nil
|
|
54
|
+
@in_index = nil
|
|
31
55
|
|
|
32
|
-
|
|
56
|
+
with_output_file do |f_handler|
|
|
33
57
|
first = true
|
|
34
58
|
|
|
35
59
|
puts "\n"
|
|
@@ -38,11 +62,11 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
38
62
|
if first
|
|
39
63
|
first = false
|
|
40
64
|
headers!(row)
|
|
41
|
-
|
|
65
|
+
f_handler << headers if output_format == :csv
|
|
42
66
|
require_group_by_field!(row, file: input_file)
|
|
43
67
|
end
|
|
44
68
|
|
|
45
|
-
in_index = idx
|
|
69
|
+
@in_index = idx
|
|
46
70
|
next unless !block_given? || yield(row, idx)
|
|
47
71
|
|
|
48
72
|
next unless pivotable?(row, idx)
|
|
@@ -50,19 +74,25 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
50
74
|
|
|
51
75
|
row_count!
|
|
52
76
|
|
|
53
|
-
|
|
77
|
+
case output_format
|
|
78
|
+
when :csv
|
|
79
|
+
f_handler << last_group.values_at(*headers)
|
|
80
|
+
when :jsonl
|
|
81
|
+
f_handler << json_builder(last_group)
|
|
82
|
+
end
|
|
54
83
|
end
|
|
55
84
|
|
|
56
85
|
# finalize
|
|
57
86
|
if (l_row = pivot_row)
|
|
58
87
|
row_count!
|
|
59
|
-
out_csv << l_row.values_at(*headers)
|
|
60
|
-
end
|
|
61
|
-
ensure
|
|
62
|
-
msg = "Generated file '#{output_filename}' "
|
|
63
|
-
msg << "with #{row_count} rows (out of #{in_index + 1})."
|
|
64
88
|
|
|
65
|
-
|
|
89
|
+
case output_format
|
|
90
|
+
when :csv
|
|
91
|
+
f_handler << l_row.values_at(*headers)
|
|
92
|
+
when :jsonl
|
|
93
|
+
f_handler << json_builder(l_row)
|
|
94
|
+
end
|
|
95
|
+
end
|
|
66
96
|
end
|
|
67
97
|
end
|
|
68
98
|
|
|
@@ -76,16 +106,23 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
76
106
|
pivot_value = row[group_by_field]
|
|
77
107
|
|
|
78
108
|
unless (last_pivot = @group[group_by_field])
|
|
109
|
+
# init
|
|
79
110
|
last_pivot = @group[group_by_field] = pivot_value
|
|
80
111
|
end
|
|
81
112
|
|
|
82
113
|
last = @group
|
|
83
114
|
@group = {group_by_field => pivot_value} unless pivot_value == last_pivot
|
|
84
115
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
116
|
+
case output_format
|
|
117
|
+
when :csv
|
|
118
|
+
headers_rest.each do |field|
|
|
119
|
+
curr_values = row[field].to_s.split('|').compact.uniq
|
|
120
|
+
group_values = @group[field].to_s.split('|').compact.uniq
|
|
121
|
+
@group[field] = (group_values | curr_values).join('|')
|
|
122
|
+
end
|
|
123
|
+
when :jsonl
|
|
124
|
+
@group['rows'] ||= []
|
|
125
|
+
@group['rows'] << row.to_h.slice(*headers_rest)
|
|
89
126
|
end
|
|
90
127
|
|
|
91
128
|
last unless last == @group
|
|
@@ -97,9 +134,10 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
97
134
|
def headers!(row)
|
|
98
135
|
return if headers?
|
|
99
136
|
|
|
100
|
-
@
|
|
101
|
-
@headers_rest
|
|
102
|
-
@
|
|
137
|
+
@grouped_fields = row.headers - [group_by_field] if output_format == :jsonl
|
|
138
|
+
@headers_rest = grouped_fields & row.headers
|
|
139
|
+
@headers_rest -= [group_by_field]
|
|
140
|
+
@headers = [group_by_field, *headers_rest]
|
|
103
141
|
end
|
|
104
142
|
|
|
105
143
|
def headers?
|
|
@@ -108,7 +146,7 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
108
146
|
|
|
109
147
|
def row_count!
|
|
110
148
|
@row_count ||= 0
|
|
111
|
-
(@row_count
|
|
149
|
+
(@row_count += 1).tap do |cnt|
|
|
112
150
|
if (cnt % 500).zero?
|
|
113
151
|
print "... Done #{cnt} rows \r"
|
|
114
152
|
$stdout.flush
|
|
@@ -141,10 +179,14 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
|
|
|
141
179
|
num
|
|
142
180
|
end
|
|
143
181
|
|
|
182
|
+
def output_format
|
|
183
|
+
options.dig(:output, :format)&.to_sym || self.class::OUTPUT_FORMAT
|
|
184
|
+
end
|
|
185
|
+
|
|
144
186
|
def output_filename
|
|
145
187
|
return unless input_name
|
|
146
188
|
|
|
147
|
-
File.join(input_dir, "#{input_name}_grouped
|
|
189
|
+
File.join(input_dir, "#{input_name}_grouped.#{output_format}")
|
|
148
190
|
end
|
|
149
191
|
|
|
150
192
|
def input_name
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Tracks the files of a source folder into a CSV file.
#
# - The subfolders of the source folder are traversed recursively
#   (files placed directly in the source folder are not visited).
# - A folder can contain either files or subfolders, but not both.
# - One row is generated per file (see `OUT_HEADERS`).
# - In simulate mode the files are only counted.
class Eco::API::UseCases::Default::Utils::TrackFiles < Eco::API::Custom::UseCase
  name 'track-files'
  type :other

  require_relative 'cli/track_files_cli'

  OUT_HEADERS = %w[
    ref_id
    filename
    filesize
    s3_path
  ].freeze

  # Which folder of the relative path of a file is its `ref_id` (`:first` or `:last`).
  REF_ID_PATH_POSITION = :last
  BASE_S3_PATH         = 'uploads'.freeze
  # S3_SUBPATH = 'org-name'.freeze

  def main(*_args)
    if simulate?
      count_files
    else
      generate_file
    end
  end

  private

  # @note they are `nil` until the first folder with files is found.
  attr_reader :folder_count, :file_count

  def folder_count!(cnt = 1)
    @folder_count ||= 0

    print '.'
    @folder_count += cnt
  end

  def file_count!(cnt = 1)
    @file_count ||= 0
    @file_count += cnt
  end

  def count_files
    with_each_file

    log(:info) {
      "Found #{file_count.to_i} files, in #{folder_count.to_i} folders (with files)."
    }
  end

  def ref_id_path_position
    self.class::REF_ID_PATH_POSITION
  end

  # @param src_path [Array<String>] folder names from the source folder down to the file.
  # @raise [ArgumentError] when `REF_ID_PATH_POSITION` is neither `:first` nor `:last`.
  def ref_id_of(src_path)
    case ref_id_path_position
    when :first then src_path.first
    when :last  then src_path.last
    else
      raise ArgumentError, "Unknown REF_ID_PATH_POSITION: #{ref_id_path_position} "
    end
  end

  def generate_file
    CSV.open(output_filename, 'wb') do |csv|
      csv << self.class::OUT_HEADERS

      with_each_file do |file, src_path|
        file_name = File.basename(file)

        csv << [
          ref_id_of(src_path),
          file_name,
          File.size(file),
          s3_path(file_name, src_path)
        ]
      end
    end
  ensure
    # `to_i`: the counters are `nil` when no file has been found.
    msg  = "Generated file '#{output_filename}' "
    msg << "with #{file_count.to_i} files/rows "
    msg << "organized in #{folder_count.to_i} folders."

    log(:info) { msg } unless simulate?
  end

  # Recursively walks `folders`, counting folders (with files) and files.
  # @param folders [Array<String>] the folders to walk.
  # @param src_path [Array<String>] names of the parent folders already walked.
  # @yield [file, path] each file found, along with the folder names that lead to it.
  # @raise [ArgumentError] when a folder contains both files and subfolders.
  def with_each_file(folders = top_subfolders, src_path: [], &block)
    folders.each do |folder|
      # New array per folder, so siblings don't share (and mutate) the same path.
      path = [*src_path, File.basename(folder)]

      files      = folder_files(folder)
      subfolders = top_subfolders(folder)

      next if files.empty? && subfolders.empty? # skip

      if files.any? && subfolders.any?
        msg  = "Folder '#{folder}' contains both files and subfolders."
        msg << "\nFor correctly tracking and handling file attachments, "
        msg << "this is not supported."

        raise ArgumentError, msg
      end

      unless files.empty?
        folder_count!
        file_count!(files.count)

        files.each do |file|
          yield(file, path) if block_given?
        end
      end

      next if subfolders.empty?

      with_each_file(
        subfolders,
        src_path: path,
        &block
      )
    end
  end

  def s3_path(filename, path)
    [
      self.class::BASE_S3_PATH,
      s3_subpath,
      *path,
      filename
    ].compact.join('/')
  end

  # Precedence: `-s3-path` option, `S3_SUBPATH` constant, active environment.
  def s3_subpath
    options.dig(:output, :s3_path) ||
      s3_subpath_const ||
      config.active_enviro
  end

  def s3_subpath_const
    self.class::S3_SUBPATH if self.class.const_defined?(:S3_SUBPATH)
  end

  # Non-hidden entries of `dir` (full paths).
  # @note `base:` is used so glob metacharacters in `dir` itself
  #   (i.e. `[`, `{`, `*`) are not interpreted as part of the pattern.
  def folder_entries(dir)
    Dir.glob('*', base: dir).map do |entry|
      File.join(dir, entry)
    end
  end

  def top_subfolders(base_folder = input_base_folder)
    folder_entries(base_folder).select do |f|
      File.directory?(f)
    end
  end

  def folder_files(dir)
    folder_entries(dir).select do |f|
      File.file?(f)
    end
  end

  def output_filename
    return unless input_folder_name

    File.join(
      config.active_enviro,
      'sftp',
      "#{input_folder_name}_files.csv"
    )
  end

  def input_folder_name
    @input_folder_name ||= File.basename(input_base_folder)
  end

  # @raise [ArgumentError] when the folder is missing or it isn't a directory.
  def input_base_folder
    options.dig(:input, :folder).tap do |folder|
      # `File.directory?(nil)` would raise a TypeError
      next if folder && File.directory?(folder)

      msg = "Expecting '#{folder}' to be a directory, but it isn't."
      raise ArgumentError, msg
    end
  end
end
|
data/lib/eco/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: eco-helpers
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.2.
|
|
4
|
+
version: 3.2.14
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Oscar Segura
|
|
@@ -794,6 +794,8 @@ files:
|
|
|
794
794
|
- lib/eco/api/usecases/default/people/utils/switch_supervisor_case.rb
|
|
795
795
|
- lib/eco/api/usecases/default/people/utils/transfer_account_case.rb
|
|
796
796
|
- lib/eco/api/usecases/default/utils.rb
|
|
797
|
+
- lib/eco/api/usecases/default/utils/add_page_id_case.rb
|
|
798
|
+
- lib/eco/api/usecases/default/utils/cli/add_page_id_cli.rb
|
|
797
799
|
- lib/eco/api/usecases/default/utils/cli/entries_to_csv_cli.rb
|
|
798
800
|
- lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb
|
|
799
801
|
- lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb
|
|
@@ -801,13 +803,16 @@ files:
|
|
|
801
803
|
- lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb
|
|
802
804
|
- lib/eco/api/usecases/default/utils/cli/split_csv_cli.rb
|
|
803
805
|
- lib/eco/api/usecases/default/utils/cli/split_json_cli.rb
|
|
806
|
+
- lib/eco/api/usecases/default/utils/cli/track_files_cli.rb
|
|
804
807
|
- lib/eco/api/usecases/default/utils/entries_to_csv_case.rb
|
|
805
808
|
- lib/eco/api/usecases/default/utils/group_csv_case.rb
|
|
809
|
+
- lib/eco/api/usecases/default/utils/group_csv_case/file_handler.rb
|
|
806
810
|
- lib/eco/api/usecases/default/utils/json_to_csv_case.rb
|
|
807
811
|
- lib/eco/api/usecases/default/utils/merge_csv_case.rb
|
|
808
812
|
- lib/eco/api/usecases/default/utils/sort_csv_case.rb
|
|
809
813
|
- lib/eco/api/usecases/default/utils/split_csv_case.rb
|
|
810
814
|
- lib/eco/api/usecases/default/utils/split_json_case.rb
|
|
815
|
+
- lib/eco/api/usecases/default/utils/track_files_case.rb
|
|
811
816
|
- lib/eco/api/usecases/default_cases.rb
|
|
812
817
|
- lib/eco/api/usecases/default_cases/create_case.rb
|
|
813
818
|
- lib/eco/api/usecases/default_cases/delete_sync_case.rb
|