remi 0.2.27 → 0.2.28
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Gemfile.lock +34 -5
- data/features/metadata.feature +17 -0
- data/features/step_definitions/remi_step.rb +6 -6
- data/features/transforms/date_diff.feature +1 -0
- data/jobs/aggregate_job.rb +0 -1
- data/jobs/all_jobs_shared.rb +0 -2
- data/jobs/copy_source_job.rb +0 -1
- data/jobs/csv_file_target_job.rb +0 -1
- data/jobs/metadata_job.rb +60 -0
- data/jobs/parameters_job.rb +1 -1
- data/jobs/sample_job.rb +19 -20
- data/jobs/sftp_file_target_job.rb +0 -1
- data/jobs/transforms/date_diff_job.rb +1 -1
- data/jobs/transforms/nvl_job.rb +1 -1
- data/jobs/transforms/parse_date_job.rb +7 -4
- data/jobs/transforms/prefix_job.rb +1 -1
- data/jobs/transforms/truncate_job.rb +1 -1
- data/lib/remi.rb +10 -15
- data/lib/remi/cucumber/business_rules.rb +23 -23
- data/lib/remi/cucumber/data_source.rb +2 -1
- data/lib/remi/data_frame.rb +36 -0
- data/lib/remi/data_frame/daru.rb +67 -0
- data/lib/remi/data_subject.rb +71 -10
- data/lib/remi/data_subject/csv_file.rb +151 -0
- data/lib/remi/data_subject/data_frame.rb +53 -0
- data/lib/remi/data_subject/postgres.rb +136 -0
- data/lib/remi/data_subject/salesforce.rb +136 -0
- data/lib/remi/data_subject/sftp_file.rb +66 -0
- data/lib/remi/fields.rb +8 -0
- data/lib/remi/source_to_target_map.rb +56 -32
- data/lib/remi/transform.rb +426 -83
- data/lib/remi/version.rb +1 -1
- data/remi.gemspec +2 -1
- data/spec/metadata_spec.rb +62 -0
- metadata +15 -28
- data/lib/remi/data_source.rb +0 -13
- data/lib/remi/data_source/csv_file.rb +0 -101
- data/lib/remi/data_source/data_frame.rb +0 -16
- data/lib/remi/data_source/postgres.rb +0 -58
- data/lib/remi/data_source/salesforce.rb +0 -87
- data/lib/remi/data_target.rb +0 -15
- data/lib/remi/data_target/csv_file.rb +0 -42
- data/lib/remi/data_target/data_frame.rb +0 -14
- data/lib/remi/data_target/postgres.rb +0 -74
- data/lib/remi/data_target/salesforce.rb +0 -54
- data/lib/remi/data_target/sftp_file.rb +0 -54
- data/lib/remi/refinements/daru.rb +0 -85
@@ -13,7 +13,7 @@ class TruncateJob
|
|
13
13
|
define_transform :main, sources: :source_data, targets: :target_data do
|
14
14
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
15
15
|
map source(:my_field) .target(:truncated_field)
|
16
|
-
.transform(Remi::Transform
|
16
|
+
.transform(Remi::Transform::Truncate.new(params[:truncate_len].to_i))
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
data/lib/remi.rb
CHANGED
@@ -39,25 +39,20 @@ require 'remi/settings'
|
|
39
39
|
require 'remi/job'
|
40
40
|
require 'remi/source_to_target_map'
|
41
41
|
require 'remi/field_symbolizers'
|
42
|
-
require 'remi/data_subject'
|
43
|
-
require 'remi/sf_bulk_helper' # separate into SF support package
|
44
42
|
|
45
43
|
require 'remi/refinements/symbolizer'
|
46
|
-
require 'remi/refinements/daru'
|
47
44
|
|
48
45
|
require 'remi/extractor/sftp_file'
|
49
46
|
|
50
|
-
require 'remi/
|
51
|
-
require 'remi/
|
52
|
-
require 'remi/
|
53
|
-
|
54
|
-
require 'remi/
|
55
|
-
|
56
|
-
require 'remi/
|
57
|
-
require 'remi/
|
58
|
-
require 'remi/
|
59
|
-
require 'remi/
|
60
|
-
require 'remi/data_target/sftp_file'
|
61
|
-
require 'remi/data_target/postgres'
|
47
|
+
require 'remi/fields'
|
48
|
+
require 'remi/data_frame'
|
49
|
+
require 'remi/data_frame/daru'
|
50
|
+
|
51
|
+
require 'remi/data_subject'
|
52
|
+
require 'remi/data_subject/csv_file'
|
53
|
+
#require 'remi/data_subject/salesforce' # intentionally not included by default
|
54
|
+
require 'remi/data_subject/postgres'
|
55
|
+
require 'remi/data_subject/sftp_file'
|
56
|
+
require 'remi/data_subject/data_frame'
|
62
57
|
|
63
58
|
require 'remi/transform'
|
@@ -250,14 +250,14 @@ module Remi::BusinessRules
|
|
250
250
|
class DataSubject
|
251
251
|
def initialize(name, subject)
|
252
252
|
@name = name
|
253
|
-
@
|
253
|
+
@data_subject = subject
|
254
254
|
@fields = DataFieldCollection.new
|
255
255
|
|
256
256
|
stub_data
|
257
257
|
end
|
258
258
|
|
259
259
|
attr_reader :name
|
260
|
-
attr_reader :
|
260
|
+
attr_reader :data_subject
|
261
261
|
|
262
262
|
def add_field(field_name)
|
263
263
|
@fields.add_field(self, field_name)
|
@@ -272,17 +272,17 @@ module Remi::BusinessRules
|
|
272
272
|
end
|
273
273
|
|
274
274
|
def size
|
275
|
-
@
|
275
|
+
@data_subject.df.size
|
276
276
|
end
|
277
277
|
|
278
278
|
def get_attrib(name)
|
279
|
-
@
|
279
|
+
@data_subject.send(name)
|
280
280
|
end
|
281
281
|
|
282
282
|
# Public: Converts the data subject to a hash where the keys are the table
|
283
283
|
# columns and each value is an array holding that column's value for every row.
|
284
284
|
def column_hash
|
285
|
-
@
|
285
|
+
@data_subject.df.to_h.reduce({}) do |h, (k,v)|
|
286
286
|
h[k.symbolize] = v.to_a
|
287
287
|
h
|
288
288
|
end
|
@@ -290,7 +290,7 @@ module Remi::BusinessRules
|
|
290
290
|
|
291
291
|
# For debugging only
|
292
292
|
def _df
|
293
|
-
@
|
293
|
+
@data_subject.df
|
294
294
|
end
|
295
295
|
|
296
296
|
|
@@ -298,7 +298,7 @@ module Remi::BusinessRules
|
|
298
298
|
# Need more robust duping to make that feasible.
|
299
299
|
# Don't use results for anything more than size.
|
300
300
|
def where(field_name, operation)
|
301
|
-
@
|
301
|
+
@data_subject.df.where(@data_subject.df[field_name.symbolize(@data_subject.field_symbolizer)].recode { |v| operation.call(v) })
|
302
302
|
end
|
303
303
|
|
304
304
|
def where_is(field_name, value)
|
@@ -324,29 +324,29 @@ module Remi::BusinessRules
|
|
324
324
|
|
325
325
|
|
326
326
|
def stub_data
|
327
|
-
@
|
327
|
+
@data_subject.stub_df if @data_subject.respond_to? :stub_df
|
328
328
|
end
|
329
329
|
|
330
330
|
def example_to_df(example)
|
331
|
-
example.to_df(@
|
331
|
+
example.to_df(@data_subject.df.row[0].to_h, field_symbolizer: @data_subject.field_symbolizer)
|
332
332
|
end
|
333
333
|
|
334
334
|
def stub_data_with(example)
|
335
335
|
stub_data
|
336
|
-
@
|
336
|
+
@data_subject.df = example_to_df(example)
|
337
337
|
end
|
338
338
|
|
339
339
|
def append_data_with(example)
|
340
|
-
@
|
340
|
+
@data_subject.df = @data_subject.df.concat example_to_df(example)
|
341
341
|
end
|
342
342
|
|
343
343
|
|
344
344
|
def replicate_rows(n_rows)
|
345
|
-
replicated_df = Daru::DataFrame.new([], order: @
|
346
|
-
@
|
345
|
+
replicated_df = Daru::DataFrame.new([], order: @data_subject.df.vectors.to_a)
|
346
|
+
@data_subject.df.each do |vector|
|
347
347
|
replicated_df[vector.name] = vector.to_a * n_rows
|
348
348
|
end
|
349
|
-
@
|
349
|
+
@data_subject.df = replicated_df
|
350
350
|
end
|
351
351
|
|
352
352
|
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
|
@@ -378,31 +378,31 @@ module Remi::BusinessRules
|
|
378
378
|
|
379
379
|
def distribute_values(table)
|
380
380
|
cumulative_dist = cumulative_dist_from_freq_table(table)
|
381
|
-
generated_data = generate_values_from_cumulative_dist(@
|
381
|
+
generated_data = generate_values_from_cumulative_dist(@data_subject.df.size, cumulative_dist)
|
382
382
|
|
383
383
|
generated_data.each do |field_name, data_array|
|
384
384
|
vector_name = fields[field_name].field_name
|
385
|
-
@
|
385
|
+
@data_subject.df[vector_name] = Daru::Vector.new(data_array, index: @data_subject.df.index)
|
386
386
|
end
|
387
387
|
end
|
388
388
|
|
389
389
|
def freq_by(*field_names)
|
390
|
-
@
|
390
|
+
@data_subject.df.group_by(field_names).size * 1.0 / @data_subject.df.size
|
391
391
|
end
|
392
392
|
|
393
393
|
def mock_extractor(filestore)
|
394
|
-
extractor = class << @
|
394
|
+
extractor = class << @data_subject.extractor; self; end
|
395
395
|
|
396
396
|
extractor.send(:define_method, :all_entries, ->() { filestore.sftp_entries })
|
397
397
|
extractor.send(:define_method, :download, ->(to_download) { to_download.map { |e| e.name } })
|
398
398
|
end
|
399
399
|
|
400
400
|
def extract
|
401
|
-
@
|
401
|
+
@data_subject.extractor.extract
|
402
402
|
end
|
403
403
|
|
404
404
|
def csv_options
|
405
|
-
@
|
405
|
+
@data_subject.csv_options
|
406
406
|
end
|
407
407
|
|
408
408
|
end
|
@@ -456,7 +456,7 @@ module Remi::BusinessRules
|
|
456
456
|
def initialize(subject, name)
|
457
457
|
@subject = subject
|
458
458
|
@name = name
|
459
|
-
@field_name = name.symbolize(subject.
|
459
|
+
@field_name = name.symbolize(subject.data_subject.field_symbolizer)
|
460
460
|
end
|
461
461
|
|
462
462
|
attr_reader :name
|
@@ -468,11 +468,11 @@ module Remi::BusinessRules
|
|
468
468
|
end
|
469
469
|
|
470
470
|
def metadata
|
471
|
-
@subject.
|
471
|
+
@subject.data_subject.fields[@field_name]
|
472
472
|
end
|
473
473
|
|
474
474
|
def vector
|
475
|
-
@subject.
|
475
|
+
@subject.data_subject.df[@field_name]
|
476
476
|
end
|
477
477
|
|
478
478
|
def value
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Remi
|
2
|
+
module DataFrame
|
3
|
+
class << self
|
4
|
+
def create(remi_df_type = :daru, *args, **kargs, &block)
|
5
|
+
dataframe = case remi_df_type
|
6
|
+
when :daru
|
7
|
+
Remi::DataFrame::Daru.new(*args, **kargs, &block)
|
8
|
+
else
|
9
|
+
raise TypeError, "Unknown frame type: #{remi_df_type}"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def daru(*args, **kargs, &block)
|
14
|
+
self.create(:daru, *args, **kargs, &block)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
def [](*args)
|
20
|
+
super
|
21
|
+
end
|
22
|
+
|
23
|
+
def size
|
24
|
+
super
|
25
|
+
end
|
26
|
+
|
27
|
+
def write_csv(*args, **kargs, &block)
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
# Public: Returns the type of DataFrame
|
32
|
+
def remi_df_type
|
33
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Remi
|
2
|
+
module DataFrame
|
3
|
+
class Daru < SimpleDelegator
|
4
|
+
include Remi::DataFrame
|
5
|
+
|
6
|
+
def initialize(*args, **kargs, &block)
|
7
|
+
if args[0].is_a? ::Daru::DataFrame
|
8
|
+
super(args[0])
|
9
|
+
else
|
10
|
+
super(::Daru::DataFrame.new(*args, **kargs, &block))
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
# Public: Returns the type of DataFrame
|
16
|
+
def remi_df_type
|
17
|
+
:daru
|
18
|
+
end
|
19
|
+
|
20
|
+
# Public: Saves a Dataframe to a file.
|
21
|
+
def hash_dump(filename)
|
22
|
+
File.binwrite(filename, Marshal.dump(self))
|
23
|
+
end
|
24
|
+
|
25
|
+
# Public: Creates a DataFrame by reading the dumped version from a file.
|
26
|
+
def self.from_hash_dump(filename)
|
27
|
+
Marshal.load(File.binread(filename))
|
28
|
+
end
|
29
|
+
|
30
|
+
# Public: Allows the user to define an arbitrary aggregation function.
|
31
|
+
#
|
32
|
+
# by - The name of the DataFrame vector to use to group records.
|
33
|
+
# func - A lambda function that accepts three arguments - the
|
34
|
+
# first argument is the DataFrame, the second is the
|
35
|
+
# key to the current group, and the third is the index
|
36
|
+
# of the elements belonging to a group.
|
37
|
+
#
|
38
|
+
# Example:
|
39
|
+
# df = Remi::DataFrame::Daru.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
|
40
|
+
#
|
41
|
+
# mymin = lambda do |vector, df, group_key, indices|
|
42
|
+
# values = indices.map { |idx| df.row[idx][vector] }
|
43
|
+
# "Group #{group_key} has a minimum value of #{values.min}"
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# df.aggregate(by: :a, func: mymin.curry.(:year))
|
47
|
+
#
|
48
|
+
#
|
49
|
+
# Returns a Daru::Vector.
|
50
|
+
def aggregate(by:, func:)
|
51
|
+
grouped = self.group_by(by)
|
52
|
+
df_indices = self.index.to_a
|
53
|
+
::Daru::Vector.new(
|
54
|
+
grouped.groups.reduce({}) do |h, (key, indices)|
|
55
|
+
# Daru groups don't use the index of the dataframe when returning groups (WTF?).
|
56
|
+
# Instead they return the position of the record in the dataframe. Here, we map those positions back to the dataframe's index values.
|
57
|
+
group_df_indices = indices.map { |v| df_indices[v] }
|
58
|
+
group_key = key.size == 1 ? key.first : key
|
59
|
+
h[group_key] = func.(self, group_key, group_df_indices)
|
60
|
+
h
|
61
|
+
end
|
62
|
+
)
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -1,24 +1,85 @@
|
|
1
1
|
module Remi
|
2
|
-
|
2
|
+
|
3
|
+
# Namespaces for specific sources/targets
|
4
|
+
module DataSource; end
|
5
|
+
module DataTarget; end
|
6
|
+
|
7
|
+
class DataSubject
|
8
|
+
def initialize(*args, fields: Remi::Fields.new, remi_df_type: :daru, logger: Remi::Settings.logger, **kargs, &block)
|
9
|
+
@fields = fields
|
10
|
+
@remi_df_type = remi_df_type
|
11
|
+
@logger = logger
|
12
|
+
end
|
13
|
+
|
14
|
+
attr_accessor :fields
|
15
|
+
|
3
16
|
def field_symbolizer
|
4
17
|
Remi::FieldSymbolizers[:standard]
|
5
18
|
end
|
6
19
|
|
7
20
|
def df
|
8
|
-
@dataframe ||=
|
21
|
+
@dataframe ||= Remi::DataFrame.create(@remi_df_type, [], order: @fields.keys)
|
9
22
|
end
|
10
23
|
|
11
24
|
def df=(new_dataframe)
|
12
|
-
|
25
|
+
if new_dataframe.respond_to? :remi_df_type
|
26
|
+
@dataframe = new_dataframe
|
27
|
+
else
|
28
|
+
@dataframe = Remi::DataFrame.create(@remi_df_type, new_dataframe)
|
29
|
+
end
|
13
30
|
end
|
14
31
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
32
|
+
module DataSource
|
33
|
+
|
34
|
+
# Public: Access the dataframe from a DataSource
|
35
|
+
#
|
36
|
+
# Returns a Remi::DataFrame
|
37
|
+
def df
|
38
|
+
@dataframe ||= to_dataframe
|
39
|
+
end
|
40
|
+
|
41
|
+
# Public: Memoized version of extract!
|
42
|
+
def extract
|
43
|
+
@extract ||= extract!
|
44
|
+
end
|
45
|
+
|
46
|
+
# Public: Called to extract data from the source.
|
47
|
+
#
|
48
|
+
# Returns data in a format that can be used to create a dataframe.
|
49
|
+
# Abstract hook: this base implementation only raises, so each concrete
# data source is expected to supply its own extract!.
#
# Raises NoMethodError naming the method and the class that lacks it.
def extract!
  # Nothing may follow the raise; any trailing expression would be unreachable.
  raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
end
|
53
|
+
|
54
|
+
# Public: Converts extracted data to a dataframe
|
55
|
+
#
|
56
|
+
# Returns a Remi::DataFrame
|
57
|
+
def to_dataframe
|
58
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
module DataTarget
|
63
|
+
|
64
|
+
# Public: Loads data to the target. This is automatically called
|
65
|
+
# after all transforms have executed, but could also get called manually.
|
66
|
+
# The actual load operation is only executed if it hasn't already run and the dataframe is not empty.
|
67
|
+
#
|
68
|
+
# Returns true if the load operation was successful.
|
69
|
+
def load
|
70
|
+
return true if @loaded || df.size == 0
|
71
|
+
|
72
|
+
@loaded = load!
|
73
|
+
end
|
74
|
+
|
75
|
+
# Public: Performs the load operation, regardless of whether it has
|
76
|
+
# already executed.
|
77
|
+
#
|
78
|
+
# Returns true if the load operation was successful
|
79
|
+
# Abstract hook: this base implementation only raises, so each concrete
# data target is expected to supply its own load!.
#
# Raises NoMethodError naming the method and the class that lacks it.
def load!
  # Nothing may follow the raise; any trailing expression would be unreachable.
  raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
end
|
23
84
|
end
|
24
85
|
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
module Remi
|
2
|
+
module DataSubject::CsvFile
|
3
|
+
def self.included(base)
|
4
|
+
base.extend(CsvFileClassMethods)
|
5
|
+
end
|
6
|
+
|
7
|
+
def field_symbolizer
|
8
|
+
self.class.default_csv_options[:header_converters]
|
9
|
+
end
|
10
|
+
|
11
|
+
module CsvFileClassMethods
|
12
|
+
def default_csv_options
|
13
|
+
@default_csv_options ||= CSV::DEFAULT_OPTIONS.merge({
|
14
|
+
headers: true,
|
15
|
+
header_converters: Remi::FieldSymbolizers[:standard],
|
16
|
+
converters: [],
|
17
|
+
col_sep: ',',
|
18
|
+
encoding: 'UTF-8',
|
19
|
+
quote_char: '"'
|
20
|
+
})
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
class DataSource::CsvFile < Remi::DataSubject
|
30
|
+
include Remi::DataSubject::DataSource
|
31
|
+
include Remi::DataSubject::CsvFile
|
32
|
+
|
33
|
+
def initialize(*args, **kargs, &block)
|
34
|
+
super
|
35
|
+
init_csv_file(*args, **kargs, &block)
|
36
|
+
end
|
37
|
+
|
38
|
+
attr_reader :extractor
|
39
|
+
attr_reader :csv_options
|
40
|
+
|
41
|
+
# Public: Called to extract data from the source.
|
42
|
+
#
|
43
|
+
# Returns data in a format that can be used to create a dataframe.
|
44
|
+
def extract!
|
45
|
+
@extract = Array(@extractor.extract)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Public: Converts extracted data to a dataframe.
|
49
|
+
# Currently only supports Daru DataFrames.
|
50
|
+
#
|
51
|
+
# Returns a Remi::DataFrame
|
52
|
+
def to_dataframe
|
53
|
+
# Assumes that each file has exactly the same structure
|
54
|
+
result_df = nil
|
55
|
+
extract.each_with_index do |filename, idx|
|
56
|
+
@logger.info "Converting #{filename} to a dataframe"
|
57
|
+
csv_df = Daru::DataFrame.from_csv filename, @csv_options
|
58
|
+
|
59
|
+
csv_df[@filename_field] = Daru::Vector.new([filename] * csv_df.size, index: csv_df.index) if @filename_field
|
60
|
+
if idx == 0
|
61
|
+
result_df = csv_df
|
62
|
+
else
|
63
|
+
result_df = result_df.concat csv_df
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
Remi::DataFrame.create(:daru, result_df)
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
def extractor=(arg)
|
73
|
+
case arg
|
74
|
+
when Extractor::SftpFile, Extractor::LocalFile
|
75
|
+
@extractor = arg
|
76
|
+
when String
|
77
|
+
@extractor = Extractor::LocalFile.new(path: arg)
|
78
|
+
when Regexp
|
79
|
+
raise "Adding regex matching to local files would be easy, not done yet"
|
80
|
+
else
|
81
|
+
raise "Unknown extractor of type #{arg.class}: #{arg}"
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Only going to support single file for now
|
86
|
+
def source_filename
|
87
|
+
raise "Multiple source files detected" if extract.size > 1
|
88
|
+
@source_filename ||= extract.first
|
89
|
+
end
|
90
|
+
|
91
|
+
def first_line
|
92
|
+
# Readline assumes \n line endings. Strip out \r if it is a DOS file.
|
93
|
+
@first_line ||= File.open(source_filename) do |f|
|
94
|
+
f.readline.gsub(/\r/,'')
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
# Public: Reads the header names from the first row of the source CSV file.
# The result is memoized in @headers.
#
# Uses the options stored in @csv_options (the same hash to_dataframe uses);
# they are splatted as keywords so the call also works on Ruby 3, where a
# trailing positional hash is no longer treated as keyword arguments.
#
# Returns the value of #headers on the first row yielded by CSV.
def headers
  @headers ||= CSV.open(source_filename, 'r', **@csv_options) { |csv| csv.first }.headers
end
|
101
|
+
|
102
|
+
def valid_headers?
|
103
|
+
(fields.keys - headers).empty?
|
104
|
+
end
|
105
|
+
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
def init_csv_file(*args, extractor:, csv_options: {}, filename_field: nil, **kargs, &block)
|
110
|
+
self.extractor = extractor
|
111
|
+
@csv_options = self.class.default_csv_options.merge(csv_options)
|
112
|
+
@filename_field = filename_field
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
class DataTarget::CsvFile < Remi::DataSubject
|
121
|
+
include ::Remi::DataSubject::DataTarget
|
122
|
+
include ::Remi::DataSubject::CsvFile
|
123
|
+
|
124
|
+
default_csv_options[:row_sep] = "\n"
|
125
|
+
|
126
|
+
def initialize(*args, **kargs, &block)
|
127
|
+
super
|
128
|
+
init_csv_file(*args, **kargs, &block)
|
129
|
+
end
|
130
|
+
|
131
|
+
attr_reader :csv_options
|
132
|
+
|
133
|
+
# Public: Performs the load operation, regardless of whether it has
|
134
|
+
# already executed.
|
135
|
+
#
|
136
|
+
# Returns true if the load operation was successful
|
137
|
+
def load!
|
138
|
+
@logger.info "Writing CSV file #{@path}"
|
139
|
+
df.write_csv @path, @csv_options
|
140
|
+
true
|
141
|
+
end
|
142
|
+
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
def init_csv_file(*args, path:, csv_options: {}, **kargs, &block)
|
147
|
+
@path = path
|
148
|
+
@csv_options = self.class.default_csv_options.merge(csv_options)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|