remi 0.2.27 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Gemfile.lock +34 -5
- data/features/metadata.feature +17 -0
- data/features/step_definitions/remi_step.rb +6 -6
- data/features/transforms/date_diff.feature +1 -0
- data/jobs/aggregate_job.rb +0 -1
- data/jobs/all_jobs_shared.rb +0 -2
- data/jobs/copy_source_job.rb +0 -1
- data/jobs/csv_file_target_job.rb +0 -1
- data/jobs/metadata_job.rb +60 -0
- data/jobs/parameters_job.rb +1 -1
- data/jobs/sample_job.rb +19 -20
- data/jobs/sftp_file_target_job.rb +0 -1
- data/jobs/transforms/date_diff_job.rb +1 -1
- data/jobs/transforms/nvl_job.rb +1 -1
- data/jobs/transforms/parse_date_job.rb +7 -4
- data/jobs/transforms/prefix_job.rb +1 -1
- data/jobs/transforms/truncate_job.rb +1 -1
- data/lib/remi.rb +10 -15
- data/lib/remi/cucumber/business_rules.rb +23 -23
- data/lib/remi/cucumber/data_source.rb +2 -1
- data/lib/remi/data_frame.rb +36 -0
- data/lib/remi/data_frame/daru.rb +67 -0
- data/lib/remi/data_subject.rb +71 -10
- data/lib/remi/data_subject/csv_file.rb +151 -0
- data/lib/remi/data_subject/data_frame.rb +53 -0
- data/lib/remi/data_subject/postgres.rb +136 -0
- data/lib/remi/data_subject/salesforce.rb +136 -0
- data/lib/remi/data_subject/sftp_file.rb +66 -0
- data/lib/remi/fields.rb +8 -0
- data/lib/remi/source_to_target_map.rb +56 -32
- data/lib/remi/transform.rb +426 -83
- data/lib/remi/version.rb +1 -1
- data/remi.gemspec +2 -1
- data/spec/metadata_spec.rb +62 -0
- metadata +15 -28
- data/lib/remi/data_source.rb +0 -13
- data/lib/remi/data_source/csv_file.rb +0 -101
- data/lib/remi/data_source/data_frame.rb +0 -16
- data/lib/remi/data_source/postgres.rb +0 -58
- data/lib/remi/data_source/salesforce.rb +0 -87
- data/lib/remi/data_target.rb +0 -15
- data/lib/remi/data_target/csv_file.rb +0 -42
- data/lib/remi/data_target/data_frame.rb +0 -14
- data/lib/remi/data_target/postgres.rb +0 -74
- data/lib/remi/data_target/salesforce.rb +0 -54
- data/lib/remi/data_target/sftp_file.rb +0 -54
- data/lib/remi/refinements/daru.rb +0 -85
@@ -13,7 +13,7 @@ class TruncateJob
|
|
13
13
|
define_transform :main, sources: :source_data, targets: :target_data do
|
14
14
|
Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
|
15
15
|
map source(:my_field) .target(:truncated_field)
|
16
|
-
.transform(Remi::Transform
|
16
|
+
.transform(Remi::Transform::Truncate.new(params[:truncate_len].to_i))
|
17
17
|
end
|
18
18
|
end
|
19
19
|
end
|
data/lib/remi.rb
CHANGED
@@ -39,25 +39,20 @@ require 'remi/settings'
|
|
39
39
|
require 'remi/job'
|
40
40
|
require 'remi/source_to_target_map'
|
41
41
|
require 'remi/field_symbolizers'
|
42
|
-
require 'remi/data_subject'
|
43
|
-
require 'remi/sf_bulk_helper' # separate into SF support package
|
44
42
|
|
45
43
|
require 'remi/refinements/symbolizer'
|
46
|
-
require 'remi/refinements/daru'
|
47
44
|
|
48
45
|
require 'remi/extractor/sftp_file'
|
49
46
|
|
50
|
-
require 'remi/
|
51
|
-
require 'remi/
|
52
|
-
require 'remi/
|
53
|
-
|
54
|
-
require 'remi/
|
55
|
-
|
56
|
-
require 'remi/
|
57
|
-
require 'remi/
|
58
|
-
require 'remi/
|
59
|
-
require 'remi/
|
60
|
-
require 'remi/data_target/sftp_file'
|
61
|
-
require 'remi/data_target/postgres'
|
47
|
+
require 'remi/fields'
|
48
|
+
require 'remi/data_frame'
|
49
|
+
require 'remi/data_frame/daru'
|
50
|
+
|
51
|
+
require 'remi/data_subject'
|
52
|
+
require 'remi/data_subject/csv_file'
|
53
|
+
#require 'remi/data_subject/salesforce' # intentionally not included by default
|
54
|
+
require 'remi/data_subject/postgres'
|
55
|
+
require 'remi/data_subject/sftp_file'
|
56
|
+
require 'remi/data_subject/data_frame'
|
62
57
|
|
63
58
|
require 'remi/transform'
|
@@ -250,14 +250,14 @@ module Remi::BusinessRules
|
|
250
250
|
class DataSubject
|
251
251
|
def initialize(name, subject)
|
252
252
|
@name = name
|
253
|
-
@
|
253
|
+
@data_subject = subject
|
254
254
|
@fields = DataFieldCollection.new
|
255
255
|
|
256
256
|
stub_data
|
257
257
|
end
|
258
258
|
|
259
259
|
attr_reader :name
|
260
|
-
attr_reader :
|
260
|
+
attr_reader :data_subject
|
261
261
|
|
262
262
|
def add_field(field_name)
|
263
263
|
@fields.add_field(self, field_name)
|
@@ -272,17 +272,17 @@ module Remi::BusinessRules
|
|
272
272
|
end
|
273
273
|
|
274
274
|
def size
|
275
|
-
@
|
275
|
+
@data_subject.df.size
|
276
276
|
end
|
277
277
|
|
278
278
|
def get_attrib(name)
|
279
|
-
@
|
279
|
+
@data_subject.send(name)
|
280
280
|
end
|
281
281
|
|
282
282
|
# Public: Converts the data subject to a hash where the keys are the table
|
283
283
|
# columns and the values are arrays holding that column's value for each row.
|
284
284
|
def column_hash
|
285
|
-
@
|
285
|
+
@data_subject.df.to_h.reduce({}) do |h, (k,v)|
|
286
286
|
h[k.symbolize] = v.to_a
|
287
287
|
h
|
288
288
|
end
|
@@ -290,7 +290,7 @@ module Remi::BusinessRules
|
|
290
290
|
|
291
291
|
# For debugging only
|
292
292
|
def _df
|
293
|
-
@
|
293
|
+
@data_subject.df
|
294
294
|
end
|
295
295
|
|
296
296
|
|
@@ -298,7 +298,7 @@ module Remi::BusinessRules
|
|
298
298
|
# Need more robust duping to make that feasible.
|
299
299
|
# Don't use results for anything more than size.
|
300
300
|
def where(field_name, operation)
|
301
|
-
@
|
301
|
+
@data_subject.df.where(@data_subject.df[field_name.symbolize(@data_subject.field_symbolizer)].recode { |v| operation.call(v) })
|
302
302
|
end
|
303
303
|
|
304
304
|
def where_is(field_name, value)
|
@@ -324,29 +324,29 @@ module Remi::BusinessRules
|
|
324
324
|
|
325
325
|
|
326
326
|
def stub_data
|
327
|
-
@
|
327
|
+
@data_subject.stub_df if @data_subject.respond_to? :stub_df
|
328
328
|
end
|
329
329
|
|
330
330
|
def example_to_df(example)
|
331
|
-
example.to_df(@
|
331
|
+
example.to_df(@data_subject.df.row[0].to_h, field_symbolizer: @data_subject.field_symbolizer)
|
332
332
|
end
|
333
333
|
|
334
334
|
def stub_data_with(example)
|
335
335
|
stub_data
|
336
|
-
@
|
336
|
+
@data_subject.df = example_to_df(example)
|
337
337
|
end
|
338
338
|
|
339
339
|
def append_data_with(example)
|
340
|
-
@
|
340
|
+
@data_subject.df = @data_subject.df.concat example_to_df(example)
|
341
341
|
end
|
342
342
|
|
343
343
|
|
344
344
|
def replicate_rows(n_rows)
|
345
|
-
replicated_df = Daru::DataFrame.new([], order: @
|
346
|
-
@
|
345
|
+
replicated_df = Daru::DataFrame.new([], order: @data_subject.df.vectors.to_a)
|
346
|
+
@data_subject.df.each do |vector|
|
347
347
|
replicated_df[vector.name] = vector.to_a * n_rows
|
348
348
|
end
|
349
|
-
@
|
349
|
+
@data_subject.df = replicated_df
|
350
350
|
end
|
351
351
|
|
352
352
|
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
|
@@ -378,31 +378,31 @@ module Remi::BusinessRules
|
|
378
378
|
|
379
379
|
def distribute_values(table)
|
380
380
|
cumulative_dist = cumulative_dist_from_freq_table(table)
|
381
|
-
generated_data = generate_values_from_cumulative_dist(@
|
381
|
+
generated_data = generate_values_from_cumulative_dist(@data_subject.df.size, cumulative_dist)
|
382
382
|
|
383
383
|
generated_data.each do |field_name, data_array|
|
384
384
|
vector_name = fields[field_name].field_name
|
385
|
-
@
|
385
|
+
@data_subject.df[vector_name] = Daru::Vector.new(data_array, index: @data_subject.df.index)
|
386
386
|
end
|
387
387
|
end
|
388
388
|
|
389
389
|
def freq_by(*field_names)
|
390
|
-
@
|
390
|
+
@data_subject.df.group_by(field_names).size * 1.0 / @data_subject.df.size
|
391
391
|
end
|
392
392
|
|
393
393
|
def mock_extractor(filestore)
|
394
|
-
extractor = class << @
|
394
|
+
extractor = class << @data_subject.extractor; self; end
|
395
395
|
|
396
396
|
extractor.send(:define_method, :all_entries, ->() { filestore.sftp_entries })
|
397
397
|
extractor.send(:define_method, :download, ->(to_download) { to_download.map { |e| e.name } })
|
398
398
|
end
|
399
399
|
|
400
400
|
def extract
|
401
|
-
@
|
401
|
+
@data_subject.extractor.extract
|
402
402
|
end
|
403
403
|
|
404
404
|
def csv_options
|
405
|
-
@
|
405
|
+
@data_subject.csv_options
|
406
406
|
end
|
407
407
|
|
408
408
|
end
|
@@ -456,7 +456,7 @@ module Remi::BusinessRules
|
|
456
456
|
def initialize(subject, name)
|
457
457
|
@subject = subject
|
458
458
|
@name = name
|
459
|
-
@field_name = name.symbolize(subject.
|
459
|
+
@field_name = name.symbolize(subject.data_subject.field_symbolizer)
|
460
460
|
end
|
461
461
|
|
462
462
|
attr_reader :name
|
@@ -468,11 +468,11 @@ module Remi::BusinessRules
|
|
468
468
|
end
|
469
469
|
|
470
470
|
def metadata
|
471
|
-
@subject.
|
471
|
+
@subject.data_subject.fields[@field_name]
|
472
472
|
end
|
473
473
|
|
474
474
|
def vector
|
475
|
-
@subject.
|
475
|
+
@subject.data_subject.df[@field_name]
|
476
476
|
end
|
477
477
|
|
478
478
|
def value
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Remi
|
2
|
+
module DataFrame
|
3
|
+
class << self
|
4
|
+
def create(remi_df_type = :daru, *args, **kargs, &block)
|
5
|
+
dataframe = case remi_df_type
|
6
|
+
when :daru
|
7
|
+
Remi::DataFrame::Daru.new(*args, **kargs, &block)
|
8
|
+
else
|
9
|
+
raise TypeError, "Unknown frame type: #{remi_df_type}"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def daru(*args, **kargs, &block)
|
14
|
+
self.create(:daru, *args, **kargs, &block)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
def [](*args)
|
20
|
+
super
|
21
|
+
end
|
22
|
+
|
23
|
+
def size
|
24
|
+
super
|
25
|
+
end
|
26
|
+
|
27
|
+
def write_csv(*args, **kargs, &block)
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
# Public: Returns the type of DataFrame
|
32
|
+
def remi_df_type
|
33
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Remi
|
2
|
+
module DataFrame
|
3
|
+
class Daru < SimpleDelegator
|
4
|
+
include Remi::DataFrame
|
5
|
+
|
6
|
+
def initialize(*args, **kargs, &block)
|
7
|
+
if args[0].is_a? ::Daru::DataFrame
|
8
|
+
super(args[0])
|
9
|
+
else
|
10
|
+
super(::Daru::DataFrame.new(*args, **kargs, &block))
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
# Public: Returns the type of DataFrame
|
16
|
+
def remi_df_type
|
17
|
+
:daru
|
18
|
+
end
|
19
|
+
|
20
|
+
# Public: Saves a Dataframe to a file.
|
21
|
+
def hash_dump(filename)
|
22
|
+
File.binwrite(filename, Marshal.dump(self))
|
23
|
+
end
|
24
|
+
|
25
|
+
# Public: Creates a DataFrame by reading the dumped version from a file.
|
26
|
+
def self.from_hash_dump(filename)
|
27
|
+
Marshal.load(File.binread(filename))
|
28
|
+
end
|
29
|
+
|
30
|
+
# Public: Allows the user to define an arbitrary aggregation function.
|
31
|
+
#
|
32
|
+
# by - The name of the DataFrame vector to use to group records.
|
33
|
+
# func - A lambda function that accepts three arguments - the
|
34
|
+
# first argument is the DataFrame, the second is the
|
35
|
+
# key to the current group, and the third is the list of index values
|
36
|
+
# of the elements belonging to a group.
|
37
|
+
#
|
38
|
+
# Example:
|
39
|
+
# df = Remi::DataFrame::Daru.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
|
40
|
+
#
|
41
|
+
# mymin = lambda do |vector, df, group_key, indices|
|
42
|
+
# values = indices.map { |idx| df.row[idx][vector] }
|
43
|
+
# "Group #{group_key} has a minimum value of #{values.min}"
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# df.aggregate(by: :a, func: mymin.curry.(:year))
|
47
|
+
#
|
48
|
+
#
|
49
|
+
# Returns a Daru::Vector.
|
50
|
+
def aggregate(by:, func:)
|
51
|
+
grouped = self.group_by(by)
|
52
|
+
df_indices = self.index.to_a
|
53
|
+
::Daru::Vector.new(
|
54
|
+
grouped.groups.reduce({}) do |h, (key, indices)|
|
55
|
+
# Daru groups don't use the index of the dataframe when returning groups (WTF?).
|
56
|
+
# Instead they return the position of the record in the dataframe. Here, we map those positions back to the dataframe's index values.
|
57
|
+
group_df_indices = indices.map { |v| df_indices[v] }
|
58
|
+
group_key = key.size == 1 ? key.first : key
|
59
|
+
h[group_key] = func.(self, group_key, group_df_indices)
|
60
|
+
h
|
61
|
+
end
|
62
|
+
)
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -1,24 +1,85 @@
|
|
1
1
|
module Remi
|
2
|
-
|
2
|
+
|
3
|
+
# Namespaces for specific sources/targets
|
4
|
+
module DataSource; end
|
5
|
+
module DataTarget; end
|
6
|
+
|
7
|
+
class DataSubject
|
8
|
+
def initialize(*args, fields: Remi::Fields.new, remi_df_type: :daru, logger: Remi::Settings.logger, **kargs, &block)
|
9
|
+
@fields = fields
|
10
|
+
@remi_df_type = remi_df_type
|
11
|
+
@logger = logger
|
12
|
+
end
|
13
|
+
|
14
|
+
attr_accessor :fields
|
15
|
+
|
3
16
|
def field_symbolizer
|
4
17
|
Remi::FieldSymbolizers[:standard]
|
5
18
|
end
|
6
19
|
|
7
20
|
def df
|
8
|
-
@dataframe ||=
|
21
|
+
@dataframe ||= Remi::DataFrame.create(@remi_df_type, [], order: @fields.keys)
|
9
22
|
end
|
10
23
|
|
11
24
|
def df=(new_dataframe)
|
12
|
-
|
25
|
+
if new_dataframe.respond_to? :remi_df_type
|
26
|
+
@dataframe = new_dataframe
|
27
|
+
else
|
28
|
+
@dataframe = Remi::DataFrame.create(@remi_df_type, new_dataframe)
|
29
|
+
end
|
13
30
|
end
|
14
31
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
32
|
+
module DataSource
|
33
|
+
|
34
|
+
# Public: Access the dataframe from a DataSource
|
35
|
+
#
|
36
|
+
# Returns a Remi::DataFrame
|
37
|
+
def df
|
38
|
+
@dataframe ||= to_dataframe
|
39
|
+
end
|
40
|
+
|
41
|
+
# Public: Memoized version of extract!
|
42
|
+
def extract
|
43
|
+
@extract ||= extract!
|
44
|
+
end
|
45
|
+
|
46
|
+
# Public: Called to extract data from the source.
|
47
|
+
#
|
48
|
+
# Returns data in a format that can be used to create a dataframe.
|
49
|
+
def extract!
|
50
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
51
|
+
@extract
|
52
|
+
end
|
53
|
+
|
54
|
+
# Public: Converts extracted data to a dataframe
|
55
|
+
#
|
56
|
+
# Returns a Remi::DataFrame
|
57
|
+
def to_dataframe
|
58
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
module DataTarget
|
63
|
+
|
64
|
+
# Public: Loads data to the target. This is automatically called
|
65
|
+
# after all transforms have executed, but could also get called manually.
|
66
|
+
# The actual load operation is only executed if hasn't already.
|
67
|
+
#
|
68
|
+
# Returns true if the load operation was successful.
|
69
|
+
def load
|
70
|
+
return true if @loaded || df.size == 0
|
71
|
+
|
72
|
+
@loaded = load!
|
73
|
+
end
|
74
|
+
|
75
|
+
# Public: Performs the load operation, regardless of whether it has
|
76
|
+
# already executed.
|
77
|
+
#
|
78
|
+
# Returns true if the load operation was successful
|
79
|
+
def load!
|
80
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
81
|
+
|
82
|
+
false
|
22
83
|
end
|
23
84
|
end
|
24
85
|
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
module Remi
|
2
|
+
module DataSubject::CsvFile
|
3
|
+
def self.included(base)
|
4
|
+
base.extend(CsvFileClassMethods)
|
5
|
+
end
|
6
|
+
|
7
|
+
def field_symbolizer
|
8
|
+
self.class.default_csv_options[:header_converters]
|
9
|
+
end
|
10
|
+
|
11
|
+
module CsvFileClassMethods
|
12
|
+
def default_csv_options
|
13
|
+
@default_csv_options ||= CSV::DEFAULT_OPTIONS.merge({
|
14
|
+
headers: true,
|
15
|
+
header_converters: Remi::FieldSymbolizers[:standard],
|
16
|
+
converters: [],
|
17
|
+
col_sep: ',',
|
18
|
+
encoding: 'UTF-8',
|
19
|
+
quote_char: '"'
|
20
|
+
})
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
class DataSource::CsvFile < Remi::DataSubject
|
30
|
+
include Remi::DataSubject::DataSource
|
31
|
+
include Remi::DataSubject::CsvFile
|
32
|
+
|
33
|
+
def initialize(*args, **kargs, &block)
|
34
|
+
super
|
35
|
+
init_csv_file(*args, **kargs, &block)
|
36
|
+
end
|
37
|
+
|
38
|
+
attr_reader :extractor
|
39
|
+
attr_reader :csv_options
|
40
|
+
|
41
|
+
# Public: Called to extract data from the source.
|
42
|
+
#
|
43
|
+
# Returns data in a format that can be used to create a dataframe.
|
44
|
+
def extract!
|
45
|
+
@extract = Array(@extractor.extract)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Public: Converts extracted data to a dataframe.
|
49
|
+
# Currently only supports Daru DataFrames.
|
50
|
+
#
|
51
|
+
# Returns a Remi::DataFrame
|
52
|
+
def to_dataframe
|
53
|
+
# Assumes that each file has exactly the same structure
|
54
|
+
result_df = nil
|
55
|
+
extract.each_with_index do |filename, idx|
|
56
|
+
@logger.info "Converting #{filename} to a dataframe"
|
57
|
+
csv_df = Daru::DataFrame.from_csv filename, @csv_options
|
58
|
+
|
59
|
+
csv_df[@filename_field] = Daru::Vector.new([filename] * csv_df.size, index: csv_df.index) if @filename_field
|
60
|
+
if idx == 0
|
61
|
+
result_df = csv_df
|
62
|
+
else
|
63
|
+
result_df = result_df.concat csv_df
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
Remi::DataFrame.create(:daru, result_df)
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
def extractor=(arg)
|
73
|
+
case arg
|
74
|
+
when Extractor::SftpFile, Extractor::LocalFile
|
75
|
+
@extractor = arg
|
76
|
+
when String
|
77
|
+
@extractor = Extractor::LocalFile.new(path: arg)
|
78
|
+
when Regexp
|
79
|
+
raise "Adding regex matching to local files would be easy, not done yet"
|
80
|
+
else
|
81
|
+
raise "Unknown extractor of type #{arg.class}: #{arg}"
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Only going to support a single file for now
|
86
|
+
def source_filename
|
87
|
+
raise "Multiple source files detected" if extract.size > 1
|
88
|
+
@source_filename ||= extract.first
|
89
|
+
end
|
90
|
+
|
91
|
+
def first_line
|
92
|
+
# Readline assumes \n line endings. Strip out \r if it is a DOS file.
|
93
|
+
@first_line ||= File.open(source_filename) do |f|
|
94
|
+
f.readline.gsub(/\r/,'')
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def headers
|
99
|
+
@headers ||= CSV.open(source_filename, 'r', source_csv_options) { |csv| csv.first }.headers
|
100
|
+
end
|
101
|
+
|
102
|
+
def valid_headers?
|
103
|
+
(fields.keys - headers).empty?
|
104
|
+
end
|
105
|
+
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
def init_csv_file(*args, extractor:, csv_options: {}, filename_field: nil, **kargs, &block)
|
110
|
+
self.extractor = extractor
|
111
|
+
@csv_options = self.class.default_csv_options.merge(csv_options)
|
112
|
+
@filename_field = filename_field
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
class DataTarget::CsvFile < Remi::DataSubject
|
121
|
+
include ::Remi::DataSubject::DataTarget
|
122
|
+
include ::Remi::DataSubject::CsvFile
|
123
|
+
|
124
|
+
default_csv_options[:row_sep] = "\n"
|
125
|
+
|
126
|
+
def initialize(*args, **kargs, &block)
|
127
|
+
super
|
128
|
+
init_csv_file(*args, **kargs, &block)
|
129
|
+
end
|
130
|
+
|
131
|
+
attr_reader :csv_options
|
132
|
+
|
133
|
+
# Public: Performs the load operation, regardless of whether it has
|
134
|
+
# already executed.
|
135
|
+
#
|
136
|
+
# Returns true if the load operation was successful
|
137
|
+
def load!
|
138
|
+
@logger.info "Writing CSV file #{@path}"
|
139
|
+
df.write_csv @path, @csv_options
|
140
|
+
true
|
141
|
+
end
|
142
|
+
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
def init_csv_file(*args, path:, csv_options: {}, **kargs, &block)
|
147
|
+
@path = path
|
148
|
+
@csv_options = self.class.default_csv_options.merge(csv_options)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|