remi 0.2.42 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +13 -26
- data/README.md +1 -1
- data/features/step_definitions/remi_step.rb +33 -13
- data/features/sub_job_example.feature +24 -0
- data/features/sub_transform_example.feature +35 -0
- data/features/sub_transform_many_to_many.feature +49 -0
- data/features/support/env_app.rb +1 -1
- data/jobs/all_jobs_shared.rb +19 -16
- data/jobs/copy_source_job.rb +11 -9
- data/jobs/csv_file_target_job.rb +10 -9
- data/jobs/json_job.rb +18 -14
- data/jobs/metadata_job.rb +33 -28
- data/jobs/parameters_job.rb +14 -11
- data/jobs/sample_job.rb +106 -77
- data/jobs/sftp_file_target_job.rb +14 -13
- data/jobs/sub_job_example_job.rb +86 -0
- data/jobs/sub_transform_example_job.rb +43 -0
- data/jobs/sub_transform_many_to_many_job.rb +46 -0
- data/jobs/transforms/concatenate_job.rb +16 -12
- data/jobs/transforms/data_frame_sieve_job.rb +24 -19
- data/jobs/transforms/date_diff_job.rb +15 -11
- data/jobs/transforms/nvl_job.rb +16 -12
- data/jobs/transforms/parse_date_job.rb +17 -14
- data/jobs/transforms/partitioner_job.rb +27 -19
- data/jobs/transforms/prefix_job.rb +13 -10
- data/jobs/transforms/truncate_job.rb +14 -10
- data/jobs/transforms/truthy_job.rb +11 -8
- data/lib/remi.rb +25 -11
- data/lib/remi/data_frame.rb +4 -4
- data/lib/remi/data_frame/daru.rb +1 -37
- data/lib/remi/data_subject.rb +234 -48
- data/lib/remi/data_subjects/csv_file.rb +171 -0
- data/lib/remi/data_subjects/data_frame.rb +106 -0
- data/lib/remi/data_subjects/file_system.rb +115 -0
- data/lib/remi/data_subjects/local_file.rb +109 -0
- data/lib/remi/data_subjects/none.rb +31 -0
- data/lib/remi/data_subjects/postgres.rb +186 -0
- data/lib/remi/data_subjects/s3_file.rb +84 -0
- data/lib/remi/data_subjects/salesforce.rb +211 -0
- data/lib/remi/data_subjects/sftp_file.rb +196 -0
- data/lib/remi/data_subjects/sub_job.rb +50 -0
- data/lib/remi/dsl.rb +74 -0
- data/lib/remi/encoder.rb +45 -0
- data/lib/remi/extractor.rb +21 -0
- data/lib/remi/field_symbolizers.rb +1 -0
- data/lib/remi/job.rb +279 -113
- data/lib/remi/job/parameters.rb +90 -0
- data/lib/remi/job/sub_job.rb +35 -0
- data/lib/remi/job/transform.rb +165 -0
- data/lib/remi/loader.rb +22 -0
- data/lib/remi/monkeys/daru.rb +4 -0
- data/lib/remi/parser.rb +44 -0
- data/lib/remi/testing/business_rules.rb +17 -23
- data/lib/remi/testing/data_stub.rb +2 -2
- data/lib/remi/version.rb +1 -1
- data/remi.gemspec +3 -0
- data/spec/data_subject_spec.rb +475 -11
- data/spec/data_subjects/csv_file_spec.rb +69 -0
- data/spec/data_subjects/data_frame_spec.rb +52 -0
- data/spec/{extractor → data_subjects}/file_system_spec.rb +0 -0
- data/spec/{extractor → data_subjects}/local_file_spec.rb +0 -0
- data/spec/data_subjects/none_spec.rb +41 -0
- data/spec/data_subjects/postgres_spec.rb +80 -0
- data/spec/{extractor → data_subjects}/s3_file_spec.rb +0 -0
- data/spec/data_subjects/salesforce_spec.rb +117 -0
- data/spec/{extractor → data_subjects}/sftp_file_spec.rb +16 -0
- data/spec/data_subjects/sub_job_spec.rb +33 -0
- data/spec/encoder_spec.rb +38 -0
- data/spec/extractor_spec.rb +11 -0
- data/spec/fixtures/sf_bulk_helper_stubs.rb +443 -0
- data/spec/job/transform_spec.rb +257 -0
- data/spec/job_spec.rb +507 -0
- data/spec/loader_spec.rb +11 -0
- data/spec/parser_spec.rb +38 -0
- data/spec/sf_bulk_helper_spec.rb +117 -0
- data/spec/testing/data_stub_spec.rb +5 -3
- metadata +109 -27
- data/features/aggregate.feature +0 -42
- data/jobs/aggregate_job.rb +0 -31
- data/jobs/transforms/transform_jobs.rb +0 -4
- data/lib/remi/data_subject/csv_file.rb +0 -162
- data/lib/remi/data_subject/data_frame.rb +0 -52
- data/lib/remi/data_subject/postgres.rb +0 -134
- data/lib/remi/data_subject/salesforce.rb +0 -136
- data/lib/remi/data_subject/sftp_file.rb +0 -65
- data/lib/remi/extractor/file_system.rb +0 -92
- data/lib/remi/extractor/local_file.rb +0 -43
- data/lib/remi/extractor/s3_file.rb +0 -57
- data/lib/remi/extractor/sftp_file.rb +0 -83
- data/spec/data_subject/csv_file_spec.rb +0 -79
- data/spec/data_subject/data_frame.rb +0 -27
data/lib/remi.rb
CHANGED
@@ -36,7 +36,11 @@ require 'active_support/core_ext/time/calculations'
|
|
36
36
|
require 'remi/version.rb'
|
37
37
|
|
38
38
|
require 'remi/settings'
|
39
|
+
require 'remi/dsl'
|
39
40
|
require 'remi/job'
|
41
|
+
require 'remi/job/parameters'
|
42
|
+
require 'remi/job/sub_job'
|
43
|
+
require 'remi/job/transform'
|
40
44
|
require 'remi/source_to_target_map'
|
41
45
|
require 'remi/source_to_target_map/map'
|
42
46
|
require 'remi/source_to_target_map/row'
|
@@ -44,21 +48,31 @@ require 'remi/field_symbolizers'
|
|
44
48
|
|
45
49
|
require 'remi/refinements/symbolizer'
|
46
50
|
|
47
|
-
require 'remi/extractor
|
48
|
-
require 'remi/
|
49
|
-
require 'remi/
|
50
|
-
require 'remi/
|
51
|
+
require 'remi/extractor'
|
52
|
+
require 'remi/parser'
|
53
|
+
require 'remi/encoder'
|
54
|
+
require 'remi/loader'
|
51
55
|
|
56
|
+
require 'remi/data_subject'
|
57
|
+
require 'remi/data_subjects/file_system'
|
58
|
+
require 'remi/data_subjects/local_file'
|
59
|
+
require 'remi/data_subjects/sftp_file'
|
60
|
+
require 'remi/data_subjects/s3_file'
|
61
|
+
require 'remi/data_subjects/csv_file'
|
62
|
+
#require 'remi/data_subjects/salesforce' # intentionally not included by default
|
63
|
+
require 'remi/data_subjects/postgres'
|
64
|
+
require 'remi/data_subjects/data_frame'
|
65
|
+
require 'remi/data_subjects/none'
|
66
|
+
require 'remi/data_subjects/sub_job'
|
52
67
|
|
53
68
|
require 'remi/fields'
|
54
69
|
require 'remi/data_frame'
|
55
70
|
require 'remi/data_frame/daru'
|
56
71
|
|
57
|
-
require 'remi/data_subject'
|
58
|
-
require 'remi/data_subject/csv_file'
|
59
|
-
#require 'remi/data_subject/salesforce' # intentionally not included by default
|
60
|
-
require 'remi/data_subject/postgres'
|
61
|
-
require 'remi/data_subject/sftp_file'
|
62
|
-
require 'remi/data_subject/data_frame'
|
63
|
-
|
64
72
|
require 'remi/transform'
|
73
|
+
|
74
|
+
require 'remi/monkeys/daru'
|
75
|
+
|
76
|
+
# Remi is Ruby Extract Modify and Integrate, a framework for writing ETL jobs in Ruby.
|
77
|
+
module Remi
|
78
|
+
end
|
data/lib/remi/data_frame.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Remi
|
2
2
|
module DataFrame
|
3
3
|
class << self
|
4
|
-
def create(
|
5
|
-
dataframe = case
|
4
|
+
def create(df_type = :daru, *args, **kargs, &block)
|
5
|
+
dataframe = case df_type
|
6
6
|
when :daru
|
7
7
|
Remi::DataFrame::Daru.new(*args, **kargs, &block)
|
8
8
|
else
|
9
|
-
raise TypeError, "Unknown frame type: #{
|
9
|
+
raise TypeError, "Unknown frame type: #{df_type}"
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
@@ -29,7 +29,7 @@ module Remi
|
|
29
29
|
end
|
30
30
|
|
31
31
|
# Public: Returns the type of DataFrame
|
32
|
-
def
|
32
|
+
def df_type
|
33
33
|
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
34
34
|
end
|
35
35
|
end
|
data/lib/remi/data_frame/daru.rb
CHANGED
@@ -13,7 +13,7 @@ module Remi
|
|
13
13
|
|
14
14
|
|
15
15
|
# Public: Returns the type of DataFrame
|
16
|
-
def
|
16
|
+
def df_type
|
17
17
|
:daru
|
18
18
|
end
|
19
19
|
|
@@ -26,42 +26,6 @@ module Remi
|
|
26
26
|
def self.from_hash_dump(filename)
|
27
27
|
Marshal.load(File.binread(filename))
|
28
28
|
end
|
29
|
-
|
30
|
-
# Public: Allows the user to define an arbitrary aggregation function.
|
31
|
-
#
|
32
|
-
# by - The name of the DataFrame vector to use to group records.
|
33
|
-
# func - A lambda function that accepts three arguments - the
|
34
|
-
# first argument is the DataFrame, the second is the
|
35
|
-
# key to the current group, and the third is the index
|
36
|
-
# of the elements belonging to a group.
|
37
|
-
#
|
38
|
-
# Example:
|
39
|
-
# df = Remi::DataFrame::Daru.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
|
40
|
-
#
|
41
|
-
# mymin = lambda do |vector, df, group_key, indices|
|
42
|
-
# values = indices.map { |idx| df.row[idx][vector] }
|
43
|
-
# "Group #{group_key} has a minimum value of #{values.min}"
|
44
|
-
# end
|
45
|
-
#
|
46
|
-
# df.aggregate(by: :a, func: mymin.curry.(:year))
|
47
|
-
#
|
48
|
-
#
|
49
|
-
# Returns a Daru::Vector.
|
50
|
-
def aggregate(by:, func:)
|
51
|
-
grouped = self.group_by(by)
|
52
|
-
df_indices = self.index.to_a
|
53
|
-
::Daru::Vector.new(
|
54
|
-
grouped.groups.reduce({}) do |h, (key, indices)|
|
55
|
-
# Daru groups don't use the index of the dataframe when returning groups (WTF?).
|
56
|
-
# Instead they return the position of the record in the dataframe. Here, we
|
57
|
-
group_df_indices = indices.map { |v| df_indices[v] }
|
58
|
-
group_key = key.size == 1 ? key.first : key
|
59
|
-
h[group_key] = func.(self, group_key, group_df_indices)
|
60
|
-
h
|
61
|
-
end
|
62
|
-
)
|
63
|
-
end
|
64
|
-
|
65
29
|
end
|
66
30
|
end
|
67
31
|
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -1,45 +1,92 @@
|
|
1
1
|
module Remi
|
2
|
+
|
3
|
+
# The DataSubject is the parent class for DataSource and DataTarget. It is not intended
|
4
|
+
# to be used as a standalone class.
|
5
|
+
#
|
6
|
+
# A DataSubject is either a source or a target. It is largely used to associate
|
7
|
+
# a dataframe with a set of "fields" containing metadata describing how the vectors
|
8
|
+
# of the dataframe are meant to be interpreted. For example, one of the fields
|
9
|
+
# might represent a date with MM-DD-YYYY format.
|
10
|
+
#
|
11
|
+
# DataSubjects can be defined either using the standard `DataSubject.new(<args>)`
|
12
|
+
# convention, or through a DSL, which is convenient for data subjects defined
|
13
|
+
# as part of a job class definition.
|
2
14
|
class DataSubject
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
15
|
+
|
16
|
+
# @param context [Object] the context in which the DSL is evaluated
|
17
|
+
# @param name [Symbol,String] the name of the data subject
|
18
|
+
# @param block [Proc] a block of code to be executed to define the data subject
|
19
|
+
def initialize(context=nil, name: 'NOT DEFINED', **kargs, &block)
|
20
|
+
@context = context
|
21
|
+
@name = name
|
22
|
+
@block = block
|
23
|
+
@df_type = :daru
|
24
|
+
@fields = Remi::Fields.new
|
25
|
+
@field_symbolizer = Remi::FieldSymbolizers[:standard]
|
26
|
+
end
|
27
|
+
|
28
|
+
attr_accessor :context, :name
|
29
|
+
|
30
|
+
|
31
|
+
# @param arg [Symbol] sets the type of dataframe to use for this subject
|
32
|
+
# @return [Symbol] the type of dataframe (defaults to `:daru` if not explicitly set)
|
33
|
+
def df_type(arg = nil)
|
34
|
+
return get_df_type unless arg
|
35
|
+
set_df_type arg
|
7
36
|
end
|
8
37
|
|
9
|
-
#
|
10
|
-
|
38
|
+
# @param arg [Hash, Remi::Fields] set the field metadata for this data subject
|
39
|
+
# @return [Remi::Fields] the field metadata for this data subject
|
40
|
+
def fields(arg = nil)
|
41
|
+
return get_fields unless arg
|
42
|
+
set_fields arg
|
43
|
+
end
|
11
44
|
|
12
|
-
#
|
13
|
-
|
14
|
-
|
45
|
+
# @param arg [Hash, Remi::Fields] set the field metadata for this data subject
|
46
|
+
# @return [Remi::Fields] the field metadata for this data subject
|
47
|
+
def fields=(arg)
|
48
|
+
@fields = Remi::Fields.new(arg)
|
15
49
|
end
|
16
50
|
|
17
|
-
#
|
51
|
+
# Field symbolizer used to convert field names into symbols. This method sets
|
52
|
+
# the symbolizer for the data subject and also sets the symbolizers for
|
53
|
+
# any associated parser and encoders.
|
18
54
|
#
|
19
|
-
#
|
55
|
+
# @return [Proc] the method for symbolizing field names
|
56
|
+
def field_symbolizer(arg = nil)
|
57
|
+
return @field_symbolizer unless arg
|
58
|
+
@field_symbolizer = if arg.is_a? Symbol
|
59
|
+
Remi::FieldSymbolizers[arg]
|
60
|
+
else
|
61
|
+
arg
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# @return [Remi::DataFrame] the dataframe associated with this DataSubject
|
20
66
|
def df
|
21
|
-
@dataframe ||= Remi::DataFrame.create(
|
67
|
+
@dataframe ||= Remi::DataFrame.create(df_type, [], order: fields.keys)
|
22
68
|
end
|
23
69
|
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
70
|
+
# Reassigns the dataframe associated with this DataSubject.
|
71
|
+
# @param new_dataframe [Object] The new dataframe object to be associated.
|
72
|
+
# @return [Remi::DataFrame] the associated dataframe
|
27
73
|
def df=(new_dataframe)
|
28
|
-
if new_dataframe.respond_to? :
|
74
|
+
if new_dataframe.respond_to? :df_type
|
29
75
|
@dataframe = new_dataframe
|
30
76
|
else
|
31
|
-
@dataframe = Remi::DataFrame.create(
|
77
|
+
@dataframe = Remi::DataFrame.create(df_type, new_dataframe)
|
32
78
|
end
|
33
79
|
end
|
34
80
|
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
81
|
+
# Enforces the types defined in the field metadata. Throws an
|
82
|
+
# error if a data element does not conform to the type. For
|
83
|
+
# example, if a field has metadata with type: :date, then the type
|
84
|
+
# enforcer will convert data in that field into a date, and will
|
38
85
|
# throw an error if it is unable to parse any of the values.
|
39
86
|
#
|
40
|
-
# types
|
41
|
-
#
|
42
|
-
#
|
87
|
+
# @param types [Array<Symbol>] a list of metadata types to use to enforce. If none are given,
|
88
|
+
# all types are enforced.
|
89
|
+
# @return [self]
|
43
90
|
def enforce_types(*types)
|
44
91
|
sttm = SourceToTargetMap.new(df, source_metadata: fields)
|
45
92
|
fields.keys.each do |field|
|
@@ -47,63 +94,202 @@ module Remi
|
|
47
94
|
sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
|
48
95
|
end
|
49
96
|
|
50
|
-
|
97
|
+
self
|
98
|
+
end
|
99
|
+
|
100
|
+
# Defines the subject using the DSL in the block provided
|
101
|
+
#
|
102
|
+
# @return [self]
|
103
|
+
def dsl_eval
|
104
|
+
dsl_eval! unless @dsl_evaluated
|
105
|
+
@dsl_evaluated = true
|
106
|
+
self
|
107
|
+
end
|
108
|
+
|
109
|
+
def dsl_eval!
|
110
|
+
return self unless @block
|
111
|
+
Dsl.dsl_eval(self, @context, &@block)
|
112
|
+
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
def set_fields(arg)
|
117
|
+
self.fields = arg
|
118
|
+
end
|
119
|
+
|
120
|
+
def get_fields
|
121
|
+
dsl_eval
|
122
|
+
@fields
|
123
|
+
end
|
124
|
+
|
125
|
+
def set_df_type(arg)
|
126
|
+
@df_type = arg
|
127
|
+
end
|
128
|
+
|
129
|
+
def get_df_type
|
130
|
+
dsl_eval
|
131
|
+
@df_type
|
51
132
|
end
|
52
133
|
end
|
53
134
|
|
54
135
|
|
136
|
+
|
137
|
+
# The DataSource is a DataSubject meant to extract data from an external source
|
138
|
+
# and convert (parse) it into a dataframe.
|
139
|
+
#
|
140
|
+
# @example
|
141
|
+
#
|
142
|
+
# my_data_source = DataSource.new do
|
143
|
+
# extractor some_extractor
|
144
|
+
# parser some_parser
|
145
|
+
# end
|
146
|
+
#
|
147
|
+
# my_data_source.df #=> Returns a dataframe that is created by extracting data
|
148
|
+
# # from some_extractor and parsing it using some_parser.
|
55
149
|
class DataSource < DataSubject
|
56
150
|
|
57
|
-
|
151
|
+
def initialize(*args, **kargs, &block)
|
152
|
+
@parser = Parser::None.new
|
153
|
+
@parser.context = self
|
154
|
+
super
|
155
|
+
end
|
156
|
+
|
157
|
+
# @return [Array] the list of extractors that are defined for this data source
|
158
|
+
def extractors
|
159
|
+
@extractors ||= []
|
160
|
+
end
|
161
|
+
|
162
|
+
# @param obj [Object] adds an extractor object to the list of extractors
|
163
|
+
# @return [Array] the full list of extractors
|
164
|
+
def extractor(obj)
|
165
|
+
extractors << obj unless extractors.include? obj
|
166
|
+
end
|
167
|
+
|
168
|
+
# @param obj [Object] sets the parser for this data source
|
169
|
+
# @return [Object] the parser set for this data source
|
170
|
+
def parser(obj = nil)
|
171
|
+
return @parser unless obj
|
172
|
+
obj.context = self
|
173
|
+
|
174
|
+
@parser = obj
|
175
|
+
end
|
176
|
+
|
177
|
+
# Extracts data from all of the extractors.
|
178
|
+
# @return [Array] the result of each extractor
|
179
|
+
def extract!
|
180
|
+
extractors.map { |e| e.extract }
|
181
|
+
end
|
182
|
+
|
183
|
+
# Converts all of the extracted data to a dataframe
|
184
|
+
# @return [Remi::DataFrame]
|
185
|
+
def parse
|
186
|
+
parser.parse *extract
|
187
|
+
end
|
188
|
+
|
189
|
+
# The dataframe will only be extracted and parsed once, and only if it
|
190
|
+
# has not already been set (e.g., using #df=).
|
58
191
|
#
|
59
|
-
#
|
192
|
+
# @return [Remi::DataFrame] the dataframe associated with this DataSubject
|
60
193
|
def df
|
61
|
-
@dataframe ||=
|
194
|
+
@dataframe ||= parsed_as_dataframe
|
62
195
|
end
|
63
196
|
|
64
|
-
#
|
197
|
+
# This clears any previously extracted and parsed results.
|
198
|
+
# A subsequent call to #df will redo the extract and parse.
|
199
|
+
#
|
200
|
+
# @return [nil] nothing; the cached block, dataframe, and extract results are cleared
|
201
|
+
def reset
|
202
|
+
@block = nil
|
203
|
+
@dataframe = nil
|
204
|
+
@extract = nil
|
205
|
+
end
|
206
|
+
|
207
|
+
# @return [Array<Object>] all of the data extracted from the extractors (memoized).
|
65
208
|
def extract
|
66
209
|
@extract ||= extract!
|
67
210
|
end
|
68
211
|
|
69
|
-
# Public: Called to extract data from the source.
|
70
|
-
#
|
71
|
-
# Returns data in a format that can be used to create a dataframe.
|
72
|
-
def extract!
|
73
|
-
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
74
|
-
@extract
|
75
|
-
end
|
76
212
|
|
77
|
-
|
78
|
-
|
79
|
-
#
|
80
|
-
|
81
|
-
|
213
|
+
private
|
214
|
+
|
215
|
+
# Runs the DSL definitions and all extracts, parses, and enforced types
|
216
|
+
# @return [Remi::DataFrame] the source extracted and parsed as a dataframe
|
217
|
+
def parsed_as_dataframe
|
218
|
+
dsl_eval if @block
|
219
|
+
dataframe = parse
|
220
|
+
dataframe
|
82
221
|
end
|
83
222
|
end
|
84
223
|
|
85
224
|
|
225
|
+
# The DataTarget is a DataSubject meant to load data from an associated dataframe
|
226
|
+
# into one or more target systems.
|
227
|
+
#
|
228
|
+
# @example
|
229
|
+
#
|
230
|
+
# my_data_target = DataTarget.new do
|
231
|
+
# encoder some_encoder
|
232
|
+
# loader some_loader
|
233
|
+
# end
|
234
|
+
#
|
235
|
+
# my_data_target.df = some_great_dataframe
|
236
|
+
# my_data_target.load #=> loads data from the dataframe into some target defined by some_loader
|
86
237
|
class DataTarget < DataSubject
|
87
238
|
|
88
|
-
|
239
|
+
def initialize(*args, **kargs, &block)
|
240
|
+
@encoder = Encoder::None.new
|
241
|
+
@encoder.context = self
|
242
|
+
super
|
243
|
+
end
|
244
|
+
|
245
|
+
# @param obj [Object] sets the encoder for this data target
|
246
|
+
# @return [Object] the encoder set for this data target
|
247
|
+
def encoder(obj = nil)
|
248
|
+
return @encoder unless obj
|
249
|
+
obj.context = self
|
250
|
+
|
251
|
+
@encoder = obj
|
252
|
+
end
|
253
|
+
|
254
|
+
# @return [Array] the list of loaders associated with this data target
|
255
|
+
def loaders
|
256
|
+
@loaders ||= []
|
257
|
+
end
|
258
|
+
|
259
|
+
# @param obj [Object] adds a loader object to the list of loaders
|
260
|
+
# @return [Array] the full list of loaders
|
261
|
+
def loader(obj)
|
262
|
+
loaders << obj unless loaders.include? obj
|
263
|
+
end
|
264
|
+
|
265
|
+
# Loads data to all targets. This is automatically called
|
89
266
|
# after all transforms have executed, but could also get called manually.
|
90
267
|
# The actual load operation is only executed if it hasn't already.
|
91
268
|
#
|
92
|
-
#
|
269
|
+
# @return [true, nil] true if the load ran; nil if already loaded or the dataframe is empty
|
93
270
|
def load
|
94
|
-
return
|
271
|
+
return nil if @loaded || df.size == 0
|
272
|
+
dsl_eval if @block
|
95
273
|
|
96
|
-
|
274
|
+
load!
|
275
|
+
@loaded = true
|
97
276
|
end
|
98
277
|
|
99
|
-
#
|
278
|
+
# Performs the load operation, regardless of whether it has
|
100
279
|
# already executed.
|
101
280
|
#
|
102
|
-
#
|
281
|
+
# @return [true] once every loader has been called
|
103
282
|
def load!
|
104
|
-
|
283
|
+
loaders.each { |l| l.load encoded_dataframe }
|
284
|
+
true
|
285
|
+
end
|
286
|
+
|
287
|
+
private
|
105
288
|
|
106
|
-
|
289
|
+
# @return [Object] the encoded data suitable for the loaders
|
290
|
+
def encoded_dataframe
|
291
|
+
@encoded_dataframe ||= encoder.encode df
|
107
292
|
end
|
293
|
+
|
108
294
|
end
|
109
295
|
end
|