remi 0.2.42 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +13 -26
- data/README.md +1 -1
- data/features/step_definitions/remi_step.rb +33 -13
- data/features/sub_job_example.feature +24 -0
- data/features/sub_transform_example.feature +35 -0
- data/features/sub_transform_many_to_many.feature +49 -0
- data/features/support/env_app.rb +1 -1
- data/jobs/all_jobs_shared.rb +19 -16
- data/jobs/copy_source_job.rb +11 -9
- data/jobs/csv_file_target_job.rb +10 -9
- data/jobs/json_job.rb +18 -14
- data/jobs/metadata_job.rb +33 -28
- data/jobs/parameters_job.rb +14 -11
- data/jobs/sample_job.rb +106 -77
- data/jobs/sftp_file_target_job.rb +14 -13
- data/jobs/sub_job_example_job.rb +86 -0
- data/jobs/sub_transform_example_job.rb +43 -0
- data/jobs/sub_transform_many_to_many_job.rb +46 -0
- data/jobs/transforms/concatenate_job.rb +16 -12
- data/jobs/transforms/data_frame_sieve_job.rb +24 -19
- data/jobs/transforms/date_diff_job.rb +15 -11
- data/jobs/transforms/nvl_job.rb +16 -12
- data/jobs/transforms/parse_date_job.rb +17 -14
- data/jobs/transforms/partitioner_job.rb +27 -19
- data/jobs/transforms/prefix_job.rb +13 -10
- data/jobs/transforms/truncate_job.rb +14 -10
- data/jobs/transforms/truthy_job.rb +11 -8
- data/lib/remi.rb +25 -11
- data/lib/remi/data_frame.rb +4 -4
- data/lib/remi/data_frame/daru.rb +1 -37
- data/lib/remi/data_subject.rb +234 -48
- data/lib/remi/data_subjects/csv_file.rb +171 -0
- data/lib/remi/data_subjects/data_frame.rb +106 -0
- data/lib/remi/data_subjects/file_system.rb +115 -0
- data/lib/remi/data_subjects/local_file.rb +109 -0
- data/lib/remi/data_subjects/none.rb +31 -0
- data/lib/remi/data_subjects/postgres.rb +186 -0
- data/lib/remi/data_subjects/s3_file.rb +84 -0
- data/lib/remi/data_subjects/salesforce.rb +211 -0
- data/lib/remi/data_subjects/sftp_file.rb +196 -0
- data/lib/remi/data_subjects/sub_job.rb +50 -0
- data/lib/remi/dsl.rb +74 -0
- data/lib/remi/encoder.rb +45 -0
- data/lib/remi/extractor.rb +21 -0
- data/lib/remi/field_symbolizers.rb +1 -0
- data/lib/remi/job.rb +279 -113
- data/lib/remi/job/parameters.rb +90 -0
- data/lib/remi/job/sub_job.rb +35 -0
- data/lib/remi/job/transform.rb +165 -0
- data/lib/remi/loader.rb +22 -0
- data/lib/remi/monkeys/daru.rb +4 -0
- data/lib/remi/parser.rb +44 -0
- data/lib/remi/testing/business_rules.rb +17 -23
- data/lib/remi/testing/data_stub.rb +2 -2
- data/lib/remi/version.rb +1 -1
- data/remi.gemspec +3 -0
- data/spec/data_subject_spec.rb +475 -11
- data/spec/data_subjects/csv_file_spec.rb +69 -0
- data/spec/data_subjects/data_frame_spec.rb +52 -0
- data/spec/{extractor → data_subjects}/file_system_spec.rb +0 -0
- data/spec/{extractor → data_subjects}/local_file_spec.rb +0 -0
- data/spec/data_subjects/none_spec.rb +41 -0
- data/spec/data_subjects/postgres_spec.rb +80 -0
- data/spec/{extractor → data_subjects}/s3_file_spec.rb +0 -0
- data/spec/data_subjects/salesforce_spec.rb +117 -0
- data/spec/{extractor → data_subjects}/sftp_file_spec.rb +16 -0
- data/spec/data_subjects/sub_job_spec.rb +33 -0
- data/spec/encoder_spec.rb +38 -0
- data/spec/extractor_spec.rb +11 -0
- data/spec/fixtures/sf_bulk_helper_stubs.rb +443 -0
- data/spec/job/transform_spec.rb +257 -0
- data/spec/job_spec.rb +507 -0
- data/spec/loader_spec.rb +11 -0
- data/spec/parser_spec.rb +38 -0
- data/spec/sf_bulk_helper_spec.rb +117 -0
- data/spec/testing/data_stub_spec.rb +5 -3
- metadata +109 -27
- data/features/aggregate.feature +0 -42
- data/jobs/aggregate_job.rb +0 -31
- data/jobs/transforms/transform_jobs.rb +0 -4
- data/lib/remi/data_subject/csv_file.rb +0 -162
- data/lib/remi/data_subject/data_frame.rb +0 -52
- data/lib/remi/data_subject/postgres.rb +0 -134
- data/lib/remi/data_subject/salesforce.rb +0 -136
- data/lib/remi/data_subject/sftp_file.rb +0 -65
- data/lib/remi/extractor/file_system.rb +0 -92
- data/lib/remi/extractor/local_file.rb +0 -43
- data/lib/remi/extractor/s3_file.rb +0 -57
- data/lib/remi/extractor/sftp_file.rb +0 -83
- data/spec/data_subject/csv_file_spec.rb +0 -79
- data/spec/data_subject/data_frame.rb +0 -27
data/lib/remi.rb
CHANGED
@@ -36,7 +36,11 @@ require 'active_support/core_ext/time/calculations'
|
|
36
36
|
require 'remi/version.rb'
|
37
37
|
|
38
38
|
require 'remi/settings'
|
39
|
+
require 'remi/dsl'
|
39
40
|
require 'remi/job'
|
41
|
+
require 'remi/job/parameters'
|
42
|
+
require 'remi/job/sub_job'
|
43
|
+
require 'remi/job/transform'
|
40
44
|
require 'remi/source_to_target_map'
|
41
45
|
require 'remi/source_to_target_map/map'
|
42
46
|
require 'remi/source_to_target_map/row'
|
@@ -44,21 +48,31 @@ require 'remi/field_symbolizers'
|
|
44
48
|
|
45
49
|
require 'remi/refinements/symbolizer'
|
46
50
|
|
47
|
-
require 'remi/extractor
|
48
|
-
require 'remi/
|
49
|
-
require 'remi/
|
50
|
-
require 'remi/
|
51
|
+
require 'remi/extractor'
|
52
|
+
require 'remi/parser'
|
53
|
+
require 'remi/encoder'
|
54
|
+
require 'remi/loader'
|
51
55
|
|
56
|
+
require 'remi/data_subject'
|
57
|
+
require 'remi/data_subjects/file_system'
|
58
|
+
require 'remi/data_subjects/local_file'
|
59
|
+
require 'remi/data_subjects/sftp_file'
|
60
|
+
require 'remi/data_subjects/s3_file'
|
61
|
+
require 'remi/data_subjects/csv_file'
|
62
|
+
#require 'remi/data_subjects/salesforce' # intentionally not included by default
|
63
|
+
require 'remi/data_subjects/postgres'
|
64
|
+
require 'remi/data_subjects/data_frame'
|
65
|
+
require 'remi/data_subjects/none'
|
66
|
+
require 'remi/data_subjects/sub_job'
|
52
67
|
|
53
68
|
require 'remi/fields'
|
54
69
|
require 'remi/data_frame'
|
55
70
|
require 'remi/data_frame/daru'
|
56
71
|
|
57
|
-
require 'remi/data_subject'
|
58
|
-
require 'remi/data_subject/csv_file'
|
59
|
-
#require 'remi/data_subject/salesforce' # intentionally not included by default
|
60
|
-
require 'remi/data_subject/postgres'
|
61
|
-
require 'remi/data_subject/sftp_file'
|
62
|
-
require 'remi/data_subject/data_frame'
|
63
|
-
|
64
72
|
require 'remi/transform'
|
73
|
+
|
74
|
+
require 'remi/monkeys/daru'
|
75
|
+
|
76
|
+
# Remi is Ruby Extract Modify and Integrate, a framework for writing ETL jobs in Ruby.
|
77
|
+
module Remi
|
78
|
+
end
|
data/lib/remi/data_frame.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Remi
|
2
2
|
module DataFrame
|
3
3
|
class << self
|
4
|
-
def create(
|
5
|
-
dataframe = case
|
4
|
+
def create(df_type = :daru, *args, **kargs, &block)
|
5
|
+
dataframe = case df_type
|
6
6
|
when :daru
|
7
7
|
Remi::DataFrame::Daru.new(*args, **kargs, &block)
|
8
8
|
else
|
9
|
-
raise TypeError, "Unknown frame type: #{
|
9
|
+
raise TypeError, "Unknown frame type: #{df_type}"
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
@@ -29,7 +29,7 @@ module Remi
|
|
29
29
|
end
|
30
30
|
|
31
31
|
# Public: Returns the type of DataFrame
|
32
|
-
def
|
32
|
+
def df_type
|
33
33
|
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
34
34
|
end
|
35
35
|
end
|
data/lib/remi/data_frame/daru.rb
CHANGED
@@ -13,7 +13,7 @@ module Remi
|
|
13
13
|
|
14
14
|
|
15
15
|
# Public: Returns the type of DataFrame
|
16
|
-
def
|
16
|
+
def df_type
|
17
17
|
:daru
|
18
18
|
end
|
19
19
|
|
@@ -26,42 +26,6 @@ module Remi
|
|
26
26
|
def self.from_hash_dump(filename)
|
27
27
|
Marshal.load(File.binread(filename))
|
28
28
|
end
|
29
|
-
|
30
|
-
# Public: Allows the user to define an arbitrary aggregation function.
|
31
|
-
#
|
32
|
-
# by - The name of the DataFrame vector to use to group records.
|
33
|
-
# func - A lambda function that accepts three arguments - the
|
34
|
-
# first argument is the DataFrame, the second is the
|
35
|
-
# key to the current group, and the third is the index
|
36
|
-
# of the elements belonging to a group.
|
37
|
-
#
|
38
|
-
# Example:
|
39
|
-
# df = Remi::DataFrame::Daru.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] })
|
40
|
-
#
|
41
|
-
# mymin = lambda do |vector, df, group_key, indices|
|
42
|
-
# values = indices.map { |idx| df.row[idx][vector] }
|
43
|
-
# "Group #{group_key} has a minimum value of #{values.min}"
|
44
|
-
# end
|
45
|
-
#
|
46
|
-
# df.aggregate(by: :a, func: mymin.curry.(:year))
|
47
|
-
#
|
48
|
-
#
|
49
|
-
# Returns a Daru::Vector.
|
50
|
-
def aggregate(by:, func:)
|
51
|
-
grouped = self.group_by(by)
|
52
|
-
df_indices = self.index.to_a
|
53
|
-
::Daru::Vector.new(
|
54
|
-
grouped.groups.reduce({}) do |h, (key, indices)|
|
55
|
-
# Daru groups don't use the index of the dataframe when returning groups (WTF?).
|
56
|
-
# Instead they return the position of the record in the dataframe. Here, we
|
57
|
-
group_df_indices = indices.map { |v| df_indices[v] }
|
58
|
-
group_key = key.size == 1 ? key.first : key
|
59
|
-
h[group_key] = func.(self, group_key, group_df_indices)
|
60
|
-
h
|
61
|
-
end
|
62
|
-
)
|
63
|
-
end
|
64
|
-
|
65
29
|
end
|
66
30
|
end
|
67
31
|
end
|
data/lib/remi/data_subject.rb
CHANGED
@@ -1,45 +1,92 @@
|
|
1
1
|
module Remi
|
2
|
+
|
3
|
+
# The DataSubject is the parent class for DataSource and DataTarget. It is not intended
|
4
|
+
# to be used as a standalone class.
|
5
|
+
#
|
6
|
+
# A DataSubject is either a source or a target. It is largely used to associate
|
7
|
+
# a dataframe with a set of "fields" containing metadata describing how the vectors
|
8
|
+
# of the dataframe are meant to be interpreted. For example, one of the fields
|
9
|
+
# might represent a date with MM-DD-YYYY format.
|
10
|
+
#
|
11
|
+
# DataSubjects can be defined either using the standard `DataSubject.new(<args>)`
|
12
|
+
# convention, or through a DSL, which is convenient for data subjects defined
|
13
|
+
# as part of a job class definition.
|
2
14
|
class DataSubject
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
15
|
+
|
16
|
+
# @param context [Object] the context in which the DSL is evaluated
|
17
|
+
# @param name [Symbol,String] the name of the data subject
|
18
|
+
# @param block [Proc] a block of code to be executed to define the data subject
|
19
|
+
def initialize(context=nil, name: 'NOT DEFINED', **kargs, &block)
|
20
|
+
@context = context
|
21
|
+
@name = name
|
22
|
+
@block = block
|
23
|
+
@df_type = :daru
|
24
|
+
@fields = Remi::Fields.new
|
25
|
+
@field_symbolizer = Remi::FieldSymbolizers[:standard]
|
26
|
+
end
|
27
|
+
|
28
|
+
attr_accessor :context, :name
|
29
|
+
|
30
|
+
|
31
|
+
# @param arg [Symbol] sets the type of dataframe to use for this subject
|
32
|
+
# @return [Symbol] the type of dataframe (defaults to `:daru` if not explicitly set)
|
33
|
+
def df_type(arg = nil)
|
34
|
+
return get_df_type unless arg
|
35
|
+
set_df_type arg
|
7
36
|
end
|
8
37
|
|
9
|
-
#
|
10
|
-
|
38
|
+
# @param arg [Hash, Remi::Fields] set the field metadata for this data subject
|
39
|
+
# @return [Remi::Fields] the field metadata for this data subject
|
40
|
+
def fields(arg = nil)
|
41
|
+
return get_fields unless arg
|
42
|
+
set_fields arg
|
43
|
+
end
|
11
44
|
|
12
|
-
#
|
13
|
-
|
14
|
-
|
45
|
+
# @param arg [Hash, Remi::Fields] set the field metadata for this data subject
|
46
|
+
# @return [Remi::Fields] the field metadata for this data subject
|
47
|
+
def fields=(arg)
|
48
|
+
@fields = Remi::Fields.new(arg)
|
15
49
|
end
|
16
50
|
|
17
|
-
#
|
51
|
+
# Field symbolizer used to convert field names into symbols. This method sets
|
52
|
+
# the symbolizer for the data subject and also sets the symbolizers for
|
53
|
+
# any associated parser and encoders.
|
18
54
|
#
|
19
|
-
#
|
55
|
+
# @return [Proc] the method for symbolizing field names
|
56
|
+
def field_symbolizer(arg = nil)
|
57
|
+
return @field_symbolizer unless arg
|
58
|
+
@field_symbolizer = if arg.is_a? Symbol
|
59
|
+
Remi::FieldSymbolizers[arg]
|
60
|
+
else
|
61
|
+
arg
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# @return [Remi::DataFrame] the dataframe associated with this DataSubject
|
20
66
|
def df
|
21
|
-
@dataframe ||= Remi::DataFrame.create(
|
67
|
+
@dataframe ||= Remi::DataFrame.create(df_type, [], order: fields.keys)
|
22
68
|
end
|
23
69
|
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
70
|
+
# Reassigns the dataframe associated with this DataSubject.
|
71
|
+
# @param new_dataframe [Object] The new dataframe object to be associated.
|
72
|
+
# @return [Remi::DataFrame] the associated dataframe
|
27
73
|
def df=(new_dataframe)
|
28
|
-
if new_dataframe.respond_to? :
|
74
|
+
if new_dataframe.respond_to? :df_type
|
29
75
|
@dataframe = new_dataframe
|
30
76
|
else
|
31
|
-
@dataframe = Remi::DataFrame.create(
|
77
|
+
@dataframe = Remi::DataFrame.create(df_type, new_dataframe)
|
32
78
|
end
|
33
79
|
end
|
34
80
|
|
35
|
-
#
|
36
|
-
#
|
37
|
-
#
|
81
|
+
# Enforces the types defined in the field metadata. Throws an
|
82
|
+
# error if a data element does not conform to the type. For
|
83
|
+
# example, if a field has metadata with type: :date, then the type
|
84
|
+
# enforcer will convert data in that field into a date, and will
|
38
85
|
# throw an error if it is unable to parse any of the values.
|
39
86
|
#
|
40
|
-
# types
|
41
|
-
#
|
42
|
-
#
|
87
|
+
# @param types [Array<Symbol>] a list of metadata types to use to enforce. If none are given,
|
88
|
+
# all types are enforced.
|
89
|
+
# @return [self]
|
43
90
|
def enforce_types(*types)
|
44
91
|
sttm = SourceToTargetMap.new(df, source_metadata: fields)
|
45
92
|
fields.keys.each do |field|
|
@@ -47,63 +94,202 @@ module Remi
|
|
47
94
|
sttm.source(field).target(field).transform(Remi::Transform::EnforceType.new).execute
|
48
95
|
end
|
49
96
|
|
50
|
-
|
97
|
+
self
|
98
|
+
end
|
99
|
+
|
100
|
+
# Defines the subject using the DSL in the block provided
|
101
|
+
#
|
102
|
+
# @return [self]
|
103
|
+
def dsl_eval
|
104
|
+
dsl_eval! unless @dsl_evaluated
|
105
|
+
@dsl_evaluated = true
|
106
|
+
self
|
107
|
+
end
|
108
|
+
|
109
|
+
def dsl_eval!
|
110
|
+
return self unless @block
|
111
|
+
Dsl.dsl_eval(self, @context, &@block)
|
112
|
+
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
def set_fields(arg)
|
117
|
+
self.fields = arg
|
118
|
+
end
|
119
|
+
|
120
|
+
def get_fields
|
121
|
+
dsl_eval
|
122
|
+
@fields
|
123
|
+
end
|
124
|
+
|
125
|
+
def set_df_type(arg)
|
126
|
+
@df_type = arg
|
127
|
+
end
|
128
|
+
|
129
|
+
def get_df_type
|
130
|
+
dsl_eval
|
131
|
+
@df_type
|
51
132
|
end
|
52
133
|
end
|
53
134
|
|
54
135
|
|
136
|
+
|
137
|
+
# The DataSource is a DataSubject meant to extract data from an external source
|
138
|
+
# and convert (parse) it into a dataframe.
|
139
|
+
#
|
140
|
+
# @example
|
141
|
+
#
|
142
|
+
# my_data_source = DataSource.new do
|
143
|
+
# extractor some_extractor
|
144
|
+
# parser some_parser
|
145
|
+
# end
|
146
|
+
#
|
147
|
+
# my_data_source.df #=> Returns a dataframe that is created by extracting data
|
148
|
+
# # from some_extractor and parsing it using some_parser.
|
55
149
|
class DataSource < DataSubject
|
56
150
|
|
57
|
-
|
151
|
+
def initialize(*args, **kargs, &block)
|
152
|
+
@parser = Parser::None.new
|
153
|
+
@parser.context = self
|
154
|
+
super
|
155
|
+
end
|
156
|
+
|
157
|
+
# @return [Array] the list of extractors that are defined for this data source
|
158
|
+
def extractors
|
159
|
+
@extractors ||= []
|
160
|
+
end
|
161
|
+
|
162
|
+
# @param obj [Object] adds an extractor object to the list of extractors
|
163
|
+
# @return [Array] the full list of extractors
|
164
|
+
def extractor(obj)
|
165
|
+
extractors << obj unless extractors.include? obj
|
166
|
+
end
|
167
|
+
|
168
|
+
# @param obj [Object] sets the parser for this data source
|
169
|
+
# @return [Object] the parser set for this data source
|
170
|
+
def parser(obj = nil)
|
171
|
+
return @parser unless obj
|
172
|
+
obj.context = self
|
173
|
+
|
174
|
+
@parser = obj
|
175
|
+
end
|
176
|
+
|
177
|
+
# Extracts data from all of the extractors.
|
178
|
+
# @return [Array] the result of each extractor
|
179
|
+
def extract!
|
180
|
+
extractors.map { |e| e.extract }
|
181
|
+
end
|
182
|
+
|
183
|
+
# Converts all of the extracted data to a dataframe
|
184
|
+
# @return [Remi::DataFrame]
|
185
|
+
def parse
|
186
|
+
parser.parse *extract
|
187
|
+
end
|
188
|
+
|
189
|
+
# The dataframe will only be extracted and parsed once, and only if it
|
190
|
+
# has not already been set (e.g., using #df=).
|
58
191
|
#
|
59
|
-
#
|
192
|
+
# @return [Remi::DataFrame] the dataframe associated with this DataSubject
|
60
193
|
def df
|
61
|
-
@dataframe ||=
|
194
|
+
@dataframe ||= parsed_as_dataframe
|
62
195
|
end
|
63
196
|
|
64
|
-
#
|
197
|
+
# This clears any previously extracted and parsed results.
|
198
|
+
# A subsequent call to #df will redo the extract and parse.
|
199
|
+
#
|
200
|
+
# @return [nil] always; the cached block, dataframe, and extract are cleared
|
201
|
+
def reset
|
202
|
+
@block = nil
|
203
|
+
@dataframe = nil
|
204
|
+
@extract = nil
|
205
|
+
end
|
206
|
+
|
207
|
+
# @return [Array<Object>] all of the data extracted from the extractors (memoized).
|
65
208
|
def extract
|
66
209
|
@extract ||= extract!
|
67
210
|
end
|
68
211
|
|
69
|
-
# Public: Called to extract data from the source.
|
70
|
-
#
|
71
|
-
# Returns data in a format that can be used to create a dataframe.
|
72
|
-
def extract!
|
73
|
-
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
74
|
-
@extract
|
75
|
-
end
|
76
212
|
|
77
|
-
|
78
|
-
|
79
|
-
#
|
80
|
-
|
81
|
-
|
213
|
+
private
|
214
|
+
|
215
|
+
# Runs the DSL definitions and all extracts, parses, and enforced types
|
216
|
+
# @return [Remi::DataFrame] the source extracted and parsed as a dataframe
|
217
|
+
def parsed_as_dataframe
|
218
|
+
dsl_eval if @block
|
219
|
+
dataframe = parse
|
220
|
+
dataframe
|
82
221
|
end
|
83
222
|
end
|
84
223
|
|
85
224
|
|
225
|
+
# The DataTarget is a DataSubject meant to load data from an associated dataframe
|
226
|
+
# into one or more target systems.
|
227
|
+
#
|
228
|
+
# @example
|
229
|
+
#
|
230
|
+
# my_data_target = DataTarget.new do
|
231
|
+
# encoder some_encoder
|
232
|
+
# loader some_loader
|
233
|
+
# end
|
234
|
+
#
|
235
|
+
# my_data_target.df = some_great_dataframe
|
236
|
+
# my_data_target.load #=> loads data from the dataframe into some target defined by some_loader
|
86
237
|
class DataTarget < DataSubject
|
87
238
|
|
88
|
-
|
239
|
+
def initialize(*args, **kargs, &block)
|
240
|
+
@encoder = Encoder::None.new
|
241
|
+
@encoder.context = self
|
242
|
+
super
|
243
|
+
end
|
244
|
+
|
245
|
+
# @param obj [Object] sets the encoder for this data target
|
246
|
+
# @return [Object] the encoder set for this data target
|
247
|
+
def encoder(obj = nil)
|
248
|
+
return @encoder unless obj
|
249
|
+
obj.context = self
|
250
|
+
|
251
|
+
@encoder = obj
|
252
|
+
end
|
253
|
+
|
254
|
+
# @return [Array] the list of loaders associated with this data target
|
255
|
+
def loaders
|
256
|
+
@loaders ||= []
|
257
|
+
end
|
258
|
+
|
259
|
+
# @param obj [Object] adds a loader object to the list of loaders
|
260
|
+
# @return [Array] the full list of loaders
|
261
|
+
def loader(obj)
|
262
|
+
loaders << obj unless loaders.include? obj
|
263
|
+
end
|
264
|
+
|
265
|
+
# Loads data to all targets. This is automatically called
|
89
266
|
# after all transforms have executed, but could also get called manually.
|
90
267
|
# The actual load operation is only executed if it hasn't already been.
|
91
268
|
#
|
92
|
-
#
|
269
|
+
# @return [true] if successful
|
93
270
|
def load
|
94
|
-
return
|
271
|
+
return nil if @loaded || df.size == 0
|
272
|
+
dsl_eval if @block
|
95
273
|
|
96
|
-
|
274
|
+
load!
|
275
|
+
@loaded = true
|
97
276
|
end
|
98
277
|
|
99
|
-
#
|
278
|
+
# Performs the load operation, regardless of whether it has
|
100
279
|
# already executed.
|
101
280
|
#
|
102
|
-
#
|
281
|
+
# @return [true] once every loader has been called with the encoded dataframe
|
103
282
|
def load!
|
104
|
-
|
283
|
+
loaders.each { |l| l.load encoded_dataframe }
|
284
|
+
true
|
285
|
+
end
|
286
|
+
|
287
|
+
private
|
105
288
|
|
106
|
-
|
289
|
+
# @return [Object] the encoded data suitable for the loaders
|
290
|
+
def encoded_dataframe
|
291
|
+
@encoded_dataframe ||= encoder.encode df
|
107
292
|
end
|
293
|
+
|
108
294
|
end
|
109
295
|
end
|