remi 0.2.42 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +13 -26
- data/README.md +1 -1
- data/features/step_definitions/remi_step.rb +33 -13
- data/features/sub_job_example.feature +24 -0
- data/features/sub_transform_example.feature +35 -0
- data/features/sub_transform_many_to_many.feature +49 -0
- data/features/support/env_app.rb +1 -1
- data/jobs/all_jobs_shared.rb +19 -16
- data/jobs/copy_source_job.rb +11 -9
- data/jobs/csv_file_target_job.rb +10 -9
- data/jobs/json_job.rb +18 -14
- data/jobs/metadata_job.rb +33 -28
- data/jobs/parameters_job.rb +14 -11
- data/jobs/sample_job.rb +106 -77
- data/jobs/sftp_file_target_job.rb +14 -13
- data/jobs/sub_job_example_job.rb +86 -0
- data/jobs/sub_transform_example_job.rb +43 -0
- data/jobs/sub_transform_many_to_many_job.rb +46 -0
- data/jobs/transforms/concatenate_job.rb +16 -12
- data/jobs/transforms/data_frame_sieve_job.rb +24 -19
- data/jobs/transforms/date_diff_job.rb +15 -11
- data/jobs/transforms/nvl_job.rb +16 -12
- data/jobs/transforms/parse_date_job.rb +17 -14
- data/jobs/transforms/partitioner_job.rb +27 -19
- data/jobs/transforms/prefix_job.rb +13 -10
- data/jobs/transforms/truncate_job.rb +14 -10
- data/jobs/transforms/truthy_job.rb +11 -8
- data/lib/remi.rb +25 -11
- data/lib/remi/data_frame.rb +4 -4
- data/lib/remi/data_frame/daru.rb +1 -37
- data/lib/remi/data_subject.rb +234 -48
- data/lib/remi/data_subjects/csv_file.rb +171 -0
- data/lib/remi/data_subjects/data_frame.rb +106 -0
- data/lib/remi/data_subjects/file_system.rb +115 -0
- data/lib/remi/data_subjects/local_file.rb +109 -0
- data/lib/remi/data_subjects/none.rb +31 -0
- data/lib/remi/data_subjects/postgres.rb +186 -0
- data/lib/remi/data_subjects/s3_file.rb +84 -0
- data/lib/remi/data_subjects/salesforce.rb +211 -0
- data/lib/remi/data_subjects/sftp_file.rb +196 -0
- data/lib/remi/data_subjects/sub_job.rb +50 -0
- data/lib/remi/dsl.rb +74 -0
- data/lib/remi/encoder.rb +45 -0
- data/lib/remi/extractor.rb +21 -0
- data/lib/remi/field_symbolizers.rb +1 -0
- data/lib/remi/job.rb +279 -113
- data/lib/remi/job/parameters.rb +90 -0
- data/lib/remi/job/sub_job.rb +35 -0
- data/lib/remi/job/transform.rb +165 -0
- data/lib/remi/loader.rb +22 -0
- data/lib/remi/monkeys/daru.rb +4 -0
- data/lib/remi/parser.rb +44 -0
- data/lib/remi/testing/business_rules.rb +17 -23
- data/lib/remi/testing/data_stub.rb +2 -2
- data/lib/remi/version.rb +1 -1
- data/remi.gemspec +3 -0
- data/spec/data_subject_spec.rb +475 -11
- data/spec/data_subjects/csv_file_spec.rb +69 -0
- data/spec/data_subjects/data_frame_spec.rb +52 -0
- data/spec/{extractor → data_subjects}/file_system_spec.rb +0 -0
- data/spec/{extractor → data_subjects}/local_file_spec.rb +0 -0
- data/spec/data_subjects/none_spec.rb +41 -0
- data/spec/data_subjects/postgres_spec.rb +80 -0
- data/spec/{extractor → data_subjects}/s3_file_spec.rb +0 -0
- data/spec/data_subjects/salesforce_spec.rb +117 -0
- data/spec/{extractor → data_subjects}/sftp_file_spec.rb +16 -0
- data/spec/data_subjects/sub_job_spec.rb +33 -0
- data/spec/encoder_spec.rb +38 -0
- data/spec/extractor_spec.rb +11 -0
- data/spec/fixtures/sf_bulk_helper_stubs.rb +443 -0
- data/spec/job/transform_spec.rb +257 -0
- data/spec/job_spec.rb +507 -0
- data/spec/loader_spec.rb +11 -0
- data/spec/parser_spec.rb +38 -0
- data/spec/sf_bulk_helper_spec.rb +117 -0
- data/spec/testing/data_stub_spec.rb +5 -3
- metadata +109 -27
- data/features/aggregate.feature +0 -42
- data/jobs/aggregate_job.rb +0 -31
- data/jobs/transforms/transform_jobs.rb +0 -4
- data/lib/remi/data_subject/csv_file.rb +0 -162
- data/lib/remi/data_subject/data_frame.rb +0 -52
- data/lib/remi/data_subject/postgres.rb +0 -134
- data/lib/remi/data_subject/salesforce.rb +0 -136
- data/lib/remi/data_subject/sftp_file.rb +0 -65
- data/lib/remi/extractor/file_system.rb +0 -92
- data/lib/remi/extractor/local_file.rb +0 -43
- data/lib/remi/extractor/s3_file.rb +0 -57
- data/lib/remi/extractor/sftp_file.rb +0 -83
- data/spec/data_subject/csv_file_spec.rb +0 -79
- data/spec/data_subject/data_frame.rb +0 -27
@@ -0,0 +1,31 @@
|
|
1
|
+
module Remi
|
2
|
+
|
3
|
+
# The None extractor doesn't do anything.
|
4
|
+
class Extractor::None < Extractor
|
5
|
+
# No-op extraction step.
#
# @return [nil] always; nothing is extracted
def extract
  nil
end
|
8
|
+
end
|
9
|
+
|
10
|
+
|
11
|
+
# The None Parser just returns what it is given.
|
12
|
+
class Parser::None < Parser
|
13
|
+
# Pass-through parse step.
#
# @param data [Object] whatever was handed to the parser
# @return [Object] the very same object, untouched
def parse(data)
  data
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# The None Encoder just returns what it is given.
|
19
|
+
class Encoder::None < Encoder
|
20
|
+
# Pass-through encode step.
#
# @param data_frame [Object] the data frame handed to the encoder
# @return [Object] the very same object, untouched
def encode(data_frame)
  data_frame
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# The None loader doesn't do anything.
|
26
|
+
class Loader::None < Loader
|
27
|
+
# No-op load step; the data is ignored.
#
# @param data [Object] encoded data (unused)
# @return [true] always
def load(data)
  true
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
module Remi
|
2
|
+
# Contains methods shared between Postgres Extractor/Parser/Encoder/Loader
|
3
|
+
module DataSubject::Postgres
|
4
|
+
|
5
|
+
# Opens the postgres connection on first use and reuses it afterwards.
#
# Settings are read from +@credentials+. When +:host+, +:port+, +:user+ or
# +:sslmode+ are missing, they fall back to 'localhost', 5432, the output of
# the +whoami+ command and 'allow' respectively.
#
# @return [PG::Connection] An authenticated postgres connection
def connection
  return @connection if @connection

  settings = {
    host:     @credentials[:host] || 'localhost',
    port:     @credentials[:port] || 5432,
    dbname:   @credentials[:dbname],
    user:     @credentials[:user] || `whoami`.chomp,
    password: @credentials[:password],
    sslmode:  @credentials[:sslmode] || 'allow'
  }
  @connection = PG.connect(**settings)
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Postgres extractor
|
19
|
+
#
|
20
|
+
# @example
|
21
|
+
# class MyJob < Remi::Job
|
22
|
+
# source :some_table do
|
23
|
+
# extractor Remi::Extractor::Postgres.new(
|
24
|
+
# credentials: {
|
25
|
+
# dbname: 'my_local_db'
|
26
|
+
# },
|
27
|
+
# query: 'SELECT * FROM job_specs'
|
28
|
+
# )
|
29
|
+
# parser Remi::Parser::Postgres.new
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# job = MyJob.new
|
34
|
+
# job.some_table.df[:id, :name]
|
35
|
+
# # =>#<Daru::DataFrame:70153144824760 @name = 53c8e878-55e7-4859-bc34-ec29309c11fd @size = 3>
|
36
|
+
# # id name
|
37
|
+
# # 0 24 albert
|
38
|
+
# # 1 26 betsy
|
39
|
+
# # 2 25 camu
|
40
|
+
|
41
|
+
class Extractor::Postgres < Extractor
|
42
|
+
include DataSubject::Postgres
|
43
|
+
|
44
|
+
# Builds a postgres extractor. All arguments are first handed to the parent
# initializer and then to +init_postgres_extractor+.
#
# @param credentials [Hash] Used to authenticate with the postgres db
# @option credentials [String] :host Postgres host (default: localhost)
# @option credentials [Integer] :port Postgres port (default: 5432)
# @option credentials [String] :dbname Database name
# @option credentials [String] :user Postgres login username (default: `whoami`)
# @option credentials [String] :password Postgres login password
# @option credentials [String] :sslmode Postgres SSL mode (default: allow)
# @param query [String] Query to use to extract data
def initialize(*args, **kargs, &block)
  super
  init_postgres_extractor(*args, **kargs, &block)
end
|
56
|
+
|
57
|
+
# @return [Object] Data extracted from Postgres system
|
58
|
+
attr_reader :data
|
59
|
+
|
60
|
+
# Logs the query, runs it, and keeps the result in +@data+.
#
# @return [Object] self after querying Postgres data
def extract
  logger.info "Executing Postgres query #{@query}"
  tap { @data = execute_query }
end
|
66
|
+
|
67
|
+
private
|
68
|
+
|
69
|
+
# Sends +@query+ over the shared connection.
#
# @return [Object] whatever +connection.exec+ returns
def execute_query
  connection.exec(@query)
end
|
72
|
+
|
73
|
+
# Stores the postgres-specific settings. Any other positional/keyword
# arguments and the block are accepted but not used here.
#
# @param credentials [Hash] connection settings kept in +@credentials+
# @param query [String] query kept in +@query+
def init_postgres_extractor(*_args, credentials:, query:, **_kargs, &_block)
  @credentials = credentials
  @query       = query
end
|
77
|
+
end
|
78
|
+
|
79
|
+
|
80
|
+
# Postgres parser
|
81
|
+
# Used to parse results from a postgres extractor (see Extractor::Postgres).
|
82
|
+
class Parser::Postgres < Parser
|
83
|
+
|
84
|
+
# Pivots the extracted rows into a hash of column arrays (keys run through
# +field_symbolizer+) and builds a daru-backed dataframe from it. The query
# result is cleared afterwards to save memory.
#
# @param postgres_extract [Extractor::Postgres] An object containing data extracted from Postgres
# @return [Remi::DataFrame] The data converted into a dataframe
def parse(postgres_extract)
  # Performance for larger sets could be improved by using bulk query (via COPY)
  logger.info "Converting Postgres query to a dataframe"

  columns = postgres_extract.data.each_with_object({}) do |record, acc|
    record.each do |name, value|
      (acc[field_symbolizer.call(name)] ||= []) << value
    end
  end

  # The rows now live in +columns+; release the raw PG result.
  postgres_extract.data.clear

  Remi::DataFrame.create(:daru, columns, order: columns.keys)
end
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
# Postgres encoder
|
106
|
+
class Encoder::Postgres < Encoder
|
107
|
+
|
108
|
+
# @return [Array<String>] All records of the dataframe encoded as strings to be used by Postgres Bulk updater
|
109
|
+
attr_accessor :values
|
110
|
+
|
111
|
+
# Encodes the dataframe via +encode_data+ and keeps the encoded rows in
# +@values+ so the postgres loader can read them from this encoder.
#
# @param dataframe [Remi::DataFrame] The dataframe to be encoded
# @return [Object] self, carrying the encoded data to be loaded into the target
def encode(dataframe)
  tap { @values = encode_data(dataframe) }
end
|
120
|
+
|
121
|
+
# Builds the column list for a CREATE TABLE statement: one "name type" pair
# per entry of +fields+, joined with ', '.
#
# @return [String] Field definitions to be used in the DDL
def ddl_fields
  column_defs = fields.map do |name, meta|
    "#{name} #{meta[:type]}"
  end
  column_defs.join(', ')
end
|
125
|
+
|
126
|
+
private
|
127
|
+
|
128
|
+
# Turns every row of the dataframe into one tab-delimited line in the text
# format read by postgres +COPY ... FROM STDIN+.
#
# @param dataframe [Remi::DataFrame] The dataframe to be encoded
# @return [Array<String>] One tab-delimited string per row, columns in +fields+ order
def encode_data(dataframe)
  dataframe.map(:row) do |row|
    fields.keys.map { |name| encode_value(row[name]) }.join("\t")
  end
end

# Encodes a single field value for the COPY text format.
#
# * values responding to +strftime+ are written as 'YYYY-MM-DD HH:MM:SS'
# * values responding to +map+ (arrays, hashes, ...) are written as JSON
# * nil is written as the COPY NULL marker +\N+
# * any other blank value is written as an empty string
# * everything else is written via +to_s+
#
# NOTE(review): with ActiveSupport, +false.blank?+ is true, so a boolean
# false is emitted as an empty string - confirm this is intended.
#
# @param value [Object] the raw field value
# @return [String] the encoded field
def encode_value(value)
  if value.respond_to?(:strftime)
    value.strftime('%Y-%m-%d %H:%M:%S')
  elsif value.respond_to?(:map)
    copy_escape(value.to_json)
  elsif value.nil?
    '\N'
  elsif value.blank?
    ''
  else
    copy_escape(value.to_s)
  end
end

# Escapes the characters that are special in the COPY text format.
# The backslash itself must be escaped too: otherwise text such as 'C:\new'
# is read back with a newline, a literal '\N' becomes NULL, and JSON escape
# sequences are stripped by COPY before they reach the column.
#
# @param text [String] the unescaped field text
# @return [String] the text with backslash, tab, LF and CR escaped
def copy_escape(text)
  text.gsub(/[\\\t\n\r]/, '\\' => '\\\\', "\t" => '\t', "\n" => '\n', "\r" => '\r')
end
|
147
|
+
end
|
148
|
+
|
149
|
+
|
150
|
+
# Postgres Loader
|
151
|
+
# VERY PRELIMINARY IMPLEMENTATION - ONLY LOADS TO TEMP TABLES
|
152
|
+
# IT IS THEN UP TO THE USER TO DO ELT TO LOAD THE FINAL TABLE
|
153
|
+
class Loader::Postgres < Loader
|
154
|
+
include DataSubject::Postgres
|
155
|
+
|
156
|
+
# Builds a postgres loader. All arguments are first handed to the parent
# initializer and then to +init_postgres_loader+.
#
# @param credentials [Hash] Used to authenticate with the postgres db
# @param table_name [#to_s] Name of the temporary table to create and load
def initialize(*args, **kargs, &block)
  super
  init_postgres_loader(*args, **kargs, &block)
end
|
160
|
+
|
161
|
+
# Creates a temporary table named +@table_name+ from the encoder's DDL
# fields, then streams the encoded rows into it with COPY ... FROM STDIN.
#
# @param data [Encoder::Postgres] Data that has been encoded appropriately to be loaded into the target
# @return [true] On success
def load(data)
  logger.info "Performing postgres load to table #{@table_name}"

  ddl = "CREATE TEMPORARY TABLE #{@table_name} (#{data.ddl_fields})"
  logger.info ddl
  connection.exec ddl

  column_list = data.fields.keys.join(', ')
  connection.copy_data("COPY #{@table_name} (#{column_list}) FROM STDIN") do
    # Each encoded row is one COPY line; the terminator is added here.
    data.values.each { |line| connection.put_copy_data("#{line}\n") }
  end

  true
end
|
177
|
+
|
178
|
+
|
179
|
+
private
|
180
|
+
|
181
|
+
# Stores the postgres-specific loader settings. Any other positional/keyword
# arguments and the block are accepted but not used here.
#
# @param credentials [Hash] connection settings kept in +@credentials+
# @param table_name [#to_s] target table name kept in +@table_name+
def init_postgres_loader(*_args, credentials:, table_name:, **_kargs, &_block)
  @credentials = credentials
  @table_name  = table_name
end
|
185
|
+
end
|
186
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Remi
|
2
|
+
|
3
|
+
# S3 File extractor
|
4
|
+
# Used to extract files from Amazon S3
|
5
|
+
#
|
6
|
+
# @example
|
7
|
+
#
|
8
|
+
# class MyJob < Remi::Job
|
9
|
+
# source :some_file do
|
10
|
+
# extractor Remi::Extractor::S3File.new(
|
11
|
+
# bucket: 'my-awesome-bucket',
|
12
|
+
# remote_path: 'some_file-',
|
13
|
+
# most_recent_only: true
|
14
|
+
# )
|
15
|
+
# parser Remi::Parser::CsvFile.new(
|
16
|
+
# csv_options: {
|
17
|
+
# headers: true,
|
18
|
+
# col_sep: '|'
|
19
|
+
# }
|
20
|
+
# )
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# job = MyJob.new
|
25
|
+
# job.some_file.df
|
26
|
+
# # =>#<Daru::DataFrame:70153153438500 @name = 4c59cfdd-7de7-4264-8666-83153f46a9e4 @size = 3>
|
27
|
+
# # id name
|
28
|
+
# # 0 1 Albert
|
29
|
+
# # 1 2 Betsy
|
30
|
+
# # 2 3 Camu
|
31
|
+
class Extractor::S3File < Extractor::FileSystem
|
32
|
+
|
33
|
+
# Builds an S3 file extractor. All arguments are first handed to the parent
# initializer and then to +init_s3_file+.
#
# @param bucket [String] Name of the S3 bucket containing the files
def initialize(*args, **kargs, &block)
  super
  init_s3_file(*args, **kargs, &block)
end
|
38
|
+
|
39
|
+
# Called to extract files from the source filesystem. Every entry is
# downloaded into +@local_path+ under the entry's own name.
#
# @return [Array<String>] An array of paths to a local copy of the files extracted
def extract
  entries.map do |remote|
    File.join(@local_path, remote.name).tap do |destination|
      logger.info "Downloading #{remote.pathname} from S3 to #{destination}"
      File.open(destination, 'wb') { |io| remote.raw.get(response_target: io) }
    end
  end
end
|
49
|
+
|
50
|
+
# Cached variant of +all_entries!+: the listing is fetched on the first call
# and the stored result is returned afterwards.
#
# @return [Array<Extractor::FileSystemEntry>] (Memoized) list of objects in the bucket/prefix
def all_entries
  return @all_entries if @all_entries

  @all_entries = all_entries!
end
|
54
|
+
|
55
|
+
# Lists the bucket objects whose key starts with +@remote_path+ and wraps
# each one in a FileSystemEntry (the raw S3 object is kept in +raw+).
#
# @return [Array<Extractor::FileSystemEntry>] List of objects in the bucket/prefix
def all_entries!
  key_prefix = @remote_path.to_s

  bucket.objects(prefix: key_prefix).map do |s3_object|
    # S3 does not track anything like a create time, so use last modified for both
    attributes = {
      pathname:      s3_object.key,
      create_time:   s3_object.last_modified,
      modified_time: s3_object.last_modified,
      raw:           s3_object
    }
    Extractor::FileSystemEntry.new(**attributes)
  end
end
|
67
|
+
|
68
|
+
# Creates the S3 client on first use and reuses it afterwards.
#
# @return [Aws::S3::Client] The S3 client used
def s3_client
  return @s3_client if @s3_client

  @s3_client = Aws::S3::Client.new
end
|
72
|
+
|
73
|
+
private
|
74
|
+
|
75
|
+
# Stores the S3-specific settings. Any other positional/keyword arguments
# are accepted but not used here.
#
# @param bucket [String] bucket name kept in +@bucket_name+
def init_s3_file(*_args, bucket:, **_kargs)
  @bucket_name = bucket
end
|
78
|
+
|
79
|
+
# Builds the bucket resource for +@bucket_name+ on first use (sharing
# +s3_client+) and reuses it afterwards.
#
# @return [Aws::S3::Bucket] the bucket the files are read from
def bucket
  return @bucket if @bucket

  @bucket = Aws::S3::Bucket.new(@bucket_name, client: s3_client)
end
|
82
|
+
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require 'restforce'
|
2
|
+
require 'salesforce_bulk_api'
|
3
|
+
require 'remi/sf_bulk_helper'
|
4
|
+
|
5
|
+
module Remi
|
6
|
+
|
7
|
+
# Contains methods shared between Salesforce Extractor/Parser/Encoder/Loader
|
8
|
+
module DataSubject::Salesforce
|
9
|
+
|
10
|
+
# Builds the restforce client from +@credentials+ on first use and reuses it
# afterwards.
#
# @return [Restforce] An authenticated restforce client
def restforce_client
  return @restforce_client if @restforce_client

  client = Restforce.new(@credentials)

  # Run a dummy query to initiate a connection. Workaround for Bulk API problem
  # https://github.com/yatish27/salesforce_bulk_api/issues/33
  client.query('SELECT Id FROM Contact LIMIT 1')

  @restforce_client = client
end
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
# Salesforce extractor
|
25
|
+
#
|
26
|
+
# @example
|
27
|
+
#
|
28
|
+
# class MyJob < Remi::Job
|
29
|
+
# source :contacts do
|
30
|
+
# extractor Remi::Extractor::Salesforce.new(
|
31
|
+
# credentials: { },
|
32
|
+
# object: :Contact,
|
33
|
+
# api: :bulk,
|
34
|
+
# query: 'SELECT Id, Student_ID__c, Name FROM Contact LIMIT 1000'
|
35
|
+
# )
|
36
|
+
# parser Remi::Parser::Salesforce.new
|
37
|
+
# end
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# job = MyJob.new
|
41
|
+
# job.contacts.df
|
42
|
+
# # #<Daru::DataFrame:70134211545860 @name = 7cddb460-6bfc-4737-a72c-60ed2c1a97d5 @size = 1>
|
43
|
+
# # Id Student_ID Name
|
44
|
+
# # 0 0031600002 test1111 Run Logan
|
45
|
+
class Extractor::Salesforce < Extractor
|
46
|
+
include Remi::DataSubject::Salesforce
|
47
|
+
|
48
|
+
class ExtractError < StandardError; end
|
49
|
+
|
50
|
+
# Builds a salesforce extractor. All arguments are first handed to the
# parent initializer and then to +init_salesforce_extractor+.
#
# @param credentials [Hash] Used to authenticate with salesforce
# @option credentials [String] :host Salesforce host (e.g., login.salesforce.com)
# @option credentials [String] :client_id Salesforce Rest client id
# @option credentials [String] :client_secret Salesforce Rest client secret
# @option credentials [String] :instance_url Salesforce instance URL (e.g., https://na1.salesforce.com)
# @option credentials [String] :username Salesforce username
# @option credentials [String] :password Salesforce password
# @option credentials [String] :security_token Salesforce security token
# @param object [Symbol] Salesforce object to extract
# @param query [String] The SOQL query to execute to extract data
# @param api [Symbol] Salesforce API to use (only option supported is `:bulk`)
def initialize(*args, **kargs, &block)
  super
  init_salesforce_extractor(*args, **kargs, &block)
end
|
65
|
+
|
66
|
+
attr_reader :data
|
67
|
+
|
68
|
+
# Logs the query, runs it through the bulk API, keeps the result in +@data+
# and checks it for batch errors.
#
# @return [Object] self after querying salesforce data
# @raise [ExtractError] when +check_for_errors+ finds a batch that did not complete
def extract
  logger.info "Executing salesforce query #{@query}"

  tap do
    @data = sf_bulk.query(@sfo, @query, 10_000)
    check_for_errors(@data)
  end
end
|
75
|
+
|
76
|
+
# Builds a new bulk API client around +restforce_client+ and sets its
# connection's status throttle to 5. A fresh client is built on every call.
#
# @return [SalesforceBulkApi::Api] The bulk API salesforce client
def sf_bulk
  api = SalesforceBulkApi::Api.new(restforce_client)
  api.connection.set_status_throttle(5)
  api
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
# Stores the salesforce-specific extractor settings. Any other
# positional/keyword arguments and the block are accepted but not used here.
#
# @param object [Symbol] Salesforce object, kept in +@sfo+
# @param query [String] SOQL query, kept in +@query+
# @param credentials [Hash] authentication settings, kept in +@credentials+
# @param api [Symbol] API selector, kept in +@api+ (default: +:bulk+)
def init_salesforce_extractor(*_args, object:, query:, credentials:, api: :bulk, **_kargs, &_block)
  @sfo         = object
  @query       = query
  @credentials = credentials
  @api         = api
end
|
89
|
+
|
90
|
+
# Walks the batches of a bulk query result and raises on the first batch
# whose first 'state' entry is not 'Completed'.
#
# @param sf_result [Hash] bulk query result with a 'batches' list
# @raise [ExtractError] naming the batch id, state and state message
def check_for_errors(sf_result)
  sf_result['batches'].each do |batch|
    next if batch['state'].first == 'Completed'

    raise ExtractError, "Error with batch #{batch['id']} - #{batch['state']}: #{batch['stateMessage']}"
  end
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Salesforce parser
|
98
|
+
class Parser::Salesforce < Parser
|
99
|
+
|
100
|
+
# Pivots the records of every batch response into a hash of column arrays
# and builds a daru-backed dataframe from it.
#
# Only the first element of each field value is used, the 'xsi:type' and
# 'type' fields are dropped, and a hash carrying "xsi:nil" => "true" is
# stored as nil.
#
# @param sf_extract [Extractor::Salesforce] An object containing data extracted from salesforce
# @return [Remi::DataFrame] The data converted into a dataframe
def parse(sf_extract)
  logger.info 'Parsing salesforce query results'

  columns = {}
  sf_extract.data['batches'].each do |batch|
    records = batch['response']
    next unless records

    records.each do |record|
      record.each do |name, wrapped|
        next if %w[xsi:type type].include?(name)

        cell = wrapped.first
        cell = nil if cell.is_a?(Hash) && cell['xsi:nil'] == 'true'
        (columns[name.to_sym] ||= []) << cell
      end
    end

    # delete raw result at end of processing to free memory
    batch['response'] = nil
  end

  Remi::DataFrame.create(:daru, columns, order: columns.keys)
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# Salesforce encoder
|
130
|
+
class Encoder::Salesforce < Encoder
|
131
|
+
# Converts the dataframe into the form handed to the salesforce bulk api:
# the first element of the dataframe's +to_a+ result.
#
# @param dataframe [Remi::DataFrame] The dataframe to be encoded
# @return [Object] The encoded data to be loaded into the target
def encode(dataframe)
  dataframe.to_a.first
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# Salesforce loader
|
142
|
+
#
|
143
|
+
# @example
|
144
|
+
# class MyJob < Remi::Job
|
145
|
+
# target :contacts do
|
146
|
+
# encoder Remi::Encoder::Salesforce.new
|
147
|
+
# loader Remi::Loader::Salesforce.new(
|
148
|
+
# credentials: { },
|
149
|
+
# object: :Contact,
|
150
|
+
# api: :bulk,
|
151
|
+
# operation: :update
|
152
|
+
# )
|
153
|
+
# end
|
154
|
+
# end
|
155
|
+
#
|
156
|
+
# job = MyJob.new
|
157
|
+
# job.contacts.df = Daru::DataFrame.new({ :Id => ['0031600002Pm7'], :Student_ID__c => ['test1111']})
|
158
|
+
# job.contacts.load
|
159
|
+
class Loader::Salesforce < Loader
|
160
|
+
include Remi::DataSubject::Salesforce
|
161
|
+
|
162
|
+
# Builds a salesforce loader. All arguments are first handed to the parent
# initializer and then to +init_salesforce_loader+.
#
# @param credentials [Hash] Used to authenticate with salesforce
# @option credentials [String] :host Salesforce host (e.g., login.salesforce.com)
# @option credentials [String] :client_id Salesforce Rest client id
# @option credentials [String] :client_secret Salesforce Rest client secret
# @option credentials [String] :instance_url Salesforce instance URL (e.g., https://na1.salesforce.com)
# @option credentials [String] :username Salesforce username
# @option credentials [String] :password Salesforce password
# @option credentials [String] :security_token Salesforce security token
# @param object [Symbol] Salesforce object the operation is performed on
# @param operation [Symbol] Salesforce operation to perform (`:update`, `:create`, `:upsert`)
# @param batch_size [Integer] Size of batch to use for updates (1-10000, default: 5000)
# @param external_id [Symbol, String] Field to use as an external id for upsert operations (default: 'Id')
# @param api [Symbol] Salesforce API to use (only option supported is `:bulk`)
def initialize(*args, **kargs, &block)
  super
  init_salesforce_loader(*args, **kargs, &block)
end
|
179
|
+
|
180
|
+
# Dispatches the encoded data to the bulk helper matching +@operation+
# (+:update+, +:create+ or +:upsert+).
#
# @param data [Encoder::Salesforce] Data that has been encoded appropriately to be loaded into the target
# @return [true] On success
# @raise [ArgumentError] when +@operation+ is not one of the supported operations
def load(data)
  logger.info "Performing Salesforce #{@operation} on object #{@sfo}"

  case @operation
  when :update
    Remi::SfBulkHelper::SfBulkUpdate.update(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
  when :create
    Remi::SfBulkHelper::SfBulkCreate.create(restforce_client, @sfo, data, batch_size: @batch_size, logger: logger)
  when :upsert
    Remi::SfBulkHelper::SfBulkUpsert.upsert(restforce_client, @sfo, data, batch_size: @batch_size, external_id: @external_id, logger: logger)
  else
    raise ArgumentError, "Unknown operation: #{@operation}"
  end

  true
end
|
197
|
+
|
198
|
+
private
|
199
|
+
|
200
|
+
# Stores the salesforce-specific loader settings. Any other
# positional/keyword arguments and the block are accepted but not used here.
#
# @param object [Symbol] Salesforce object, kept in +@sfo+
# @param operation [Symbol] operation to perform, kept in +@operation+
# @param credentials [Hash] authentication settings, kept in +@credentials+
# @param batch_size [Integer] batch size, kept in +@batch_size+ (default: 5000)
# @param external_id [Symbol, String] external id field, kept in +@external_id+ (default: 'Id')
# @param api [Symbol] API selector, kept in +@api+ (default: +:bulk+)
def init_salesforce_loader(*_args, object:, operation:, credentials:, batch_size: 5000, external_id: 'Id', api: :bulk, **_kargs, &_block)
  @sfo         = object
  @operation   = operation
  @batch_size  = batch_size
  @external_id = external_id
  @credentials = credentials
  @api         = api
end
|
208
|
+
|
209
|
+
end
|
210
|
+
|
211
|
+
end
|