remi 0.0.1 → 0.2.2
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.gitignore +3 -2
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +123 -0
- data/LICENSE.txt +21 -0
- data/README.md +94 -3
- data/bin/remi +8 -0
- data/doc/install-rbenv-os_x.md +47 -0
- data/lib/remi.rb +56 -9
- data/lib/remi/cli.rb +56 -0
- data/lib/remi/core/daru.rb +28 -0
- data/lib/remi/core/refinements.rb +21 -0
- data/lib/remi/core/string.rb +8 -0
- data/lib/remi/cucumber.rb +7 -0
- data/lib/remi/cucumber/business_rules.rb +504 -0
- data/lib/remi/cucumber/data_source.rb +63 -0
- data/lib/remi/data_source.rb +13 -0
- data/lib/remi/data_source/csv_file.rb +79 -0
- data/lib/remi/data_source/data_frame.rb +10 -0
- data/lib/remi/data_source/postgres.rb +58 -0
- data/lib/remi/data_source/salesforce.rb +78 -0
- data/lib/remi/data_subject.rb +25 -0
- data/lib/remi/data_target.rb +15 -0
- data/lib/remi/data_target/csv_file.rb +49 -0
- data/lib/remi/data_target/data_frame.rb +14 -0
- data/lib/remi/data_target/salesforce.rb +49 -0
- data/lib/remi/extractor/sftp_file.rb +84 -0
- data/lib/remi/field_symbolizers.rb +17 -0
- data/lib/remi/job.rb +200 -0
- data/lib/remi/lookup/regex_sieve.rb +55 -0
- data/lib/remi/project/features/examples.feature +24 -0
- data/lib/remi/project/features/formulas.feature +64 -0
- data/lib/remi/project/features/sample_job.feature +304 -0
- data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
- data/lib/remi/project/features/support/env.rb +10 -0
- data/lib/remi/project/features/support/env_app.rb +3 -0
- data/lib/remi/project/features/transforms/date_diff.feature +50 -0
- data/lib/remi/project/features/transforms/parse_date.feature +34 -0
- data/lib/remi/project/features/transforms/prefix.feature +15 -0
- data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
- data/lib/remi/project/jobs/copy_source_job.rb +12 -0
- data/lib/remi/project/jobs/sample_job.rb +164 -0
- data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
- data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
- data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
- data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
- data/lib/remi/settings.rb +39 -0
- data/lib/remi/sf_bulk_helper.rb +265 -0
- data/lib/remi/source_to_target_map.rb +93 -0
- data/lib/remi/transform.rb +137 -0
- data/lib/remi/version.rb +3 -0
- data/remi.gemspec +25 -7
- data/workbooks/sample_workbook.ipynb +56 -0
- data/workbooks/workbook_helper.rb +1 -0
- metadata +234 -17
- data/lib/noodling.rb +0 -163
- data/test/test_NAME.rb +0 -19
`data/lib/remi/project/features/transforms/date_diff.feature` (new file):

```diff
@@ -0,0 +1,50 @@
+Feature: Tests the date_diff transform
+
+  Background:
+    Given the job is 'DateDiff'
+    And the job source 'Source Data'
+    And the job target 'Target Data'
+
+    And the source 'Source Data'
+    And the target 'Target Data'
+    And the following example record for 'Source Data':
+      | Date1      | Date2      |
+      | 2015-12-31 | 2016-01-02 |
+
+  Scenario Outline: Calculating date difference in days.
+    Given the job parameter 'measure' is "days"
+    And the source field 'Date1' has the value "<Date1>"
+    And the source field 'Date2' has the value "<Date2>"
+    Then the target field 'Difference' is set to the value "<Difference>"
+    Examples:
+      | Date1      | Date2      | Difference |
+      | 2015-12-31 | 2016-01-02 | 2          |
+      | 2014-12-31 | 2015-12-31 | 365        |
+      | 2016-01-02 | 2015-12-31 | -2         |
+      | 2015-02-28 | 2015-03-01 | 1          |
+      | 2016-02-28 | 2016-03-01 | 2          | # leap day
+
+
+  Scenario Outline: Calculating date difference in months.
+    Given the job parameter 'measure' is "months"
+    And the source field 'Date1' has the value "<Date1>"
+    And the source field 'Date2' has the value "<Date2>"
+    Then the target field 'Difference' is set to the value "<Difference>"
+    Examples:
+      | Date1      | Date2      | Difference |
+      | 2015-12-31 | 2016-01-02 | 1          |
+      | 2015-12-31 | 2016-02-02 | 2          |
+      | 2015-12-31 | 2017-02-02 | 14         |
+      | 2016-02-02 | 2015-12-31 | -2         |
+
+  Scenario Outline: Calculating date difference in years.
+    Given the job parameter 'measure' is "years"
+    And the source field 'Date1' has the value "<Date1>"
+    And the source field 'Date2' has the value "<Date2>"
+    Then the target field 'Difference' is set to the value "<Difference>"
+    Examples:
+      | Date1      | Date2      | Difference |
+      | 2015-12-31 | 2016-01-02 | 1          |
+      | 2015-01-01 | 2015-12-31 | 0          |
+      | 2015-12-31 | 2017-02-02 | 2          |
+      | 2016-02-02 | 2015-12-31 | -1         |
```
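The month and year measures in these examples count calendar-boundary crossings rather than elapsed intervals: 2015-12-31 to 2016-01-02 is "1" month and "1" year apart despite being only two days. A minimal plain-Ruby sketch of that arithmetic, written here for illustration and not taken from the gem:

```ruby
require 'date'

# Illustrative only: reproduces the signed, boundary-crossing semantics
# implied by the Examples tables above.
def date_diff(d1, d2, measure)
  case measure
  when :days   then (d2 - d1).to_i
  when :months then (d2.year - d1.year) * 12 + (d2.month - d1.month)
  when :years  then d2.year - d1.year
  end
end

date_diff(Date.new(2015, 12, 31), Date.new(2016, 1, 2), :months) #=> 1
date_diff(Date.new(2016, 2, 2), Date.new(2015, 12, 31), :years)  #=> -1
```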
`data/lib/remi/project/features/transforms/parse_date.feature` (new file):

```diff
@@ -0,0 +1,34 @@
+Feature: Tests the parse_date transform
+
+  Background:
+    Given the job is 'ParseDate'
+    And the job source 'Source Data'
+    And the job target 'Target Data'
+
+    And the source 'Source Data'
+    And the target 'Target Data'
+    And the following example record for 'Source Data':
+      | Date String |
+      | 2015-12-31  |
+
+  Scenario Outline: Parsing date strings.
+    Given the source field 'Date String' has the value "<Date String>"
+    And the job parameter 'format' is "<Format>"
+    Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
+    Examples:
+      | Date String | Format   | Parsed Date |
+      | 2015-10-21  | %Y-%m-%d | 2015-10-21  |
+      | 10/21/2015  | %m/%d/%Y | 2015-10-21  |
+      | 20151021    | %Y%m%d   | 2015-10-21  |
+      |             | %m/%d/%Y |             |
+
+  Scenario Outline: Parsing date strings for missing values.
+    Given the source field 'Date String' has the value ""
+    And the job parameter 'if_blank' is "<If Blank>"
+
+    Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
+    Examples:
+      | If Blank | Parsed Date |
+      | low      | 1900-01-01  |
+      | high     | 2999-12-31  |
+      |          |             |
```
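The second scenario pins down what happens to blank date strings: "low" and "high" map to sentinel dates, and the default leaves the value blank. A sketch of that behavior (semantics assumed from the Examples table, not taken from the gem's implementation):

```ruby
require 'date'

# Sketch of the if_blank handling exercised above; the sentinel dates come
# straight from the Examples table.
def parse_date(str, format: '%Y-%m-%d', if_blank: nil)
  return Date.strptime(str, format) unless str.to_s.empty?

  case if_blank
  when 'low'  then Date.new(1900, 1, 1)   # sentinel "low" date
  when 'high' then Date.new(2999, 12, 31) # sentinel "high" date
  end                                     # otherwise stays blank (nil)
end

parse_date('10/21/2015', format: '%m/%d/%Y') #=> 2015-10-21
parse_date('', if_blank: 'high')             #=> 2999-12-31
```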
`data/lib/remi/project/features/transforms/prefix.feature` (new file):

```diff
@@ -0,0 +1,15 @@
+Feature: Test the prefix transformer.
+
+  Background:
+    Given the job is 'Prefix'
+    And the job source 'Source Data'
+    And the job target 'Target Data'
+
+
+  Scenario: Prefixing a field.
+    Given the source 'Source Data'
+    And the target 'Target Data'
+    Given the following example record for 'Source Data':
+      | Field     |
+      | something |
+    Then the target field 'Field' is set to the value "prefixsomething"
```
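The expected value "prefixsomething" shows the transform simply concatenates a configured prefix onto the field value. The jobs later in this diff call it in curried form (`Remi::Transform[:prefix].('prefix')`), which one can picture roughly as:

```ruby
# Hypothetical stand-in for Remi::Transform[:prefix]: the first call fixes
# the prefix, and the returned lambda is applied to each field value.
prefix = ->(pre) { ->(value) { "#{pre}#{value}" } }

prefix.('prefix').('something') #=> "prefixsomething"
```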
`data/lib/remi/project/jobs/all_jobs_shared.rb` (new file):

```diff
@@ -0,0 +1,25 @@
+# This file was auto-generated by Remi but is expected to be overwritten.
+# Put anything in here that should be shared by all jobs.
+
+require 'bundler/setup'
+require 'remi'
+
+module AllJobsShared
+  include Remi::Job
+
+  define_param :sftp, {
+    host: 'example.com',
+    username: 'user',
+    password: '1234567890'
+  }
+
+  define_param :salesforce_credentials, {
+    host: 'login.salesforce.com',
+    username: 'user@example.com',
+    password: 'password',
+    security_token: '4342jn3j4n32n4',
+    client_id: 'dkfjsdkfjoasdjdf',
+    client_secret: '28357245723475',
+    instance_url: 'https://na1.salesforce.com'
+  }
+end
```
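Params defined in this shared module become available, via `params[...]`, to every job class that includes it; sample_job.rb later in this diff wires the shared credentials into its sources this way. A trimmed-down version of that pattern (class name and query are hypothetical; the DSL calls mirror sample_job.rb):

```ruby
require_relative 'all_jobs_shared'

# A minimal job reusing the shared :salesforce_credentials param defined
# above, following the same define_source pattern as sample_job.rb below.
class MyContactsJob
  include AllJobsShared

  define_source :contacts, Remi::DataSource::Salesforce,
    object: :Contact,
    credentials: params[:salesforce_credentials],
    api: :bulk,
    fields: { :Id => {} },
    query: 'SELECT Id FROM Contact'
end
```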
`data/lib/remi/project/jobs/copy_source_job.rb` (new file):

```diff
@@ -0,0 +1,12 @@
+require_relative 'all_jobs_shared'
+
+class CopySourceJob
+  include AllJobsShared
+
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_source :target_data, Remi::DataSource::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+    target_data.df = source_data.df.monkey_dup
+  end
+end
```
`data/lib/remi/project/jobs/sample_job.rb` (new file):

```diff
@@ -0,0 +1,164 @@
+# This is an example Remi job that was auto-generated by Remi.
+
+require_relative 'all_jobs_shared'
+
+class SampleJob
+  include AllJobsShared
+
+  define_source :existing_contacts, Remi::DataSource::Salesforce,
+    object: :Contact,
+    credentials: params[:salesforce_credentials],
+    api: :bulk,
+    fields: {
+      :Id => {},
+      :External_ID__c => {}
+    },
+    query: <<-EOQ
+      SELECT
+        Id,
+        External_ID__c
+      FROM
+        Contact
+    EOQ
+
+
+  define_source :sample_file, Remi::DataSource::CsvFile,
+    extractor: Remi::Extractor::SftpFile.new(
+      credentials: params[:sftp],
+      remote_file: /^SampleFile_(\d+)\.txt/,
+      remote_folder: '/',
+      most_recent_only: true
+    ),
+    csv_options: {
+      headers: true,
+      col_sep: ",",
+      encoding: "ISO-8859-1:UTF-8"
+    },
+    fields: {
+      :student_id => {},
+      :school_id => {},
+      :school_name => {},
+      :program => {},
+      :last_name => {},
+      :first_name => {},
+      :current_email => {},
+      :mailing_address_line_1 => {},
+      :mailing_address_line_2 => {},
+      :mailing_city => {},
+      :mailing_state => {},
+      :mailing_postal_code => {},
+      :birthdate => { type: :date, format: '%m/%d/%Y'},
+      :applied_date => { type: :date, format: '%m/%d/%Y'}
+    }
+
+  define_target :all_contacts, Remi::DataTarget::DataFrame
+
+  define_target :contact_updates, Remi::DataTarget::Salesforce,
+    credentials: params[:salesforce_credentials],
+    object: :Contact,
+    operation: :update,
+    api: :bulk
+
+  define_target :contact_creates, Remi::DataTarget::Salesforce,
+    credentials: params[:salesforce_credentials],
+    object: :Contact,
+    operation: :create,
+    api: :bulk
+
+  define_lookup :program_name_lookup, Remi::Lookup::RegexSieve, {
+    /^BIO$/ => "Biology",
+    /^Fake Biology$/ => nil,
+    /(?:B|Microb)iology/ => "Biology",
+    /^CHEM$/ => "Chemistry",
+    /Chemistry/ => "Chemistry",
+    /Physics/ => "Physics"
+  }
+
+  define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do
+
+    # Exclude all source records with an invalid program name
+    all_contacts.df = sample_file.df.monkey_dup
+    Remi::SourceToTargetMap.apply(all_contacts.df) do
+      map source(:program) .target(:Major__c)
+        .transform(Remi::Transform[:lookup][program_name_lookup])
+    end
+    all_contacts.df = all_contacts.df.where(all_contacts.df[:Major__c].not_eq(nil))
+
+    student_id_to_sf_id = existing_contacts.df.map_rows { |row| [row[:External_ID__c], row[:Id]] }.to_h
+
+    # Map fields that are common to both creates and updates
+    Remi::SourceToTargetMap.apply(all_contacts.df) do
+
+      # Prefixes source id record and then looks up existing salesforce Id
+      map source(:student_id) .target(:External_ID__c, :Id)
+        .transform(Remi::Transform[:prefix]['SAMP'])
+        .transform(->(v) { [v, Remi::Transform[:lookup][student_id_to_sf_id].call(v)] })
+    end
+  end
+
+
+  define_transform :map_creates, sources: :all_contacts, targets: :contact_creates do
+
+    work_contact_creates = all_contacts.df.where(all_contacts.df[:Id].eq(nil))
+    Remi::SourceToTargetMap.apply(work_contact_creates) do
+
+      map source(:school_id) .target(:School_ID__c)
+      map source(:school_name) .target(:School_Name__c)
+      map source(:first_name) .target(:FirstName)
+        .transform(Remi::Transform[:ifblank].('Not Provided'))
+      map source(:last_name) .target(:LastName)
+        .transform(Remi::Transform[:ifblank].('Not Provided'))
+      map source(:mailing_city) .target(:MailingCity)
+      map source(:mailing_state) .target(:MailingState)
+      map source(:mailing_postal_code) .target(:MailingPostalCode)
+
+      map source(:birthdate) .target(:Birthdate)
+        .transform(Remi::Transform[:format_date][from_fmt: sample_file.fields[:birthdate][:format]])
+
+      map source(:applied_date) .target(:Applied_Date__c)
+        .transform(Remi::Transform[:ifblank].(Date.today.strftime(sample_file.fields[:applied_date][:format])))
+        .transform(Remi::Transform[:format_date].(from_fmt: sample_file.fields[:applied_date][:format]))
+
+      map source(:mailing_address_line_1, :mailing_address_line_2) .target(:MailingStreet)
+        .transform(->(line_1, line_2) {
+          Remi::Transform[:ifblank].(nil).call(line_1).nil? ? [] : [line_1, line_2]
+        })
+        .transform(Remi::Transform[:concatenate].(', '))
+
+      map source(:school_id, :school_name) .target(:School__c)
+        .transform(->(id, name) {[
+          Remi::Transform[:ifblank]["Unknown"].call(id),
+          Remi::Transform[:ifblank]["Unknown"].call(name)
+        ]})
+        .transform(Remi::Transform[:concatenate].('-'))
+
+      map source(:current_email) .target(:Email)
+        .transform(Remi::Transform[:replace].(/,/, '.'))
+        .transform(Remi::Transform[:validate_email].call)
+    end
+
+    contact_creates.df = work_contact_creates[
+      :External_ID__c,
+      :School_ID__c,
+      :School_Name__c,
+      :School__c,
+      :Major__c,
+      :FirstName,
+      :LastName,
+      :Email,
+      :MailingStreet,
+      :MailingCity,
+      :MailingState,
+      :MailingPostalCode,
+      :Birthdate,
+      :Applied_Date__c
+    ]
+  end
+
+  define_transform :map_updates, sources: :all_contacts, targets: :contact_updates do
+    contact_updates.df = all_contacts.df[
+      :Id,
+      :Major__c
+    ].where(all_contacts.df[:Id].not_eq(nil))
+  end
+end
```
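The `program_name_lookup` above depends on the RegexSieve checking patterns in insertion order and returning the first match, which is why the exact `/^BIO$/` rule and the `/^Fake Biology$/` trap are listed before the broader `/(?:B|Microb)iology/`. A sketch of that assumed behavior (not the gem's code, which lives in lib/remi/lookup/regex_sieve.rb):

```ruby
# Assumed first-match-wins semantics for an ordered regex lookup.
class RegexSieveSketch
  def initialize(map)
    @map = map
  end

  def [](value)
    @map.each { |pattern, mapped| return mapped if pattern =~ value.to_s }
    nil
  end
end

sieve = RegexSieveSketch.new({
  /^BIO$/ => 'Biology',
  /^Fake Biology$/ => nil,
  /(?:B|Microb)iology/ => 'Biology'
})
sieve['BIO']          #=> "Biology"
sieve['Microbiology'] #=> "Biology"
sieve['Fake Biology'] #=> nil (filtered out by the not_eq(nil) step above)
```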
`data/lib/remi/project/jobs/transforms/date_diff_job.rb` (new file):

```diff
@@ -0,0 +1,17 @@
+require_relative '../all_jobs_shared'
+
+class DateDiffJob
+  include AllJobsShared
+
+  define_param :measure, :days
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_target :target_data, Remi::DataTarget::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+      map source(:date1, :date2) .target(:difference)
+        .transform(->(d1,d2) { [Date.strptime(d1), Date.strptime(d2)] })
+        .transform(Remi::Transform[:date_diff].(params[:measure]))
+    end
+  end
+end
```
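Note how the two chained transforms cooperate here: the lambda returns a two-element array of parsed dates, which the chain then feeds into the next transform as separate arguments. A plain-Ruby illustration of that threading convention (assumed from how the jobs in this diff use it, not Remi's internals):

```ruby
require 'date'

# Each step receives the previous step's result; an array result is splatted
# into the next callable's argument list.
steps = [
  ->(d1, d2) { [Date.strptime(d1), Date.strptime(d2)] },
  ->(d1, d2) { (d2 - d1).to_i }
]

result = ['2015-12-31', '2016-01-02']
steps.each { |step| result = step.call(*result) }
result #=> 2
```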
`data/lib/remi/project/jobs/transforms/parse_date_job.rb` (new file):

```diff
@@ -0,0 +1,18 @@
+require_relative '../all_jobs_shared'
+
+class ParseDateJob
+  include AllJobsShared
+
+  define_param :format, '%Y-%m-%d'
+  define_param :if_blank, nil
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_target :target_data, Remi::DataTarget::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+
+    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+      map source(:date_string) .target(:parsed_date)
+        .transform(Remi::Transform[:parse_date].(format: params[:format], if_blank: params[:if_blank]))
+    end
+  end
+end
```
`data/lib/remi/project/jobs/transforms/prefix_job.rb` (new file):

```diff
@@ -0,0 +1,16 @@
+require_relative '../all_jobs_shared'
+
+class PrefixJob
+  include AllJobsShared
+
+  define_param :prefix, 'prefix'
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_target :target_data, Remi::DataTarget::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+      map source(:field) .target(:field)
+        .transform(Remi::Transform[:prefix].(params[:prefix]))
+    end
+  end
+end
```
`data/lib/remi/settings.rb` (new file):

```diff
@@ -0,0 +1,39 @@
+module Remi
+  module Settings
+    extend self
+
+    def work_dir
+      @work_dir ||= File.join(Dir.tmpdir, Dir::Tmpname.make_tmpname('',nil))
+    end
+
+    def work_dir=(arg)
+      @work_dir = arg
+    end
+
+    def log_level
+      @log_level ||= Logger::INFO
+    end
+
+    def log_level=(arg)
+      @log_level = arg
+    end
+
+    def logger
+      return @logger.call if @logger.respond_to? :call
+      @logger ||= lambda do
+        l = Logger.new(STDOUT)
+        l.level = log_level
+        l.formatter = proc do |severity, datetime, progname, msg|
+          "#{msg}\n"
+        end
+        l
+      end
+
+      @logger.call
+    end
+
+    def logger=(arg)
+      @logger = arg
+    end
+  end
+end
```
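Settings is a singleton-style module (`extend self`), so an app can override the defaults anywhere before first use. For instance (values illustrative):

```ruby
require 'logger'
require 'remi'

# Quiet the default STDOUT logger and pin the scratch directory.
Remi::Settings.log_level = Logger::WARN
Remi::Settings.work_dir  = '/tmp/remi_work'

Remi::Settings.logger.info 'suppressed at WARN level'
Remi::Settings.logger.warn 'still printed'
```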
`data/lib/remi/sf_bulk_helper.rb` (new file):

```diff
@@ -0,0 +1,265 @@
+module Remi
+  module SfBulkHelper
+
+    class DupeLookupKeyError < StandardError; end
+    class MaxAttemptError < StandardError; end
+
+    # Public: Class used to execute SF Bulk operations. This class is not meant to be
+    # used directly. It is instead meant to be inherited by classes that perform the
+    # specific query, update, create, or upsert operations.
+    #
+    # Examples
+    #
+    #   sf_query = SfBulkQuery.query(client, 'Contact', 'SELECT Id, Name FROM Contact')
+    #   puts sf_query.result
+    #
+    #   mydata = [ { 'Id' => '001G000000ncxb8IAA', 'Name' => 'Happy Feet' } ]
+    #   sf_update = SfBulkUpdate.update(client, 'Contact', mydata)
+    class SfBulkOperation
+
+      # Public: Initializes a SfBulkOperation (does not execute operation).
+      #
+      # restforce_client - An instance of Restforce that is used to authenticate the connection.
+      # object           - The name of the object to operate on (e.g., Contact, Task, etc).
+      # data             - For query operations, this is the SOQL query string. For other
+      #                    operations, this is an array of hashes, where the hash keys are column names
+      #                    and the values are the data.
+      # batch_size       - Batch size to use to download or upload data (default: 5000)
+      # max_attempts     - The maximum number of attempts to upload data (default: 2)
+      # logger           - Logger to use (default: Logger.new(STDOUT))
+      def initialize(restforce_client, object, data, batch_size: 5000, max_attempts: 2, logger: Logger.new(STDOUT))
+        @restforce_client = restforce_client
+        @object = object
+        @data = data
+        @batch_size = batch_size
+        @max_attempts = max_attempts
+        @attempts = Hash.new(0)
+        @logger = logger
+      end
+
+      # Public: A symbol representing the operation to be performed (:query, :update, :create, :upsert)
+      def operation
+        :undefined
+      end
+
+      # Public: Returns the instance of SalesforceBulkApi::Api used for bulk operations.
+      def sf_bulk
+        @sf_bulk ||= SalesforceBulkApi::Api.new(@restforce_client).tap { |o| o.connection.set_status_throttle(5) }
+      end
+
+      # Public: Returns the raw result from the SalesforceBulkApi query
+      def raw_result
+        @raw_result || execute
+      end
+
+      # Public: Returns useful metadata about the batch query.
+      def info
+        execute if @attempts[:total] == 0
+
+        return @info if @info and @attempts[:info] == @attempts[:total]
+        @attempts[:info] += 1
+
+        @info = raw_result.reject { |k,v| k == 'batches' }.tap do |h|
+          h['query'] = @data if operation == :query
+        end
+      end
+
+      # Public: Collects the results from all of the batches and aggregates them
+      # into an array of hashes. Each element of the array represents a record in the
+      # result and the hash gives the column-value. Note that if multiple retries are
+      # needed, this is just the final result.
+      #
+      # Returns an array of hashes.
+      def result
+        execute if @attempts[:total] == 0
+
+        return @result if @result and @attempts[:result] == @attempts[:total]
+        @attempts[:result] += 1
+
+        @result = []
+        raw_result['batches'].each do |batch|
+          next unless batch['response']
+
+          batch['response'].each do |record|
+            @result << record.inject({}) { |h, (k,v)| h[k] = v.first unless ['xsi:type','type'].include? k; h }
+          end
+
+          # delete raw result at end of processing to free memory
+          batch['response'] = nil
+        end
+
+        @result
+      end
+
+      # Public: Converts the result into a hash that can be used to
+      # lookup the row for a given key (e.g., external id field).
+      #
+      # key        - A string representing the name of the column to be used as the lookup key.
+      # duplicates - Indicates whether duplicate keys are allowed. If they are allowed,
+      #              only the first row found will be retained. If duplicates are not allowed,
+      #              an error is raised (default: false).
+      #
+      # Returns a hash.
+      def as_lookup(key:, duplicates: false)
+        execute if @attempts[:total] == 0
+
+        @as_lookup ||= {}
+        @attempts[:as_lookup] = Hash.new(0) if @attempts[:as_lookup] == 0
+
+        return @as_lookup[key] if @as_lookup[key] and @attempts[:as_lookup][key] == @attempts[:total]
+        @attempts[:as_lookup][key] += 1
+
+        @as_lookup[key] = result.inject({}) do |lkp,row|
+          raise DupeLookupKeyError, "Duplicate key: #{row[key]} found in result of query: #{@data}" if lkp.has_key?(row[key]) and not duplicates
+          lkp[row[key]] = row unless lkp.has_key?(row[key])
+          lkp
+        end
+      end
+
+
+      # Public: Returns true if any of the records failed to update.
+      def failed_records?
+        n_failed_records = result.reduce(0) do |count, row|
+          count += 1 if row['success'] != 'true'
+          count
+        end
+
+        n_failed_batches = raw_result['batches'].reduce(0) do |count, batch|
+          count += 1 if batch['state'].first != 'Completed'
+          count
+        end
+
+        n_failed_records > 0 || n_failed_batches > 0
+      end
+
+
+      private
+
+      # Private: Sends the operation to Salesforce using the bulk API.
+      def send_bulk_operation
+        raise "No SF bulk operation defined for #{operation}"
+      end
+
+      # Private: Executes the operation and retries if needed.
+      def execute
+        @attempts[:total] += 1
+        @logger.info "Executing Salesforce Bulk operation: #{operation}"
+
+        @raw_result = send_bulk_operation
+        @logger.info "Bulk operation response: "
+        JSON.pretty_generate(info).split("\n").each { |l| @logger.info l }
+
+        retry_failed if failed_records?
+
+        @logger.info JSON.pretty_generate(info)
+        @raw_result
+      end
+
+      # Private: Drops any data that has already been loaded to salesforce.
+      # Note that this doesn't work for created data since the initial data
+      # wont have a salesforce id. Sometimes batches can fail completely
+      # and won't give anything in the result set. Therefore, the only way
+      # to be able to drop data that's already been created would be to
+      # know how the data was split into batches and the gem we use does not
+      # make this simple. So for now, we live with the defect.
+      def drop_successfully_updated_data
+        lkp_result_by_id = as_lookup(key: 'id', duplicates: true)
+        @data.reject! do |row|
+          sf_bulk_result = lkp_result_by_id[row['Id'] || row[:Id]]
+          sf_bulk_result && (sf_bulk_result['success'] == 'true')
+        end
+
+        nil
+      end
+
+
+      # Private: Selects data needed to be retried and re-executes the operation.
+      def retry_failed
+        raise MaxAttemptError if @attempts[:total] >= @max_attempts
+        @logger.warn "Retrying #{operation} - #{@attempts[:total]} of #{@max_attempts}"
+
+        drop_successfully_updated_data
+
+        execute
+      end
+    end
+
+
+    # Public: Class used to execute SF Bulk Update operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkUpdate < SfBulkOperation
+      def self.update(*args,**kargs)
+        SfBulkUpdate.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :update
+      end
+
+      private
+
+      def send_bulk_operation
+        sf_bulk.send(operation, @object, @data, true, false, [], @batch_size)
+      end
+    end
+
+    # Public: Class used to execute SF Bulk Create operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkCreate < SfBulkOperation
+      def self.create(*args,**kargs)
+        SfBulkCreate.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :create
+      end
+
+      private
+
+      def send_bulk_operation
+        sf_bulk.send(operation, @object, @data, true, false, @batch_size)
+      end
+    end
+
+    # Public: Class used to execute SF Bulk Upsert operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkUpsert < SfBulkOperation
+      def self.upsert(*args,**kargs)
+        SfBulkUpsert.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :upsert
+      end
+
+      private
+
+      def send_bulk_operation
+        # Upsert does not support external id right now
+        sf_bulk.send(operation, @object, @data, 'Id', true, false, [], @batch_size)
+      end
+    end
+
+    # Public: Class used to execute SF Bulk Query operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkQuery < SfBulkOperation
+      def self.query(*args,**kargs)
+        SfBulkQuery.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :query
+      end
+
+      def failed_records?
+        false
+      end
+
+      private
+
+      def send_bulk_operation
+        sf_bulk.send(operation, @object, @data, @batch_size)
+      end
+    end
+  end
+end
```
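Putting the Examples comment from SfBulkOperation together with the subclasses defined here, end-to-end usage looks roughly like this (credentials are placeholders; a real org may need additional Restforce options):

```ruby
require 'restforce'
require 'remi'

client = Restforce.new(
  username:       'user@example.com',
  password:       'password',
  security_token: 'token',
  client_id:      'client_id',
  client_secret:  'client_secret'
)

# Query executes immediately (see self.query) and memoizes the parsed result.
contacts = Remi::SfBulkHelper::SfBulkQuery.query(
  client, 'Contact', 'SELECT Id, Name FROM Contact'
)
contacts.result.each { |row| puts row['Id'] }

# Updates retry failed records up to max_attempts before raising MaxAttemptError.
updates = [{ 'Id' => '001G000000ncxb8IAA', 'Name' => 'Happy Feet' }]
Remi::SfBulkHelper::SfBulkUpdate.update(client, 'Contact', updates, max_attempts: 3)
```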