remi 0.0.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.bundle/config +2 -0
- data/.gitignore +3 -2
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +123 -0
- data/LICENSE.txt +21 -0
- data/README.md +94 -3
- data/bin/remi +8 -0
- data/doc/install-rbenv-os_x.md +47 -0
- data/lib/remi.rb +56 -9
- data/lib/remi/cli.rb +56 -0
- data/lib/remi/core/daru.rb +28 -0
- data/lib/remi/core/refinements.rb +21 -0
- data/lib/remi/core/string.rb +8 -0
- data/lib/remi/cucumber.rb +7 -0
- data/lib/remi/cucumber/business_rules.rb +504 -0
- data/lib/remi/cucumber/data_source.rb +63 -0
- data/lib/remi/data_source.rb +13 -0
- data/lib/remi/data_source/csv_file.rb +79 -0
- data/lib/remi/data_source/data_frame.rb +10 -0
- data/lib/remi/data_source/postgres.rb +58 -0
- data/lib/remi/data_source/salesforce.rb +78 -0
- data/lib/remi/data_subject.rb +25 -0
- data/lib/remi/data_target.rb +15 -0
- data/lib/remi/data_target/csv_file.rb +49 -0
- data/lib/remi/data_target/data_frame.rb +14 -0
- data/lib/remi/data_target/salesforce.rb +49 -0
- data/lib/remi/extractor/sftp_file.rb +84 -0
- data/lib/remi/field_symbolizers.rb +17 -0
- data/lib/remi/job.rb +200 -0
- data/lib/remi/lookup/regex_sieve.rb +55 -0
- data/lib/remi/project/features/examples.feature +24 -0
- data/lib/remi/project/features/formulas.feature +64 -0
- data/lib/remi/project/features/sample_job.feature +304 -0
- data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
- data/lib/remi/project/features/support/env.rb +10 -0
- data/lib/remi/project/features/support/env_app.rb +3 -0
- data/lib/remi/project/features/transforms/date_diff.feature +50 -0
- data/lib/remi/project/features/transforms/parse_date.feature +34 -0
- data/lib/remi/project/features/transforms/prefix.feature +15 -0
- data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
- data/lib/remi/project/jobs/copy_source_job.rb +12 -0
- data/lib/remi/project/jobs/sample_job.rb +164 -0
- data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
- data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
- data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
- data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
- data/lib/remi/settings.rb +39 -0
- data/lib/remi/sf_bulk_helper.rb +265 -0
- data/lib/remi/source_to_target_map.rb +93 -0
- data/lib/remi/transform.rb +137 -0
- data/lib/remi/version.rb +3 -0
- data/remi.gemspec +25 -7
- data/workbooks/sample_workbook.ipynb +56 -0
- data/workbooks/workbook_helper.rb +1 -0
- metadata +234 -17
- data/lib/noodling.rb +0 -163
- data/test/test_NAME.rb +0 -19
data/lib/remi/project/features/transforms/date_diff.feature
@@ -0,0 +1,50 @@
+Feature: Tests the date_diff transform
+
+  Background:
+    Given the job is 'DateDiff'
+    And the job source 'Source Data'
+    And the job target 'Target Data'
+
+    And the source 'Source Data'
+    And the target 'Target Data'
+    And the following example record for 'Source Data':
+      | Date1      | Date2      |
+      | 2015-12-31 | 2016-01-02 |
+
+  Scenario Outline: Calculating date difference in days2.
+    Given the job parameter 'measure' is "days"
+    And the source field 'Date1' has the value "<Date1>"
+    And the source field 'Date2' has the value "<Date2>"
+    Then the target field 'Difference' is set to the value "<Difference>"
+    Examples:
+      | Date1      | Date2      | Difference |
+      | 2015-12-31 | 2016-01-02 | 2          |
+      | 2014-12-31 | 2015-12-31 | 365        |
+      | 2016-01-02 | 2015-12-31 | -2         |
+      | 2015-02-28 | 2015-03-01 | 1          |
+      | 2016-02-28 | 2016-03-01 | 2          | # leap day
+
+
+  Scenario Outline: Calculating date difference in months.
+    Given the job parameter 'measure' is "months"
+    And the source field 'Date1' has the value "<Date1>"
+    And the source field 'Date2' has the value "<Date2>"
+    Then the target field 'Difference' is set to the value "<Difference>"
+    Examples:
+      | Date1      | Date2      | Difference |
+      | 2015-12-31 | 2016-01-02 | 1          |
+      | 2015-12-31 | 2016-02-02 | 2          |
+      | 2015-12-31 | 2017-02-02 | 14         |
+      | 2016-02-02 | 2015-12-31 | -2         |
+
+  Scenario Outline: Calculating date difference in years.
+    Given the job parameter 'measure' is "years"
+    And the source field 'Date1' has the value "<Date1>"
+    And the source field 'Date2' has the value "<Date2>"
+    Then the target field 'Difference' is set to the value "<Difference>"
+    Examples:
+      | Date1      | Date2      | Difference |
+      | 2015-12-31 | 2016-01-02 | 1          |
+      | 2015-01-01 | 2015-12-31 | 0          |
+      | 2015-12-31 | 2017-02-02 | 2          |
+      | 2016-02-02 | 2015-12-31 | -1         |
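
Taken together, the expected values above pin down the date_diff semantics: "days" counts calendar days exactly, "months" compares only the year and month components (2015-12-31 to 2016-01-02 is 1 month despite being only 2 days apart), and "years" compares only the year component. A minimal Ruby sketch consistent with these tables; an illustration, not the gem's implementation:

    require 'date'

    # Hypothetical stand-in for Remi::Transform[:date_diff], written only to
    # make the expected values in the tables above concrete.
    def date_diff(d1, d2, measure)
      case measure
      when :days   then (d2 - d1).to_i                                      # exact day count
      when :months then (d2.year * 12 + d2.month) - (d1.year * 12 + d1.month)
      when :years  then d2.year - d1.year                                   # year component only
      end
    end

    date_diff(Date.new(2015, 12, 31), Date.new(2016, 1, 2), :days)    #=> 2
    date_diff(Date.new(2015, 12, 31), Date.new(2017, 2, 2), :months)  #=> 14
    date_diff(Date.new(2016, 2, 2), Date.new(2015, 12, 31), :years)   #=> -1
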
data/lib/remi/project/features/transforms/parse_date.feature
@@ -0,0 +1,34 @@
+Feature: Tests the parse_date transform
+
+  Background:
+    Given the job is 'ParseDate'
+    And the job source 'Source Data'
+    And the job target 'Target Data'
+
+    And the source 'Source Data'
+    And the target 'Target Data'
+    And the following example record for 'Source Data':
+      | Date String |
+      | 2015-12-31  |
+
+  Scenario Outline: Parsing date strings.
+    Given the source field 'Date String' has the value "<Date String>"
+    And the job parameter 'format' is "<Format>"
+    Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
+    Examples:
+      | Date String | Format   | Parsed Date |
+      | 2015-10-21  | %Y-%m-%d | 2015-10-21  |
+      | 10/21/2015  | %m/%d/%Y | 2015-10-21  |
+      | 20151021    | %Y%m%d   | 2015-10-21  |
+      |             | %m/%d/%Y |             |
+
+  Scenario Outline: Parsing date strings for missing values.
+    Given the source field 'Date String' has the value ""
+    And the job parameter 'if_blank' is "<If Blank>"
+
+    Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
+    Examples:
+      | If Blank | Parsed Date |
+      | low      | 1900-01-01  |
+      | high     | 2999-12-31  |
+      |          |             |
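
The two outlines above jointly specify parse_date: non-blank input is parsed with the given strptime format, while blank input maps to a floor date, a ceiling date, or stays blank depending on the if_blank parameter. A rough Ruby equivalent of that contract, illustrative only:

    require 'date'

    # Hypothetical stand-in for Remi::Transform[:parse_date], matching the tables above.
    def parse_date(str, format: '%Y-%m-%d', if_blank: nil)
      if str.to_s.empty?
        case if_blank
        when 'low'  then Date.new(1900, 1, 1)    # floor date per the examples
        when 'high' then Date.new(2999, 12, 31)  # ceiling date per the examples
        end                                      # otherwise stays blank (nil)
      else
        Date.strptime(str, format)
      end
    end

    parse_date('10/21/2015', format: '%m/%d/%Y')  #=> #<Date: 2015-10-21>
    parse_date('', if_blank: 'high')              #=> #<Date: 2999-12-31>
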
data/lib/remi/project/features/transforms/prefix.feature
@@ -0,0 +1,15 @@
+Feature: Test the prefix transformer.
+
+  Background:
+    Given the job is 'Prefix'
+    And the job source 'Source Data'
+    And the job target 'Target Data'
+
+
+  Scenario: Prefixing a field.
+    Given the source 'Source Data'
+    And the target 'Target Data'
+    Given the following example record for 'Source Data':
+      | Field     |
+      | something |
+    Then the target field 'Field' is set to the value "prefixsomething"
data/lib/remi/project/jobs/all_jobs_shared.rb
@@ -0,0 +1,25 @@
+# This file was auto-generated by Remi but is expected to be overwritten.
+# Put anything in here that should be shared by all jobs.
+
+require 'bundler/setup'
+require 'remi'
+
+module AllJobsShared
+  include Remi::Job
+
+  define_param :sftp, {
+    host: 'example.com',
+    username: 'user',
+    password: '1234567890'
+  }
+
+  define_param :salesforce_credentials, {
+    host: 'login.salesforce.com',
+    username: 'user@example.com',
+    password: 'password',
+    security_token: '4342jn3j4n32n4',
+    client_id: 'dkfjsdkfjoasdjdf',
+    client_secret: '28357245723475',
+    instance_url: 'https://na1.salesforce.com'
+  }
+end
data/lib/remi/project/jobs/copy_source_job.rb
@@ -0,0 +1,12 @@
+require_relative 'all_jobs_shared'
+
+class CopySourceJob
+  include AllJobsShared
+
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_source :target_data, Remi::DataSource::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+    target_data.df = source_data.df.monkey_dup
+  end
+end
data/lib/remi/project/jobs/sample_job.rb
@@ -0,0 +1,164 @@
+# This is an example Remi job that was auto-generated by Remi.
+
+require_relative 'all_jobs_shared'
+
+class SampleJob
+  include AllJobsShared
+
+  define_source :existing_contacts, Remi::DataSource::Salesforce,
+    object: :Contact,
+    credentials: params[:salesforce_credentials],
+    api: :bulk,
+    fields: {
+      :Id => {},
+      :External_ID__c => {}
+    },
+    query: <<-EOQ
+      SELECT
+        Id,
+        External_ID__c
+      FROM
+        Contact
+    EOQ
+
+
+  define_source :sample_file, Remi::DataSource::CsvFile,
+    extractor: Remi::Extractor::SftpFile.new(
+      credentials: params[:sftp],
+      remote_file: /^SampleFile_(\d+)\.txt/,
+      remote_folder: '/',
+      most_recent_only: true
+    ),
+    csv_options: {
+      headers: true,
+      col_sep: ",",
+      encoding: "ISO-8859-1:UTF-8"
+    },
+    fields: {
+      :student_id => {},
+      :school_id => {},
+      :school_name => {},
+      :program => {},
+      :last_name => {},
+      :first_name => {},
+      :current_email => {},
+      :mailing_address_line_1 => {},
+      :mailing_address_line_2 => {},
+      :mailing_city => {},
+      :mailing_state => {},
+      :mailing_postal_code => {},
+      :birthdate => { type: :date, format: '%m/%d/%Y'},
+      :applied_date => { type: :date, format: '%m/%d/%Y'}
+    }
+
+  define_target :all_contacts, Remi::DataTarget::DataFrame
+
+  define_target :contact_updates, Remi::DataTarget::Salesforce,
+    credentials: params[:salesforce_credentials],
+    object: :Contact,
+    operation: :update,
+    api: :bulk
+
+  define_target :contact_creates, Remi::DataTarget::Salesforce,
+    credentials: params[:salesforce_credentials],
+    object: :Contact,
+    operation: :create,
+    api: :bulk
+
+  define_lookup :program_name_lookup, Remi::Lookup::RegexSieve, {
+    /^BIO$/ => "Biology",
+    /^Fake Biology$/ => nil,
+    /(?:B|Microb)iology/ => "Biology",
+    /^CHEM$/ => "Chemistry",
+    /Chemistry/ => "Chemistry",
+    /Physics/ => "Physics"
+  }
+
+  define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do
+
+    # Exclude all source records with an invalid program name
+    all_contacts.df = sample_file.df.monkey_dup
+    Remi::SourceToTargetMap.apply(all_contacts.df) do
+      map source(:program) .target(:Major__c)
+        .transform(Remi::Transform[:lookup][program_name_lookup])
+    end
+    all_contacts.df = all_contacts.df.where(all_contacts.df[:Major__c].not_eq(nil))
+
+    student_id_to_sf_id = existing_contacts.df.map_rows { |row| [row[:External_ID__c], row[:Id]] }.to_h
+
+    # Map fields that are common to both creates and updates
+    Remi::SourceToTargetMap.apply(all_contacts.df) do
+
+      # Prefixes source id record and then looks up existing salesforce Id
+      map source(:student_id) .target(:External_ID__c, :Id)
+        .transform(Remi::Transform[:prefix]['SAMP'])
+        .transform(->(v) { [v, Remi::Transform[:lookup][student_id_to_sf_id].call(v)] })
+    end
+  end
+
+
+  define_transform :map_creates, sources: :all_contacts, targets: :contact_creates do
+
+    work_contact_creates = all_contacts.df.where(all_contacts.df[:Id].eq(nil))
+    Remi::SourceToTargetMap.apply(work_contact_creates) do
+
+      map source(:school_id) .target(:School_ID__c)
+      map source(:school_name) .target(:School_Name__c)
+      map source(:first_name) .target(:FirstName)
+        .transform(Remi::Transform[:ifblank].('Not Provided'))
+      map source(:last_name) .target(:LastName)
+        .transform(Remi::Transform[:ifblank].('Not Provided'))
+      map source(:mailing_city) .target(:MailingCity)
+      map source(:mailing_state) .target(:MailingState)
+      map source(:mailing_postal_code) .target(:MailingPostalCode)
+
+      map source(:birthdate) .target(:Birthdate)
+        .transform(Remi::Transform[:format_date][from_fmt: sample_file.fields[:birthdate][:format]])
+
+      map source(:applied_date) .target(:Applied_Date__c)
+        .transform(Remi::Transform[:ifblank].(Date.today.strftime(sample_file.fields[:applied_date][:format])))
+        .transform(Remi::Transform[:format_date].(from_fmt: sample_file.fields[:applied_date][:format]))
+
+      map source(:mailing_address_line_1, :mailing_address_line_2) .target(:MailingStreet)
+        .transform(->(line_1, line_2) {
+          Remi::Transform[:ifblank].(nil).call(line_1).nil? ? [] : [line_1, line_2]
+        })
+        .transform(Remi::Transform[:concatenate].(', '))
+
+      map source(:school_id, :school_name) .target(:School__c)
+        .transform(->(id, name) {[
+          Remi::Transform[:ifblank]["Unknown"].call(id),
+          Remi::Transform[:ifblank]["Unknown"].call(name)
+        ]})
+        .transform(Remi::Transform[:concatenate].('-'))
+
+      map source(:current_email) .target(:Email)
+        .transform(Remi::Transform[:replace].(/,/, '.'))
+        .transform(Remi::Transform[:validate_email].call)
+    end
+
+    contact_creates.df = work_contact_creates[
+      :External_ID__c,
+      :School_ID__c,
+      :School_Name__c,
+      :School__c,
+      :Major__c,
+      :FirstName,
+      :LastName,
+      :Email,
+      :MailingStreet,
+      :MailingCity,
+      :MailingState,
+      :MailingPostalCode,
+      :Birthdate,
+      :Applied_Date__c
+    ]
+  end
+
+  define_transform :map_updates, sources: :all_contacts, targets: :contact_updates do
+    contact_updates.df = all_contacts.df[
+      :Id,
+      :Major__c
+    ].where(all_contacts.df[:Id].not_eq(nil))
+  end
+end
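
Note how program_name_lookup depends on the sieve checking its patterns in order: '^Fake Biology$' must map to nil before the broader '(?:B|Microb)iology' pattern can claim it, and that nil is what lets map_common_fields drop invalid programs via the not_eq(nil) filter. A toy first-match sieve illustrating the ordering idea (not the actual Remi::Lookup::RegexSieve implementation):

    # Toy first-match sieve: returns the value of the first matching pattern.
    def sieve(value, rules)
      rules.each { |pattern, mapped| return mapped if value =~ pattern }
      nil
    end

    rules = { /^Fake Biology$/ => nil, /(?:B|Microb)iology/ => 'Biology' }
    sieve('Fake Biology', rules)  #=> nil (caught before the broader pattern)
    sieve('Microbiology', rules)  #=> "Biology"
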
data/lib/remi/project/jobs/transforms/date_diff_job.rb
@@ -0,0 +1,17 @@
+require_relative '../all_jobs_shared'
+
+class DateDiffJob
+  include AllJobsShared
+
+  define_param :measure, :days
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_target :target_data, Remi::DataTarget::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+      map source(:date1, :date2) .target(:difference)
+        .transform(->(d1,d2) { [Date.strptime(d1), Date.strptime(d2)] })
+        .transform(Remi::Transform[:date_diff].(params[:measure]))
+    end
+  end
+end
data/lib/remi/project/jobs/transforms/parse_date_job.rb
@@ -0,0 +1,18 @@
+require_relative '../all_jobs_shared'
+
+class ParseDateJob
+  include AllJobsShared
+
+  define_param :format, '%Y-%m-%d'
+  define_param :if_blank, nil
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_target :target_data, Remi::DataTarget::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+
+    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+      map source(:date_string) .target(:parsed_date)
+        .transform(Remi::Transform[:parse_date].(format: params[:format], if_blank: params[:if_blank]))
+    end
+  end
+end
data/lib/remi/project/jobs/transforms/prefix_job.rb
@@ -0,0 +1,16 @@
+require_relative '../all_jobs_shared'
+
+class PrefixJob
+  include AllJobsShared
+
+  define_param :prefix, 'prefix'
+  define_source :source_data, Remi::DataSource::DataFrame
+  define_target :target_data, Remi::DataTarget::DataFrame
+
+  define_transform :main, sources: :source_data, targets: :target_data do
+    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+      map source(:field) .target(:field)
+        .transform(Remi::Transform[:prefix].(params[:prefix]))
+    end
+  end
+end
data/lib/remi/settings.rb
@@ -0,0 +1,39 @@
+module Remi
+  module Settings
+    extend self
+
+    def work_dir
+      @work_dir ||= File.join(Dir.tmpdir, Dir::Tmpname.make_tmpname('',nil))
+    end
+
+    def work_dir=(arg)
+      @work_dir = arg
+    end
+
+    def log_level
+      @log_level ||= Logger::INFO
+    end
+
+    def log_level=(arg)
+      @log_level = arg
+    end
+
+    def logger
+      return @logger.call if @logger.respond_to? :call
+      @logger ||= lambda do
+        l = Logger.new(STDOUT)
+        l.level = log_level
+        l.formatter = proc do |severity, datetime, progname, msg|
+          "#{msg}\n"
+        end
+        l
+      end
+
+      @logger.call
+    end
+
+    def logger=(arg)
+      @logger = arg
+    end
+  end
+end
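
Settings is a module-level singleton (extend self) with lazy defaults: work_dir falls back to a fresh temp directory, log_level to Logger::INFO, and logger to a message-only STDOUT logger built from a lambda. Because the reader always ends in @logger.call, a replacement assigned via logger= must itself be callable (e.g., a lambda returning a Logger), not a bare Logger instance. A usage sketch based on the accessors above:

    require 'logger'
    require 'remi'

    Remi::Settings.log_level = Logger::DEBUG
    Remi::Settings.logger.info 'extract complete'  # prints just the message, per the custom formatter

    # Swap in a file logger; note the callable wrapper.
    Remi::Settings.logger = -> { Logger.new('remi.log') }
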
data/lib/remi/sf_bulk_helper.rb
@@ -0,0 +1,265 @@
+module Remi
+  module SfBulkHelper
+
+    class DupeLookupKeyError < StandardError; end
+    class MaxAttemptError < StandardError; end
+
+    # Public: Class used to execute SF Bulk operations. This class is not meant to be
+    # used directly. It is instead meant to be inherited by classes that perform the
+    # specific query, update, create, or upsert operations.
+    #
+    # Examples
+    #
+    #   sf_query = SfBulkQuery.query(client, 'Contact', 'SELECT Id, Name FROM Contact')
+    #   puts sf_query.result
+    #
+    #   mydata = [ { 'Id' => '001G000000ncxb8IAA', 'Name' => 'Happy Feet' } ]
+    #   sf_update = SfBulkUpdate.update(client, 'Contact', mydata)
+    class SfBulkOperation
+
+      # Public: Initializes a SfBulkOperation (does not execute operation).
+      #
+      # restforce_client - An instance of Restforce that is used to authenticate the connection.
+      # object           - The name of the object to operate on (e.g., Contact, Task, etc).
+      # data             - For query operations, this is the SOQL query string. For other
+      #                    operations, this is an array of hashes, where the hash keys are column names
+      #                    and the values are the data.
+      # batch_size       - Batch size to use to download or upload data (default: 10000)
+      # max_mattempts    - The maximum number of attempts to upload data (default: 2)
+      # logger           - Logger to use (default: Logger.new(STDOUT)
+      def initialize(restforce_client, object, data, batch_size: 5000, max_attempts: 2, logger: Logger.new(STDOUT))
+        @restforce_client = restforce_client
+        @object = object
+        @data = data
+        @batch_size = batch_size
+        @max_attempts = max_attempts
+        @attempts = Hash.new(0)
+        @logger = logger
+      end
+
+      # Public: A symbol representing the operation to be performed (:query, :update, :create, :upsert)
+      def operation
+        :undefined
+      end
+
+      # Public: Returns the instance of SalesforceBulkApi::Api used for bulk operations.
+      def sf_bulk
+        @sf_bulk ||= SalesforceBulkApi::Api.new(@restforce_client).tap { |o| o.connection.set_status_throttle(5) }
+      end
+
+      # Public: Returns the raw result from the SalesforceBulkApi query
+      def raw_result
+        @raw_result || execute
+      end
+
+      # Public: Returns useful metadata about the batch query.
+      def info
+        execute if @attempts[:total] == 0
+
+        return @info if @info and @attempts[:info] == @attempts[:total]
+        @attempts[:info] += 1
+
+        @info = raw_result.reject { |k,v| k == 'batches' }.tap do |h|
+          h['query'] = @data if operation == :query
+        end
+      end
+
+      # Public: Collects the results from all of the batches and aggregates them
+      # into an array of hashes. Each element of the array represents a record in the
+      # result and the hash gives the column-value. Note that if multiple retries are
+      # needed, this is just the final result.
+      #
+      # Returns an array of hashes.
+      def result
+        execute if @attempts[:total] == 0
+
+        return @result if @result and @attempts[:result] == @attempts[:total]
+        @attempts[:result] += 1
+
+        @result = []
+        raw_result['batches'].each do |batch|
+          next unless batch['response']
+
+          batch['response'].each do |record|
+            @result << record.inject({}) { |h, (k,v)| h[k] = v.first unless ['xsi:type','type'].include? k; h }
+          end
+
+          # delete raw result at end of processing to free memory
+          batch['response'] = nil
+        end
+
+        @result
+      end
+
+      # Public: Converts the result into a hash that can be used to
+      # lookup the row for a given key (e.g., external id field).
+      #
+      # key        - A string representing the name of the column to be used as the lookup key.
+      # duplicates - Indicates whether duplicate keys are allowed. If they are allowed,
+      #              only the first row found will be retained. If duplicates are not allowed,
+      #              an error is raised (default: false).
+      #
+      # Returns a hash.
+      def as_lookup(key:, duplicates: false)
+        execute if @attempts[:total] == 0
+
+        @as_lookup ||= {}
+        @attempts[:as_lookup] = Hash.new(0) if @attempts[:as_lookup] == 0
+
+        return @as_lookup[key] if @as_lookup[key] and @attempts[:as_lookup][key] == @attempts[:total]
+        @attempts[:as_lookup][key] += 1
+
+        @as_lookup[key] = result.inject({}) do |lkp,row|
+          raise DupeLookupKeyError, "Duplicate key: #{row[key]} found in result of query: #{@data}" if lkp.has_key?(row[key]) and not duplicates
+          lkp[row[key]] = row unless lkp.has_key?(row[key])
+          lkp
+        end
+      end
+
+
+      # Public: Returns true if any of the records failed to update.
+      def failed_records?
+        n_failed_records = result.reduce(0) do |count, row|
+          count += 1 if row['success'] != 'true'
+          count
+        end
+
+        n_failed_batches = raw_result['batches'].reduce(0) do |count, batch|
+          count += 1 if batch['state'].first != 'Completed'
+          count
+        end
+
+        n_failed_records > 0 || n_failed_batches > 0
+      end
+
+
+      private
+
+      # Private: Sends the operation to Salesforce using the bulk API.
+      def send_bulk_operation
+        raise "No SF bulk operation defined for #{operation}"
+      end
+
+      # Private: Executes the operation and retries if needed.
+      def execute
+        @attempts[:total] += 1
+        @logger.info "Executing Salesforce Bulk operation: #{operation}"
+
+        @raw_result = send_bulk_operation
+        @logger.info "Bulk operation response: "
+        JSON.pretty_generate(info).split("\n").each { |l| @logger.info l }
+
+        retry_failed if failed_records?
+
+        @logger.info JSON.pretty_generate(info)
+        @raw_result
+      end
+
+      # Private: Drops any data that has already been loaded to salesforce.
+      # Note that this doesn't work for created data since the initial data
+      # wont have a salesforce id. Sometimes batches can fail completely
+      # and won't give anything in the result set. Therefore, the only way
+      # to be able to drop data that's already been created would be to
+      # know how the data was split into batches and the gem we use does not
+      # make this simple. So for now, we live with the defect.
+      def drop_successfully_updated_data
+        lkp_result_by_id = as_lookup(key: 'id', duplicates: true)
+        @data.reject! do |row|
+          sf_bulk_result = lkp_result_by_id[row['Id'] || row[:Id]]
+          sf_bulk_result && (sf_bulk_result['success'] == 'true')
+        end
+
+        nil
+      end
+
+
+      # Private: Selects data needed to be retried and re-executes the operation.
+      def retry_failed
+        raise MaxAttemptError if @attempts[:total] >= @max_attempts
+        @logger.warn "Retrying #{operation} - #{@attempts[:total]} of #{@max_attempts}"
+
+        drop_successfully_updated_data
+
+        execute
+      end
+    end
+
+
+    # Public: Class used to execute SF Bulk Update operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkUpdate < SfBulkOperation
+      def self.update(*args,**kargs)
+        SfBulkUpdate.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :update
+      end
+
+      private
+
+      def send_bulk_operation
+        sf_bulk.send(operation, @object, @data, true, false, [], @batch_size)
+      end
+    end
+
+    # Public: Class used to execute SF Bulk Create operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkCreate < SfBulkOperation
+      def self.create(*args,**kargs)
+        SfBulkCreate.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :create
+      end
+
+      private
+
+      def send_bulk_operation
+        sf_bulk.send(operation, @object, @data, true, false, @batch_size)
+      end
+    end
+
+    # Public: Class used to execute SF Bulk Upsert operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkUpsert < SfBulkOperation
+      def self.upsert(*args,**kargs)
+        SfBulkUpsert.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :upsert
+      end
+
+      private
+
+      def send_bulk_operation
+        # Upsert does not support external id right now
+        sf_bulk.send(operation, @object, @data, 'Id', true, false, [], @batch_size)
+      end
+    end
+
+    # Public: Class used to execute SF Bulk Query operations (see SfBulkOperation class for
+    # more details).
+    class SfBulkQuery < SfBulkOperation
+      def self.query(*args,**kargs)
+        SfBulkQuery.new(*args,**kargs).tap { |sf| sf.send(:execute) }
+      end
+
+      def operation
+        :query
+      end
+
+      def failed_records?
+        false
+      end
+
+      private
+
+      def send_bulk_operation
+        sf_bulk.send(operation, @object, @data, @batch_size)
+      end
+    end
+  end
+end
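
One detail worth noting in SfBulkOperation: info, result, and as_lookup all memoize against @attempts[:total], so their cached views are invalidated and rebuilt whenever retry_failed re-executes the operation. Extending the docstring's example, a usage sketch (the Restforce client setup is hypothetical and elided):

    require 'restforce'

    client = Restforce.new  # credentials come from configuration; elided here

    sf_query = Remi::SfBulkHelper::SfBulkQuery.query(
      client, 'Contact', 'SELECT Id, External_ID__c FROM Contact'
    )
    rows      = sf_query.result                            # array of { column => value } hashes
    by_ext_id = sf_query.as_lookup(key: 'External_ID__c')  # raises DupeLookupKeyError on duplicate keys

    mydata = [{ 'Id' => '001G000000ncxb8IAA', 'Name' => 'Happy Feet' }]
    sf_update = Remi::SfBulkHelper::SfBulkUpdate.update(client, 'Contact', mydata)
    warn 'some records failed after retries' if sf_update.failed_records?
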