remi 0.0.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.bundle/config +2 -0
  3. data/.gitignore +3 -2
  4. data/.rspec +2 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +4 -0
  7. data/Gemfile.lock +123 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +94 -3
  10. data/bin/remi +8 -0
  11. data/doc/install-rbenv-os_x.md +47 -0
  12. data/lib/remi.rb +56 -9
  13. data/lib/remi/cli.rb +56 -0
  14. data/lib/remi/core/daru.rb +28 -0
  15. data/lib/remi/core/refinements.rb +21 -0
  16. data/lib/remi/core/string.rb +8 -0
  17. data/lib/remi/cucumber.rb +7 -0
  18. data/lib/remi/cucumber/business_rules.rb +504 -0
  19. data/lib/remi/cucumber/data_source.rb +63 -0
  20. data/lib/remi/data_source.rb +13 -0
  21. data/lib/remi/data_source/csv_file.rb +79 -0
  22. data/lib/remi/data_source/data_frame.rb +10 -0
  23. data/lib/remi/data_source/postgres.rb +58 -0
  24. data/lib/remi/data_source/salesforce.rb +78 -0
  25. data/lib/remi/data_subject.rb +25 -0
  26. data/lib/remi/data_target.rb +15 -0
  27. data/lib/remi/data_target/csv_file.rb +49 -0
  28. data/lib/remi/data_target/data_frame.rb +14 -0
  29. data/lib/remi/data_target/salesforce.rb +49 -0
  30. data/lib/remi/extractor/sftp_file.rb +84 -0
  31. data/lib/remi/field_symbolizers.rb +17 -0
  32. data/lib/remi/job.rb +200 -0
  33. data/lib/remi/lookup/regex_sieve.rb +55 -0
  34. data/lib/remi/project/features/examples.feature +24 -0
  35. data/lib/remi/project/features/formulas.feature +64 -0
  36. data/lib/remi/project/features/sample_job.feature +304 -0
  37. data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
  38. data/lib/remi/project/features/support/env.rb +10 -0
  39. data/lib/remi/project/features/support/env_app.rb +3 -0
  40. data/lib/remi/project/features/transforms/date_diff.feature +50 -0
  41. data/lib/remi/project/features/transforms/parse_date.feature +34 -0
  42. data/lib/remi/project/features/transforms/prefix.feature +15 -0
  43. data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
  44. data/lib/remi/project/jobs/copy_source_job.rb +12 -0
  45. data/lib/remi/project/jobs/sample_job.rb +164 -0
  46. data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
  47. data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
  48. data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
  49. data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
  50. data/lib/remi/settings.rb +39 -0
  51. data/lib/remi/sf_bulk_helper.rb +265 -0
  52. data/lib/remi/source_to_target_map.rb +93 -0
  53. data/lib/remi/transform.rb +137 -0
  54. data/lib/remi/version.rb +3 -0
  55. data/remi.gemspec +25 -7
  56. data/workbooks/sample_workbook.ipynb +56 -0
  57. data/workbooks/workbook_helper.rb +1 -0
  58. metadata +234 -17
  59. data/lib/noodling.rb +0 -163
  60. data/test/test_NAME.rb +0 -19
@@ -0,0 +1,10 @@
# Auto-generated by Remi.
# Add user-customizations to env_app.rb

require 'bundler/setup'
require 'remi'
require 'remi/cucumber'

# User hook file — loaded after Remi so it can override anything set up here.
require_relative 'env_app.rb'

# Keep cucumber runs quiet: only surface ERROR-level messages from Remi.
Remi::Settings.log_level = Logger::ERROR
@@ -0,0 +1,3 @@
# Loads every example job class so that cucumber step definitions can
# resolve jobs by name (e.g. "Given the job is 'DateDiff'").
require_relative '../../jobs/sample_job'
require_relative '../../jobs/copy_source_job'
require_relative '../../jobs/transforms/transform_jobs'
@@ -0,0 +1,50 @@
1
+ Feature: Tests the date_diff transform
2
+
3
+ Background:
4
+ Given the job is 'DateDiff'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+
8
+ And the source 'Source Data'
9
+ And the target 'Target Data'
10
+ And the following example record for 'Source Data':
11
+ | Date1 | Date2 |
12
+ | 2015-12-31 | 2016-01-02 |
13
+
14
+ Scenario Outline: Calculating date difference in days.
15
+ Given the job parameter 'measure' is "days"
16
+ And the source field 'Date1' has the value "<Date1>"
17
+ And the source field 'Date2' has the value "<Date2>"
18
+ Then the target field 'Difference' is set to the value "<Difference>"
19
+ Examples:
20
+ | Date1 | Date2 | Difference |
21
+ | 2015-12-31 | 2016-01-02 | 2 |
22
+ | 2014-12-31 | 2015-12-31 | 365 |
23
+ | 2016-01-02 | 2015-12-31 | -2 |
24
+ | 2015-02-28 | 2015-03-01 | 1 |
25
+ | 2016-02-28 | 2016-03-01 | 2 | # leap day
26
+
27
+
28
+ Scenario Outline: Calculating date difference in months.
29
+ Given the job parameter 'measure' is "months"
30
+ And the source field 'Date1' has the value "<Date1>"
31
+ And the source field 'Date2' has the value "<Date2>"
32
+ Then the target field 'Difference' is set to the value "<Difference>"
33
+ Examples:
34
+ | Date1 | Date2 | Difference |
35
+ | 2015-12-31 | 2016-01-02 | 1 |
36
+ | 2015-12-31 | 2016-02-02 | 2 |
37
+ | 2015-12-31 | 2017-02-02 | 14 |
38
+ | 2016-02-02 | 2015-12-31 | -2 |
39
+
40
+ Scenario Outline: Calculating date difference in years.
41
+ Given the job parameter 'measure' is "years"
42
+ And the source field 'Date1' has the value "<Date1>"
43
+ And the source field 'Date2' has the value "<Date2>"
44
+ Then the target field 'Difference' is set to the value "<Difference>"
45
+ Examples:
46
+ | Date1 | Date2 | Difference |
47
+ | 2015-12-31 | 2016-01-02 | 1 |
48
+ | 2015-01-01 | 2015-12-31 | 0 |
49
+ | 2015-12-31 | 2017-02-02 | 2 |
50
+ | 2016-02-02 | 2015-12-31 | -1 |
@@ -0,0 +1,34 @@
1
+ Feature: Tests the parse_date transform
2
+
3
+ Background:
4
+ Given the job is 'ParseDate'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+
8
+ And the source 'Source Data'
9
+ And the target 'Target Data'
10
+ And the following example record for 'Source Data':
11
+ | Date String |
12
+ | 2015-12-31 |
13
+
14
+ Scenario Outline: Parsing date strings.
15
+ Given the source field 'Date String' has the value "<Date String>"
16
+ And the job parameter 'format' is "<Format>"
17
+ Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
18
+ Examples:
19
+ | Date String | Format | Parsed Date |
20
+ | 2015-10-21 | %Y-%m-%d | 2015-10-21 |
21
+ | 10/21/2015 | %m/%d/%Y | 2015-10-21 |
22
+ | 20151021 | %Y%m%d | 2015-10-21 |
23
+ | | %m/%d/%Y | |
24
+
25
+ Scenario Outline: Parsing date strings for missing values.
26
+ Given the source field 'Date String' has the value ""
27
+ And the job parameter 'if_blank' is "<If Blank>"
28
+
29
+ Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
30
+ Examples:
31
+ | If Blank | Parsed Date |
32
+ | low | 1900-01-01 |
33
+ | high | 2999-12-31 |
34
+ | | |
@@ -0,0 +1,15 @@
1
+ Feature: Tests the prefix transform
2
+
3
+ Background:
4
+ Given the job is 'Prefix'
5
+ And the job source 'Source Data'
6
+ And the job target 'Target Data'
7
+
8
+
9
+ Scenario: Prefixing a field.
10
+ Given the source 'Source Data'
11
+ And the target 'Target Data'
12
+ Given the following example record for 'Source Data':
13
+ | Field |
14
+ | something |
15
+ Then the target field 'Field' is set to the value "prefixsomething"
@@ -0,0 +1,25 @@
# This file was auto-generated by Remi but is expected to be overwritten.
# Put anything in here that should be shared by all jobs.

require 'bundler/setup'
require 'remi'

# Mixin included by every example job; provides parameters shared across jobs.
module AllJobsShared
  include Remi::Job

  # SFTP connection settings used by file-based extractors.
  # NOTE(review): these are placeholder values — real credentials must not be
  # committed to source control; inject them via environment/configuration.
  define_param :sftp, {
    host: 'example.com',
    username: 'user',
    password: '1234567890'
  }

  # Salesforce API credentials used by Salesforce sources and targets.
  # NOTE(review): placeholder values only — same warning as the SFTP
  # credentials above.
  define_param :salesforce_credentials, {
    host: 'login.salesforce.com',
    username: 'user@example.com',
    password: 'password',
    security_token: '4342jn3j4n32n4',
    client_id: 'dkfjsdkfjoasdjdf',
    client_secret: '28357245723475',
    instance_url: 'https://na1.salesforce.com'
  }
end
@@ -0,0 +1,12 @@
require_relative 'all_jobs_shared'

# Minimal job used by the feature tests: copies the source dataframe to the
# target unchanged.
class CopySourceJob
  include AllJobsShared

  define_source :source_data, Remi::DataSource::DataFrame
  # NOTE(review): target_data is declared with define_source rather than
  # define_target — presumably intentional for in-memory cucumber testing,
  # but confirm against the other jobs, which use define_target here.
  define_source :target_data, Remi::DataSource::DataFrame

  define_transform :main, sources: :source_data, targets: :target_data do
    # monkey_dup is Remi's copy helper for Daru dataframes (core/daru.rb),
    # used so the target does not alias the source's underlying data.
    target_data.df = source_data.df.monkey_dup
  end
end
@@ -0,0 +1,164 @@
# This is an example Remi job that was auto-generated by Remi.
#
# Demonstration ETL flow: pull a student CSV from SFTP, match records
# against existing Salesforce contacts by external id, then split the
# result into Salesforce create and update batches.

require_relative 'all_jobs_shared'

class SampleJob
  include AllJobsShared

  # Existing Salesforce contacts, used to look up the Salesforce Id for a
  # given External_ID__c so records can be routed to create vs. update.
  define_source :existing_contacts, Remi::DataSource::Salesforce,
    object: :Contact,
    credentials: params[:salesforce_credentials],
    api: :bulk,
    fields: {
      :Id => {},
      :External_ID__c => {}
    },
    query: <<-EOQ
      SELECT
        Id,
        External_ID__c
      FROM
        Contact
    EOQ


  # Inbound student file: most recent SampleFile_<digits>.txt found in the
  # SFTP root, parsed as ISO-8859-1 CSV transcoded to UTF-8.
  define_source :sample_file, Remi::DataSource::CsvFile,
    extractor: Remi::Extractor::SftpFile.new(
      credentials: params[:sftp],
      remote_file: /^SampleFile_(\d+)\.txt/,
      remote_folder: '/',
      most_recent_only: true
    ),
    csv_options: {
      headers: true,
      col_sep: ",",
      encoding: "ISO-8859-1:UTF-8"
    },
    fields: {
      :student_id => {},
      :school_id => {},
      :school_name => {},
      :program => {},
      :last_name => {},
      :first_name => {},
      :current_email => {},
      :mailing_address_line_1 => {},
      :mailing_address_line_2 => {},
      :mailing_city => {},
      :mailing_state => {},
      :mailing_postal_code => {},
      :birthdate => { type: :date, format: '%m/%d/%Y'},
      :applied_date => { type: :date, format: '%m/%d/%Y'}
    }

  # Intermediate dataframe shared by the create and update transforms.
  define_target :all_contacts, Remi::DataTarget::DataFrame

  define_target :contact_updates, Remi::DataTarget::Salesforce,
    credentials: params[:salesforce_credentials],
    object: :Contact,
    operation: :update,
    api: :bulk

  define_target :contact_creates, Remi::DataTarget::Salesforce,
    credentials: params[:salesforce_credentials],
    object: :Contact,
    operation: :create,
    api: :bulk

  # Maps raw program strings to canonical program names. A RegexSieve is
  # order-sensitive: the first matching pattern wins, so the exact-match
  # and nil (rejection) patterns are listed before the broader ones.
  define_lookup :program_name_lookup, Remi::Lookup::RegexSieve, {
    /^BIO$/ => "Biology",
    /^Fake Biology$/ => nil,
    /(?:B|Microb)iology/ => "Biology",
    /^CHEM$/ => "Chemistry",
    /Chemistry/ => "Chemistry",
    /Physics/ => "Physics"
  }

  # Builds all_contacts from the sample file: resolves program names,
  # drops unmappable records, and attaches any existing Salesforce Id.
  define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do

    # Exclude all source records with an invalid program name
    all_contacts.df = sample_file.df.monkey_dup
    Remi::SourceToTargetMap.apply(all_contacts.df) do
      map source(:program) .target(:Major__c)
        .transform(Remi::Transform[:lookup][program_name_lookup])
    end
    all_contacts.df = all_contacts.df.where(all_contacts.df[:Major__c].not_eq(nil))

    # External_ID__c => Id lookup built from the Salesforce extract.
    student_id_to_sf_id = existing_contacts.df.map_rows { |row| [row[:External_ID__c], row[:Id]] }.to_h

    # Map fields that are common to both creates and updates
    Remi::SourceToTargetMap.apply(all_contacts.df) do

      # Prefixes source id record and then looks up existing salesforce Id.
      # :Id stays nil for records with no Salesforce match; the later
      # transforms use that to split creates from updates.
      map source(:student_id) .target(:External_ID__c, :Id)
        .transform(Remi::Transform[:prefix]['SAMP'])
        .transform(->(v) { [v, Remi::Transform[:lookup][student_id_to_sf_id].call(v)] })
    end
  end


  # Full field mapping for records with no Salesforce Id (new contacts).
  define_transform :map_creates, sources: :all_contacts, targets: :contact_creates do

    work_contact_creates = all_contacts.df.where(all_contacts.df[:Id].eq(nil))
    Remi::SourceToTargetMap.apply(work_contact_creates) do

      map source(:school_id) .target(:School_ID__c)
      map source(:school_name) .target(:School_Name__c)
      map source(:first_name) .target(:FirstName)
        .transform(Remi::Transform[:ifblank].('Not Provided'))
      map source(:last_name) .target(:LastName)
        .transform(Remi::Transform[:ifblank].('Not Provided'))
      map source(:mailing_city) .target(:MailingCity)
      map source(:mailing_state) .target(:MailingState)
      map source(:mailing_postal_code) .target(:MailingPostalCode)

      map source(:birthdate) .target(:Birthdate)
        .transform(Remi::Transform[:format_date][from_fmt: sample_file.fields[:birthdate][:format]])

      # Blank applied dates default to today (in the file's date format)
      # before being reformatted for Salesforce.
      map source(:applied_date) .target(:Applied_Date__c)
        .transform(Remi::Transform[:ifblank].(Date.today.strftime(sample_file.fields[:applied_date][:format])))
        .transform(Remi::Transform[:format_date].(from_fmt: sample_file.fields[:applied_date][:format]))

      # Street is blank when line 1 is blank; otherwise lines are joined.
      map source(:mailing_address_line_1, :mailing_address_line_2) .target(:MailingStreet)
        .transform(->(line_1, line_2) {
          Remi::Transform[:ifblank].(nil).call(line_1).nil? ? [] : [line_1, line_2]
        })
        .transform(Remi::Transform[:concatenate].(', '))

      # Composite school key "<id>-<name>" with "Unknown" filling blanks.
      map source(:school_id, :school_name) .target(:School__c)
        .transform(->(id, name) {[
          Remi::Transform[:ifblank]["Unknown"].call(id),
          Remi::Transform[:ifblank]["Unknown"].call(name)
        ]})
        .transform(Remi::Transform[:concatenate].('-'))

      # Commas in emails are treated as typos for periods before validation.
      map source(:current_email) .target(:Email)
        .transform(Remi::Transform[:replace].(/,/, '.'))
        .transform(Remi::Transform[:validate_email].call)
    end

    # Restrict the create payload to the mapped Salesforce columns.
    contact_creates.df = work_contact_creates[
      :External_ID__c,
      :School_ID__c,
      :School_Name__c,
      :School__c,
      :Major__c,
      :FirstName,
      :LastName,
      :Email,
      :MailingStreet,
      :MailingCity,
      :MailingState,
      :MailingPostalCode,
      :Birthdate,
      :Applied_Date__c
    ]
  end

  # Records that matched an existing contact only have Major__c updated.
  define_transform :map_updates, sources: :all_contacts, targets: :contact_updates do
    contact_updates.df = all_contacts.df[
      :Id,
      :Major__c
    ].where(all_contacts.df[:Id].not_eq(nil))
  end
end
@@ -0,0 +1,17 @@
require_relative '../all_jobs_shared'

# Job exercised by features/transforms/date_diff.feature.
class DateDiffJob
  include AllJobsShared

  # Unit of the difference: :days, :months, or :years.
  define_param :measure, :days
  define_source :source_data, Remi::DataSource::DataFrame
  define_target :target_data, Remi::DataTarget::DataFrame

  define_transform :main, sources: :source_data, targets: :target_data do
    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
      map source(:date1, :date2) .target(:difference)
        # Date.strptime with no format assumes ISO-8601 (%F) strings,
        # matching the fixtures in the feature file.
        .transform(->(d1,d2) { [Date.strptime(d1), Date.strptime(d2)] })
        .transform(Remi::Transform[:date_diff].(params[:measure]))
    end
  end
end
@@ -0,0 +1,18 @@
require_relative '../all_jobs_shared'

# Job exercised by features/transforms/parse_date.feature.
class ParseDateJob
  include AllJobsShared

  # strptime format used to parse the source string.
  define_param :format, '%Y-%m-%d'
  # Value substituted when the source string is blank (e.g. :low/:high
  # sentinels per the feature file); nil leaves blanks blank.
  define_param :if_blank, nil
  define_source :source_data, Remi::DataSource::DataFrame
  define_target :target_data, Remi::DataTarget::DataFrame

  define_transform :main, sources: :source_data, targets: :target_data do

    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
      map source(:date_string) .target(:parsed_date)
        .transform(Remi::Transform[:parse_date].(format: params[:format], if_blank: params[:if_blank]))
    end
  end
end
@@ -0,0 +1,16 @@
require_relative '../all_jobs_shared'

# Job exercised by features/transforms/prefix.feature.
class PrefixJob
  include AllJobsShared

  # String prepended to the :field value.
  define_param :prefix, 'prefix'
  define_source :source_data, Remi::DataSource::DataFrame
  define_target :target_data, Remi::DataTarget::DataFrame

  define_transform :main, sources: :source_data, targets: :target_data do
    Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
      map source(:field) .target(:field)
        .transform(Remi::Transform[:prefix].(params[:prefix]))
    end
  end
end
@@ -0,0 +1,3 @@
# Convenience loader: pulls in every transform test job in one require.
require_relative 'prefix_job'
require_relative 'parse_date_job'
require_relative 'date_diff_job'
@@ -0,0 +1,39 @@
require 'logger'
require 'tmpdir'
require 'securerandom'

module Remi
  # Process-wide configuration for Remi: scratch directory, log level, and
  # logger. `extend self` turns every method into a module-level accessor.
  module Settings
    extend self

    # Public: Returns the scratch directory path (a String under the system
    # temp dir). Lazily generated once and memoized; the directory itself is
    # not created here.
    #
    # Dir::Tmpname.make_tmpname was private stdlib API and was removed in
    # Ruby 2.5, so the unique suffix is generated with SecureRandom instead.
    def work_dir
      @work_dir ||= File.join(Dir.tmpdir, "remi-#{SecureRandom.hex(8)}")
    end

    # Public: Overrides the scratch directory path.
    def work_dir=(arg)
      @work_dir = arg
    end

    # Public: Returns the log level used by the default logger (defaults to
    # Logger::INFO).
    def log_level
      @log_level ||= Logger::INFO
    end

    # Public: Sets the log level (e.g. Logger::ERROR).
    def log_level=(arg)
      @log_level = arg
    end

    # Public: Returns the active logger. Accepts either a callable factory
    # (invoked on every call, so each call may build a fresh logger) or a
    # plain logger object assigned via #logger=.
    #
    # Fix: the original unconditionally ended with `@logger.call`, which
    # raised NoMethodError whenever a non-callable Logger instance had been
    # assigned; such objects are now returned directly.
    def logger
      return @logger.call if @logger.respond_to? :call
      return @logger if @logger

      # Default factory: STDOUT logger honoring log_level, message-only format.
      @logger = lambda do
        l = Logger.new(STDOUT)
        l.level = log_level
        l.formatter = proc do |severity, datetime, progname, msg|
          "#{msg}\n"
        end
        l
      end

      @logger.call
    end

    # Public: Sets the logger — either a logger instance or a zero-arg
    # callable that builds one.
    def logger=(arg)
      @logger = arg
    end
  end
end
@@ -0,0 +1,265 @@
module Remi
  module SfBulkHelper

    # Raised by #as_lookup when the chosen key is not unique and duplicates
    # were not explicitly allowed.
    class DupeLookupKeyError < StandardError; end
    # Raised by #retry_failed once @max_attempts executions have been used.
    class MaxAttemptError < StandardError; end

    # Public: Class used to execute SF Bulk operations. This class is not meant to be
    # used directly. It is instead meant to be inherited by classes that perform the
    # specific query, update, create, or upsert operations.
    #
    # Examples
    #
    #   sf_query = SfBulkQuery.query(client, 'Contact', 'SELECT Id, Name FROM Contact')
    #   puts sf_query.result
    #
    #   mydata = [ { 'Id' => '001G000000ncxb8IAA', 'Name' => 'Happy Feet' } ]
    #   sf_update = SfBulkUpdate.update(client, 'Contact', mydata)
    class SfBulkOperation

      # Public: Initializes a SfBulkOperation (does not execute operation).
      #
      # restforce_client - An instance of Restforce that is used to authenticate the connection.
      # object           - The name of the object to operate on (e.g., Contact, Task, etc).
      # data             - For query operations, this is the SOQL query string. For other
      #                    operations, this is an array of hashes, where the hash keys are
      #                    column names and the values are the data.
      # batch_size       - Batch size to use to download or upload data (default: 5000).
      # max_attempts     - The maximum number of attempts to upload data (default: 2).
      # logger           - Logger to use (default: Logger.new(STDOUT)).
      def initialize(restforce_client, object, data, batch_size: 5000, max_attempts: 2, logger: Logger.new(STDOUT))
        @restforce_client = restforce_client
        @object = object
        @data = data
        @batch_size = batch_size
        @max_attempts = max_attempts
        # Attempt counters keyed by stage (:total, :info, :result, :as_lookup);
        # Hash.new(0) lets each counter start at zero. Used to invalidate the
        # memoized views (#info, #result, #as_lookup) after a retry.
        @attempts = Hash.new(0)
        @logger = logger
      end

      # Public: A symbol representing the operation to be performed (:query, :update, :create, :upsert).
      # Subclasses must override; the base class is deliberately inert.
      def operation
        :undefined
      end

      # Public: Returns the instance of SalesforceBulkApi::Api used for bulk operations.
      def sf_bulk
        @sf_bulk ||= SalesforceBulkApi::Api.new(@restforce_client).tap { |o| o.connection.set_status_throttle(5) }
      end

      # Public: Returns the raw result from the SalesforceBulkApi query,
      # executing the operation on first use.
      def raw_result
        @raw_result || execute
      end

      # Public: Returns useful metadata about the batch query.
      def info
        execute if @attempts[:total] == 0

        # Reuse the memoized hash unless a retry has happened since it was built.
        return @info if @info and @attempts[:info] == @attempts[:total]
        @attempts[:info] += 1

        # Everything except the (potentially large) per-batch payloads.
        @info = raw_result.reject { |k,v| k == 'batches' }.tap do |h|
          h['query'] = @data if operation == :query
        end
      end

      # Public: Collects the results from all of the batches and aggregates them
      # into an array of hashes. Each element of the array represents a record in the
      # result and the hash gives the column-value. Note that if multiple retries are
      # needed, this is just the final result.
      #
      # Returns an array of hashes.
      def result
        execute if @attempts[:total] == 0

        return @result if @result and @attempts[:result] == @attempts[:total]
        @attempts[:result] += 1

        @result = []
        raw_result['batches'].each do |batch|
          next unless batch['response']

          batch['response'].each do |record|
            # Values arrive as single-element arrays from the XML response;
            # unwrap them and drop XML type annotations.
            @result << record.inject({}) { |h, (k,v)| h[k] = v.first unless ['xsi:type','type'].include? k; h }
          end

          # delete raw result at end of processing to free memory
          batch['response'] = nil
        end

        @result
      end

      # Public: Converts the result into a hash that can be used to
      # lookup the row for a given key (e.g., external id field).
      #
      # key        - A string representing the name of the column to be used as the lookup key.
      # duplicates - Indicates whether duplicate keys are allowed. If they are allowed,
      #              only the first row found will be retained. If duplicates are not allowed,
      #              an error is raised (default: false).
      #
      # Returns a hash.
      def as_lookup(key:, duplicates: false)
        execute if @attempts[:total] == 0

        @as_lookup ||= {}
        # First call: replace the default 0 counter with a per-key counter hash.
        @attempts[:as_lookup] = Hash.new(0) if @attempts[:as_lookup] == 0

        return @as_lookup[key] if @as_lookup[key] and @attempts[:as_lookup][key] == @attempts[:total]
        @attempts[:as_lookup][key] += 1

        @as_lookup[key] = result.inject({}) do |lkp,row|
          raise DupeLookupKeyError, "Duplicate key: #{row[key]} found in result of query: #{@data}" if lkp.has_key?(row[key]) and not duplicates
          lkp[row[key]] = row unless lkp.has_key?(row[key])
          lkp
        end
      end


      # Public: Returns true if any of the records failed to update.
      # A failure is either a record with success != 'true' or a whole batch
      # that did not reach the 'Completed' state.
      def failed_records?
        n_failed_records = result.reduce(0) do |count, row|
          count += 1 if row['success'] != 'true'
          count
        end

        n_failed_batches = raw_result['batches'].reduce(0) do |count, batch|
          count += 1 if batch['state'].first != 'Completed'
          count
        end

        n_failed_records > 0 || n_failed_batches > 0
      end


      private

      # Private: Sends the operation to Salesforce using the bulk API.
      # Subclasses must override with the gem call appropriate to #operation.
      def send_bulk_operation
        raise "No SF bulk operation defined for #{operation}"
      end

      # Private: Executes the operation and retries if needed.
      def execute
        @attempts[:total] += 1
        @logger.info "Executing Salesforce Bulk operation: #{operation}"

        @raw_result = send_bulk_operation
        @logger.info "Bulk operation response: "
        # Log the pretty-printed metadata one line at a time so each line
        # carries the logger's formatting.
        JSON.pretty_generate(info).split("\n").each { |l| @logger.info l }

        retry_failed if failed_records?

        @logger.info JSON.pretty_generate(info)
        @raw_result
      end

      # Private: Drops any data that has already been loaded to salesforce.
      # Note that this doesn't work for created data since the initial data
      # wont have a salesforce id.  Sometimes batches can fail completely
      # and won't give anything in the result set.  Therefore, the only way
      # to be able to drop data that's already been created would be to
      # know how the data was split into batches and the gem we use does not
      # make this simple.  So for now, we live with the defect.
      def drop_successfully_updated_data
        lkp_result_by_id = as_lookup(key: 'id', duplicates: true)
        @data.reject! do |row|
          sf_bulk_result = lkp_result_by_id[row['Id'] || row[:Id]]
          sf_bulk_result && (sf_bulk_result['success'] == 'true')
        end

        nil
      end


      # Private: Selects data needed to be retried and re-executes the operation.
      # Raises MaxAttemptError once the attempt budget is exhausted.
      def retry_failed
        raise MaxAttemptError if @attempts[:total] >= @max_attempts
        @logger.warn "Retrying #{operation} - #{@attempts[:total]} of #{@max_attempts}"

        drop_successfully_updated_data

        execute
      end
    end


    # Public: Class used to execute SF Bulk Update operations (see SfBulkOperation class for
    # more details).
    class SfBulkUpdate < SfBulkOperation
      # Public: Builds the operation and executes it immediately.
      def self.update(*args,**kargs)
        SfBulkUpdate.new(*args,**kargs).tap { |sf| sf.send(:execute) }
      end

      def operation
        :update
      end

      private

      def send_bulk_operation
        # Positional args follow SalesforceBulkApi::Api#update — presumably
        # (object, data, get_response, send_nulls, no_null_list, batch_size);
        # TODO(review): confirm against the gem version in the Gemfile.lock.
        sf_bulk.send(operation, @object, @data, true, false, [], @batch_size)
      end
    end

    # Public: Class used to execute SF Bulk Create operations (see SfBulkOperation class for
    # more details).
    class SfBulkCreate < SfBulkOperation
      # Public: Builds the operation and executes it immediately.
      def self.create(*args,**kargs)
        SfBulkCreate.new(*args,**kargs).tap { |sf| sf.send(:execute) }
      end

      def operation
        :create
      end

      private

      def send_bulk_operation
        # Note: #create takes one fewer positional arg than #update (no
        # null-field list) — TODO(review): confirm against the gem.
        sf_bulk.send(operation, @object, @data, true, false, @batch_size)
      end
    end

    # Public: Class used to execute SF Bulk Upsert operations (see SfBulkOperation class for
    # more details).
    class SfBulkUpsert < SfBulkOperation
      # Public: Builds the operation and executes it immediately.
      def self.upsert(*args,**kargs)
        SfBulkUpsert.new(*args,**kargs).tap { |sf| sf.send(:execute) }
      end

      def operation
        :upsert
      end

      private

      def send_bulk_operation
        # Upsert does not support external id right now
        sf_bulk.send(operation, @object, @data, 'Id', true, false, [], @batch_size)
      end
    end

    # Public: Class used to execute SF Bulk Query operations (see SfBulkOperation class for
    # more details).
    class SfBulkQuery < SfBulkOperation
      # Public: Builds the query and executes it immediately.
      def self.query(*args,**kargs)
        SfBulkQuery.new(*args,**kargs).tap { |sf| sf.send(:execute) }
      end

      def operation
        :query
      end

      # Queries return data rather than per-record success flags, so the
      # retry machinery never triggers.
      def failed_records?
        false
      end

      private

      def send_bulk_operation
        sf_bulk.send(operation, @object, @data, @batch_size)
      end
    end
  end
end