remi 0.0.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.bundle/config +2 -0
  3. data/.gitignore +3 -2
  4. data/.rspec +2 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +4 -0
  7. data/Gemfile.lock +123 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +94 -3
  10. data/bin/remi +8 -0
  11. data/doc/install-rbenv-os_x.md +47 -0
  12. data/lib/remi.rb +56 -9
  13. data/lib/remi/cli.rb +56 -0
  14. data/lib/remi/core/daru.rb +28 -0
  15. data/lib/remi/core/refinements.rb +21 -0
  16. data/lib/remi/core/string.rb +8 -0
  17. data/lib/remi/cucumber.rb +7 -0
  18. data/lib/remi/cucumber/business_rules.rb +504 -0
  19. data/lib/remi/cucumber/data_source.rb +63 -0
  20. data/lib/remi/data_source.rb +13 -0
  21. data/lib/remi/data_source/csv_file.rb +79 -0
  22. data/lib/remi/data_source/data_frame.rb +10 -0
  23. data/lib/remi/data_source/postgres.rb +58 -0
  24. data/lib/remi/data_source/salesforce.rb +78 -0
  25. data/lib/remi/data_subject.rb +25 -0
  26. data/lib/remi/data_target.rb +15 -0
  27. data/lib/remi/data_target/csv_file.rb +49 -0
  28. data/lib/remi/data_target/data_frame.rb +14 -0
  29. data/lib/remi/data_target/salesforce.rb +49 -0
  30. data/lib/remi/extractor/sftp_file.rb +84 -0
  31. data/lib/remi/field_symbolizers.rb +17 -0
  32. data/lib/remi/job.rb +200 -0
  33. data/lib/remi/lookup/regex_sieve.rb +55 -0
  34. data/lib/remi/project/features/examples.feature +24 -0
  35. data/lib/remi/project/features/formulas.feature +64 -0
  36. data/lib/remi/project/features/sample_job.feature +304 -0
  37. data/lib/remi/project/features/step_definitions/remi_step.rb +310 -0
  38. data/lib/remi/project/features/support/env.rb +10 -0
  39. data/lib/remi/project/features/support/env_app.rb +3 -0
  40. data/lib/remi/project/features/transforms/date_diff.feature +50 -0
  41. data/lib/remi/project/features/transforms/parse_date.feature +34 -0
  42. data/lib/remi/project/features/transforms/prefix.feature +15 -0
  43. data/lib/remi/project/jobs/all_jobs_shared.rb +25 -0
  44. data/lib/remi/project/jobs/copy_source_job.rb +12 -0
  45. data/lib/remi/project/jobs/sample_job.rb +164 -0
  46. data/lib/remi/project/jobs/transforms/date_diff_job.rb +17 -0
  47. data/lib/remi/project/jobs/transforms/parse_date_job.rb +18 -0
  48. data/lib/remi/project/jobs/transforms/prefix_job.rb +16 -0
  49. data/lib/remi/project/jobs/transforms/transform_jobs.rb +3 -0
  50. data/lib/remi/settings.rb +39 -0
  51. data/lib/remi/sf_bulk_helper.rb +265 -0
  52. data/lib/remi/source_to_target_map.rb +93 -0
  53. data/lib/remi/transform.rb +137 -0
  54. data/lib/remi/version.rb +3 -0
  55. data/remi.gemspec +25 -7
  56. data/workbooks/sample_workbook.ipynb +56 -0
  57. data/workbooks/workbook_helper.rb +1 -0
  58. metadata +234 -17
  59. data/lib/noodling.rb +0 -163
  60. data/test/test_NAME.rb +0 -19
data/lib/remi/project/features/support/env.rb
@@ -0,0 +1,10 @@
+ # Auto-generated by Remi.
+ # Add user customizations to env_app.rb
+
+ require 'bundler/setup'
+ require 'remi'
+ require 'remi/cucumber'
+
+ require_relative 'env_app.rb'
+
+ Remi::Settings.log_level = Logger::ERROR
data/lib/remi/project/features/support/env_app.rb
@@ -0,0 +1,3 @@
+ require_relative '../../jobs/sample_job'
+ require_relative '../../jobs/copy_source_job'
+ require_relative '../../jobs/transforms/transform_jobs'
data/lib/remi/project/features/transforms/date_diff.feature
@@ -0,0 +1,50 @@
+ Feature: Tests the date_diff transform
+
+   Background:
+     Given the job is 'DateDiff'
+     And the job source 'Source Data'
+     And the job target 'Target Data'
+
+     And the source 'Source Data'
+     And the target 'Target Data'
+     And the following example record for 'Source Data':
+       | Date1      | Date2      |
+       | 2015-12-31 | 2016-01-02 |
+
+   Scenario Outline: Calculating date difference in days.
+     Given the job parameter 'measure' is "days"
+     And the source field 'Date1' has the value "<Date1>"
+     And the source field 'Date2' has the value "<Date2>"
+     Then the target field 'Difference' is set to the value "<Difference>"
+     Examples:
+       | Date1      | Date2      | Difference |
+       | 2015-12-31 | 2016-01-02 | 2          |
+       | 2014-12-31 | 2015-12-31 | 365        |
+       | 2016-01-02 | 2015-12-31 | -2         |
+       | 2015-02-28 | 2015-03-01 | 1          |
+       | 2016-02-28 | 2016-03-01 | 2          | # leap day
+
+
+   Scenario Outline: Calculating date difference in months.
+     Given the job parameter 'measure' is "months"
+     And the source field 'Date1' has the value "<Date1>"
+     And the source field 'Date2' has the value "<Date2>"
+     Then the target field 'Difference' is set to the value "<Difference>"
+     Examples:
+       | Date1      | Date2      | Difference |
+       | 2015-12-31 | 2016-01-02 | 1          |
+       | 2015-12-31 | 2016-02-02 | 2          |
+       | 2015-12-31 | 2017-02-02 | 14         |
+       | 2016-02-02 | 2015-12-31 | -2         |
+
+   Scenario Outline: Calculating date difference in years.
+     Given the job parameter 'measure' is "years"
+     And the source field 'Date1' has the value "<Date1>"
+     And the source field 'Date2' has the value "<Date2>"
+     Then the target field 'Difference' is set to the value "<Difference>"
+     Examples:
+       | Date1      | Date2      | Difference |
+       | 2015-12-31 | 2016-01-02 | 1          |
+       | 2015-01-01 | 2015-12-31 | 0          |
+       | 2015-12-31 | 2017-02-02 | 2          |
+       | 2016-02-02 | 2015-12-31 | -1         |
data/lib/remi/project/features/transforms/parse_date.feature
@@ -0,0 +1,34 @@
+ Feature: Tests the parse_date transform
+
+   Background:
+     Given the job is 'ParseDate'
+     And the job source 'Source Data'
+     And the job target 'Target Data'
+
+     And the source 'Source Data'
+     And the target 'Target Data'
+     And the following example record for 'Source Data':
+       | Date String |
+       | 2015-12-31  |
+
+   Scenario Outline: Parsing date strings.
+     Given the source field 'Date String' has the value "<Date String>"
+     And the job parameter 'format' is "<Format>"
+     Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
+     Examples:
+       | Date String | Format   | Parsed Date |
+       | 2015-10-21  | %Y-%m-%d | 2015-10-21  |
+       | 10/21/2015  | %m/%d/%Y | 2015-10-21  |
+       | 20151021    | %Y%m%d   | 2015-10-21  |
+       |             | %m/%d/%Y |             |
+
+   Scenario Outline: Parsing date strings for missing values.
+     Given the source field 'Date String' has the value ""
+     And the job parameter 'if_blank' is "<If Blank>"
+
+     Then the target field 'Parsed Date' is set to the value "<Parsed Date>"
+     Examples:
+       | If Blank | Parsed Date |
+       | low      | 1900-01-01  |
+       | high     | 2999-12-31  |
+       |          |             |
data/lib/remi/project/features/transforms/prefix.feature
@@ -0,0 +1,15 @@
+ Feature: Test the prefix transformer.
+
+   Background:
+     Given the job is 'Prefix'
+     And the job source 'Source Data'
+     And the job target 'Target Data'
+
+
+   Scenario: Prefixing a field.
+     Given the source 'Source Data'
+     And the target 'Target Data'
+     Given the following example record for 'Source Data':
+       | Field     |
+       | something |
+     Then the target field 'Field' is set to the value "prefixsomething"
data/lib/remi/project/jobs/all_jobs_shared.rb
@@ -0,0 +1,25 @@
+ # This file was auto-generated by Remi but is expected to be overwritten.
+ # Put anything in here that should be shared by all jobs.
+
+ require 'bundler/setup'
+ require 'remi'
+
+ module AllJobsShared
+   include Remi::Job
+
+   define_param :sftp, {
+     host: 'example.com',
+     username: 'user',
+     password: '1234567890'
+   }
+
+   define_param :salesforce_credentials, {
+     host: 'login.salesforce.com',
+     username: 'user@example.com',
+     password: 'password',
+     security_token: '4342jn3j4n32n4',
+     client_id: 'dkfjsdkfjoasdjdf',
+     client_secret: '28357245723475',
+     instance_url: 'https://na1.salesforce.com'
+   }
+ end
data/lib/remi/project/jobs/copy_source_job.rb
@@ -0,0 +1,12 @@
+ require_relative 'all_jobs_shared'
+
+ class CopySourceJob
+   include AllJobsShared
+
+   define_source :source_data, Remi::DataSource::DataFrame
+   define_target :target_data, Remi::DataTarget::DataFrame
+
+   define_transform :main, sources: :source_data, targets: :target_data do
+     target_data.df = source_data.df.monkey_dup
+   end
+ end
data/lib/remi/project/jobs/sample_job.rb
@@ -0,0 +1,164 @@
+ # This is an example Remi job that was auto-generated by Remi.
+
+ require_relative 'all_jobs_shared'
+
+ class SampleJob
+   include AllJobsShared
+
+   define_source :existing_contacts, Remi::DataSource::Salesforce,
+     object: :Contact,
+     credentials: params[:salesforce_credentials],
+     api: :bulk,
+     fields: {
+       :Id => {},
+       :External_ID__c => {}
+     },
+     query: <<-EOQ
+       SELECT
+         Id,
+         External_ID__c
+       FROM
+         Contact
+     EOQ
+
+
+   define_source :sample_file, Remi::DataSource::CsvFile,
+     extractor: Remi::Extractor::SftpFile.new(
+       credentials: params[:sftp],
+       remote_file: /^SampleFile_(\d+)\.txt/,
+       remote_folder: '/',
+       most_recent_only: true
+     ),
+     csv_options: {
+       headers: true,
+       col_sep: ",",
+       encoding: "ISO-8859-1:UTF-8"
+     },
+     fields: {
+       :student_id => {},
+       :school_id => {},
+       :school_name => {},
+       :program => {},
+       :last_name => {},
+       :first_name => {},
+       :current_email => {},
+       :mailing_address_line_1 => {},
+       :mailing_address_line_2 => {},
+       :mailing_city => {},
+       :mailing_state => {},
+       :mailing_postal_code => {},
+       :birthdate => { type: :date, format: '%m/%d/%Y' },
+       :applied_date => { type: :date, format: '%m/%d/%Y' }
+     }
+
+   define_target :all_contacts, Remi::DataTarget::DataFrame
+
+   define_target :contact_updates, Remi::DataTarget::Salesforce,
+     credentials: params[:salesforce_credentials],
+     object: :Contact,
+     operation: :update,
+     api: :bulk
+
+   define_target :contact_creates, Remi::DataTarget::Salesforce,
+     credentials: params[:salesforce_credentials],
+     object: :Contact,
+     operation: :create,
+     api: :bulk
+
+   define_lookup :program_name_lookup, Remi::Lookup::RegexSieve, {
+     /^BIO$/ => "Biology",
+     /^Fake Biology$/ => nil,
+     /(?:B|Microb)iology/ => "Biology",
+     /^CHEM$/ => "Chemistry",
+     /Chemistry/ => "Chemistry",
+     /Physics/ => "Physics"
+   }
+
+   define_transform :map_common_fields, sources: [:sample_file, :existing_contacts], targets: :all_contacts do
+
+     # Exclude all source records with an invalid program name
+     all_contacts.df = sample_file.df.monkey_dup
+     Remi::SourceToTargetMap.apply(all_contacts.df) do
+       map source(:program) .target(:Major__c)
+         .transform(Remi::Transform[:lookup][program_name_lookup])
+     end
+     all_contacts.df = all_contacts.df.where(all_contacts.df[:Major__c].not_eq(nil))
+
+     student_id_to_sf_id = existing_contacts.df.map_rows { |row| [row[:External_ID__c], row[:Id]] }.to_h
+
+     # Map fields that are common to both creates and updates
+     Remi::SourceToTargetMap.apply(all_contacts.df) do
+
+       # Prefixes source id record and then looks up existing salesforce Id
+       map source(:student_id) .target(:External_ID__c, :Id)
+         .transform(Remi::Transform[:prefix]['SAMP'])
+         .transform(->(v) { [v, Remi::Transform[:lookup][student_id_to_sf_id].call(v)] })
+     end
+   end
+
+
+   define_transform :map_creates, sources: :all_contacts, targets: :contact_creates do
+
+     work_contact_creates = all_contacts.df.where(all_contacts.df[:Id].eq(nil))
+     Remi::SourceToTargetMap.apply(work_contact_creates) do
+
+       map source(:school_id) .target(:School_ID__c)
+       map source(:school_name) .target(:School_Name__c)
+       map source(:first_name) .target(:FirstName)
+         .transform(Remi::Transform[:ifblank].('Not Provided'))
+       map source(:last_name) .target(:LastName)
+         .transform(Remi::Transform[:ifblank].('Not Provided'))
+       map source(:mailing_city) .target(:MailingCity)
+       map source(:mailing_state) .target(:MailingState)
+       map source(:mailing_postal_code) .target(:MailingPostalCode)
+
+       map source(:birthdate) .target(:Birthdate)
+         .transform(Remi::Transform[:format_date][from_fmt: sample_file.fields[:birthdate][:format]])
+
+       map source(:applied_date) .target(:Applied_Date__c)
+         .transform(Remi::Transform[:ifblank].(Date.today.strftime(sample_file.fields[:applied_date][:format])))
+         .transform(Remi::Transform[:format_date].(from_fmt: sample_file.fields[:applied_date][:format]))
+
+       map source(:mailing_address_line_1, :mailing_address_line_2) .target(:MailingStreet)
+         .transform(->(line_1, line_2) {
+           Remi::Transform[:ifblank].(nil).call(line_1).nil? ? [] : [line_1, line_2]
+         })
+         .transform(Remi::Transform[:concatenate].(', '))
+
+       map source(:school_id, :school_name) .target(:School__c)
+         .transform(->(id, name) {[
+           Remi::Transform[:ifblank]["Unknown"].call(id),
+           Remi::Transform[:ifblank]["Unknown"].call(name)
+         ]})
+         .transform(Remi::Transform[:concatenate].('-'))
+
+       map source(:current_email) .target(:Email)
+         .transform(Remi::Transform[:replace].(/,/, '.'))
+         .transform(Remi::Transform[:validate_email].call)
+     end
+
+     contact_creates.df = work_contact_creates[
+       :External_ID__c,
+       :School_ID__c,
+       :School_Name__c,
+       :School__c,
+       :Major__c,
+       :FirstName,
+       :LastName,
+       :Email,
+       :MailingStreet,
+       :MailingCity,
+       :MailingState,
+       :MailingPostalCode,
+       :Birthdate,
+       :Applied_Date__c
+     ]
+   end
+
+   define_transform :map_updates, sources: :all_contacts, targets: :contact_updates do
+     contact_updates.df = all_contacts.df[
+       :Id,
+       :Major__c
+     ].where(all_contacts.df[:Id].not_eq(nil))
+   end
+ end
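
Note on the `program_name_lookup` sieve above: the rules only make sense if they are checked in declaration order with the first match winning, since `/^Fake Biology$/ => nil` must shadow the broader `/(?:B|Microb)iology/` rule so those records are dropped by the later `not_eq(nil)` filter. A minimal sketch of that assumed first-match-wins behavior in plain Ruby (illustrative only, not Remi's actual `Lookup::RegexSieve` implementation):

```ruby
# Hypothetical stand-in for Remi::Lookup::RegexSieve semantics:
# scan the rules in declaration order and return the first match.
PROGRAM_NAME_SIEVE = {
  /^BIO$/              => 'Biology',
  /^Fake Biology$/     => nil,       # shadows the broader rule below
  /(?:B|Microb)iology/ => 'Biology',
  /^CHEM$/             => 'Chemistry',
  /Chemistry/          => 'Chemistry',
  /Physics/            => 'Physics'
}.freeze

def sieve_lookup(value)
  PROGRAM_NAME_SIEVE.each { |pattern, mapped| return mapped if pattern =~ value }
  nil # no rule matched: treated the same as an invalid program name
end

sieve_lookup('Microbiology') #=> "Biology"
sieve_lookup('Fake Biology') #=> nil (record later excluded by the not_eq(nil) filter)
```

Ruby hashes preserve insertion order, which is what makes the shadowing rule above work.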
data/lib/remi/project/jobs/transforms/date_diff_job.rb
@@ -0,0 +1,17 @@
+ require_relative '../all_jobs_shared'
+
+ class DateDiffJob
+   include AllJobsShared
+
+   define_param :measure, :days
+   define_source :source_data, Remi::DataSource::DataFrame
+   define_target :target_data, Remi::DataTarget::DataFrame
+
+   define_transform :main, sources: :source_data, targets: :target_data do
+     Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+       map source(:date1, :date2) .target(:difference)
+         .transform(->(d1,d2) { [Date.strptime(d1), Date.strptime(d2)] })
+         .transform(Remi::Transform[:date_diff].(params[:measure]))
+     end
+   end
+ end
data/lib/remi/project/jobs/transforms/parse_date_job.rb
@@ -0,0 +1,18 @@
+ require_relative '../all_jobs_shared'
+
+ class ParseDateJob
+   include AllJobsShared
+
+   define_param :format, '%Y-%m-%d'
+   define_param :if_blank, nil
+   define_source :source_data, Remi::DataSource::DataFrame
+   define_target :target_data, Remi::DataTarget::DataFrame
+
+   define_transform :main, sources: :source_data, targets: :target_data do
+
+     Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+       map source(:date_string) .target(:parsed_date)
+         .transform(Remi::Transform[:parse_date].(format: params[:format], if_blank: params[:if_blank]))
+     end
+   end
+ end
data/lib/remi/project/jobs/transforms/prefix_job.rb
@@ -0,0 +1,16 @@
+ require_relative '../all_jobs_shared'
+
+ class PrefixJob
+   include AllJobsShared
+
+   define_param :prefix, 'prefix'
+   define_source :source_data, Remi::DataSource::DataFrame
+   define_target :target_data, Remi::DataTarget::DataFrame
+
+   define_transform :main, sources: :source_data, targets: :target_data do
+     Remi::SourceToTargetMap.apply(source_data.df, target_data.df) do
+       map source(:field) .target(:field)
+         .transform(Remi::Transform[:prefix].(params[:prefix]))
+     end
+   end
+ end
data/lib/remi/project/jobs/transforms/transform_jobs.rb
@@ -0,0 +1,3 @@
+ require_relative 'prefix_job'
+ require_relative 'parse_date_job'
+ require_relative 'date_diff_job'
data/lib/remi/settings.rb
@@ -0,0 +1,39 @@
+ module Remi
+   module Settings
+     extend self
+
+     def work_dir
+       @work_dir ||= File.join(Dir.tmpdir, Dir::Tmpname.make_tmpname('', nil))
+     end
+
+     def work_dir=(arg)
+       @work_dir = arg
+     end
+
+     def log_level
+       @log_level ||= Logger::INFO
+     end
+
+     def log_level=(arg)
+       @log_level = arg
+     end
+
+     def logger
+       return @logger.call if @logger.respond_to? :call
+       @logger ||= lambda do
+         l = Logger.new(STDOUT)
+         l.level = log_level
+         l.formatter = proc do |severity, datetime, progname, msg|
+           "#{msg}\n"
+         end
+         l
+       end
+
+       @logger.call
+     end
+
+     def logger=(arg)
+       @logger = arg
+     end
+   end
+ end
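
Since `Settings.logger` invokes `@logger` whenever it responds to `call`, the logger can be rebuilt lazily on each access (for example, after `log_level` changes); the generated `env.rb` earlier in this diff uses the same accessors. A small usage sketch, assuming only the methods defined in this file (the work directory path is hypothetical):

```ruby
require 'logger'

Remi::Settings.log_level = Logger::WARN   # picked up by the default logger
Remi::Settings.work_dir  = '/tmp/remi'    # hypothetical scratch directory

# Assign a callable so every Settings.logger access rebuilds the logger
# with whatever log_level is current at that moment.
Remi::Settings.logger = lambda do
  Logger.new(STDERR).tap { |l| l.level = Remi::Settings.log_level }
end

Remi::Settings.logger.warn 'running with a custom logger'
```

Note that `Settings#logger` always ends in `@logger.call`, so assignments are expected to be callables, as in the default lambda above.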
data/lib/remi/sf_bulk_helper.rb
@@ -0,0 +1,265 @@
+ module Remi
+   module SfBulkHelper
+
+     class DupeLookupKeyError < StandardError; end
+     class MaxAttemptError < StandardError; end
+
+     # Public: Class used to execute SF Bulk operations. This class is not meant to be
+     # used directly. It is instead meant to be inherited by classes that perform the
+     # specific query, update, create, or upsert operations.
+     #
+     # Examples
+     #
+     #   sf_query = SfBulkQuery.query(client, 'Contact', 'SELECT Id, Name FROM Contact')
+     #   puts sf_query.result
+     #
+     #   mydata = [{ 'Id' => '001G000000ncxb8IAA', 'Name' => 'Happy Feet' }]
+     #   sf_update = SfBulkUpdate.update(client, 'Contact', mydata)
+     class SfBulkOperation
+
+       # Public: Initializes an SfBulkOperation (does not execute the operation).
+       #
+       # restforce_client - An instance of Restforce that is used to authenticate the connection.
+       # object           - The name of the object to operate on (e.g., Contact, Task, etc.).
+       # data             - For query operations, this is the SOQL query string. For other
+       #                    operations, this is an array of hashes, where the hash keys are
+       #                    column names and the values are the data.
+       # batch_size       - Batch size used to download or upload data (default: 5000).
+       # max_attempts     - The maximum number of attempts to upload data (default: 2).
+       # logger           - Logger to use (default: Logger.new(STDOUT)).
+       def initialize(restforce_client, object, data, batch_size: 5000, max_attempts: 2, logger: Logger.new(STDOUT))
+         @restforce_client = restforce_client
+         @object = object
+         @data = data
+         @batch_size = batch_size
+         @max_attempts = max_attempts
+         @attempts = Hash.new(0)
+         @logger = logger
+       end
+
+       # Public: A symbol representing the operation to be performed (:query, :update, :create, :upsert).
+       def operation
+         :undefined
+       end
+
+       # Public: Returns the instance of SalesforceBulkApi::Api used for bulk operations.
+       def sf_bulk
+         @sf_bulk ||= SalesforceBulkApi::Api.new(@restforce_client).tap { |o| o.connection.set_status_throttle(5) }
+       end
+
+       # Public: Returns the raw result from the SalesforceBulkApi query.
+       def raw_result
+         @raw_result || execute
+       end
+
+       # Public: Returns useful metadata about the batch query.
+       def info
+         execute if @attempts[:total] == 0
+
+         return @info if @info and @attempts[:info] == @attempts[:total]
+         @attempts[:info] += 1
+
+         @info = raw_result.reject { |k,v| k == 'batches' }.tap do |h|
+           h['query'] = @data if operation == :query
+         end
+       end
+
+       # Public: Collects the results from all of the batches and aggregates them
+       # into an array of hashes. Each element of the array represents a record in the
+       # result and the hash gives the column-value pairs. Note that if multiple retries
+       # are needed, this is just the final result.
+       #
+       # Returns an array of hashes.
+       def result
+         execute if @attempts[:total] == 0
+
+         return @result if @result and @attempts[:result] == @attempts[:total]
+         @attempts[:result] += 1
+
+         @result = []
+         raw_result['batches'].each do |batch|
+           next unless batch['response']
+
+           batch['response'].each do |record|
+             @result << record.inject({}) { |h, (k,v)| h[k] = v.first unless ['xsi:type','type'].include? k; h }
+           end
+
+           # delete raw result at end of processing to free memory
+           batch['response'] = nil
+         end
+
+         @result
+       end
+
+       # Public: Converts the result into a hash that can be used to
+       # look up the row for a given key (e.g., external id field).
+       #
+       # key        - A string representing the name of the column to be used as the lookup key.
+       # duplicates - Indicates whether duplicate keys are allowed. If they are allowed,
+       #              only the first row found is retained. If duplicates are not allowed,
+       #              an error is raised (default: false).
+       #
+       # Returns a hash.
+       def as_lookup(key:, duplicates: false)
+         execute if @attempts[:total] == 0
+
+         @as_lookup ||= {}
+         @attempts[:as_lookup] = Hash.new(0) if @attempts[:as_lookup] == 0
+
+         return @as_lookup[key] if @as_lookup[key] and @attempts[:as_lookup][key] == @attempts[:total]
+         @attempts[:as_lookup][key] += 1
+
+         @as_lookup[key] = result.inject({}) do |lkp, row|
+           raise DupeLookupKeyError, "Duplicate key: #{row[key]} found in result of query: #{@data}" if lkp.has_key?(row[key]) and not duplicates
+           lkp[row[key]] = row unless lkp.has_key?(row[key])
+           lkp
+         end
+       end
+
+
+       # Public: Returns true if any of the records failed to update.
+       def failed_records?
+         n_failed_records = result.reduce(0) do |count, row|
+           count += 1 if row['success'] != 'true'
+           count
+         end
+
+         n_failed_batches = raw_result['batches'].reduce(0) do |count, batch|
+           count += 1 if batch['state'].first != 'Completed'
+           count
+         end
+
+         n_failed_records > 0 || n_failed_batches > 0
+       end
+
+
+       private
+
+       # Private: Sends the operation to Salesforce using the bulk API.
+       def send_bulk_operation
+         raise "No SF bulk operation defined for #{operation}"
+       end
+
+       # Private: Executes the operation and retries if needed.
+       def execute
+         @attempts[:total] += 1
+         @logger.info "Executing Salesforce Bulk operation: #{operation}"
+
+         @raw_result = send_bulk_operation
+         @logger.info "Bulk operation response: "
+         JSON.pretty_generate(info).split("\n").each { |l| @logger.info l }
+
+         retry_failed if failed_records?
+
+         @logger.info JSON.pretty_generate(info)
+         @raw_result
+       end
+
+       # Private: Drops any data that has already been loaded to Salesforce.
+       # Note that this doesn't work for created data since the initial data
+       # won't have a Salesforce id. Sometimes batches can fail completely
+       # and won't give anything in the result set. Therefore, the only way
+       # to be able to drop data that's already been created would be to
+       # know how the data was split into batches, and the gem we use does not
+       # make this simple. So for now, we live with the defect.
+       def drop_successfully_updated_data
+         lkp_result_by_id = as_lookup(key: 'id', duplicates: true)
+         @data.reject! do |row|
+           sf_bulk_result = lkp_result_by_id[row['Id'] || row[:Id]]
+           sf_bulk_result && (sf_bulk_result['success'] == 'true')
+         end
+
+         nil
+       end
+
+
+       # Private: Selects data that needs to be retried and re-executes the operation.
+       def retry_failed
+         raise MaxAttemptError if @attempts[:total] >= @max_attempts
+         @logger.warn "Retrying #{operation} - #{@attempts[:total]} of #{@max_attempts}"
+
+         drop_successfully_updated_data
+
+         execute
+       end
+     end
+
+
+     # Public: Class used to execute SF Bulk Update operations (see the SfBulkOperation
+     # class for more details).
+     class SfBulkUpdate < SfBulkOperation
+       def self.update(*args, **kargs)
+         SfBulkUpdate.new(*args, **kargs).tap { |sf| sf.send(:execute) }
+       end
+
+       def operation
+         :update
+       end
+
+       private
+
+       def send_bulk_operation
+         sf_bulk.send(operation, @object, @data, true, false, [], @batch_size)
+       end
+     end
+
+     # Public: Class used to execute SF Bulk Create operations (see the SfBulkOperation
+     # class for more details).
+     class SfBulkCreate < SfBulkOperation
+       def self.create(*args, **kargs)
+         SfBulkCreate.new(*args, **kargs).tap { |sf| sf.send(:execute) }
+       end
+
+       def operation
+         :create
+       end
+
+       private
+
+       def send_bulk_operation
+         sf_bulk.send(operation, @object, @data, true, false, @batch_size)
+       end
+     end
+
+     # Public: Class used to execute SF Bulk Upsert operations (see the SfBulkOperation
+     # class for more details).
+     class SfBulkUpsert < SfBulkOperation
+       def self.upsert(*args, **kargs)
+         SfBulkUpsert.new(*args, **kargs).tap { |sf| sf.send(:execute) }
+       end
+
+       def operation
+         :upsert
+       end
+
+       private
+
+       def send_bulk_operation
+         # Upsert does not support an external id right now
+         sf_bulk.send(operation, @object, @data, 'Id', true, false, [], @batch_size)
+       end
+     end
+
+     # Public: Class used to execute SF Bulk Query operations (see the SfBulkOperation
+     # class for more details).
+     class SfBulkQuery < SfBulkOperation
+       def self.query(*args, **kargs)
+         SfBulkQuery.new(*args, **kargs).tap { |sf| sf.send(:execute) }
+       end
+
+       def operation
+         :query
+       end
+
+       def failed_records?
+         false
+       end
+
+       private
+
+       def send_bulk_operation
+         sf_bulk.send(operation, @object, @data, @batch_size)
+       end
+     end
+   end
+ end
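
Putting the pieces of `SfBulkHelper` together, a typical round trip queries contacts in bulk, indexes the result by an external id, and pushes updates back, with failed batches retried up to `max_attempts` times. A sketch following the usage pattern in the class comment above, assuming an authenticated Restforce client and hypothetical record values:

```ruby
require 'restforce'
require 'remi'

# Hypothetical credentials; any authenticated Restforce client works here.
client = Restforce.new(username: 'user@example.com', password: 'password',
                       security_token: 'token', client_id: 'id', client_secret: 'secret')

sf_query = Remi::SfBulkHelper::SfBulkQuery.query(
  client, 'Contact', 'SELECT Id, External_ID__c FROM Contact'
)
sf_query.result                                        # => [{ 'Id' => ..., 'External_ID__c' => ... }, ...]
by_ext_id = sf_query.as_lookup(key: 'External_ID__c')  # raises DupeLookupKeyError on duplicate keys

# Update a record found via the lookup (hypothetical external id and value).
updates = [{ 'Id' => by_ext_id['SAMP1234']['Id'], 'Major__c' => 'Biology' }]
sf_update = Remi::SfBulkHelper::SfBulkUpdate.update(client, 'Contact', updates,
                                                    max_attempts: 2, logger: Remi::Settings.logger)
raise 'Salesforce bulk update failed' if sf_update.failed_records?
```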