salesforce_bulk_query-edge 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/.travis.yml +26 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +168 -0
- data/Rakefile +20 -0
- data/env_setup-example.sh +13 -0
- data/lib/salesforce_bulk_query.rb +108 -0
- data/lib/salesforce_bulk_query/batch.rb +153 -0
- data/lib/salesforce_bulk_query/connection.rb +140 -0
- data/lib/salesforce_bulk_query/job.rb +199 -0
- data/lib/salesforce_bulk_query/logger.rb +44 -0
- data/lib/salesforce_bulk_query/query.rb +192 -0
- data/lib/salesforce_bulk_query/utils.rb +16 -0
- data/lib/salesforce_bulk_query/version.rb +3 -0
- data/new-version.sh +22 -0
- data/salesforce_bulk_query.gemspec +34 -0
- data/spec/salesforce_bulk_query_spec.rb +227 -0
- data/spec/spec_helper.rb +9 -0
- metadata +207 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2a22424006fca0ee3da13b2edea99a629199d708
|
4
|
+
data.tar.gz: 561cf3fe12a4a240e70c5140dae51b0d72d86617
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bda06a6eef7746b4ba76679057ca36d2232de8e0c6158c7de7af3b852bb0a9af255e45d9dae2f6fc4a93e32d65646f932e516461462652f1749854480778c2b8
|
7
|
+
data.tar.gz: ef399db80fa4dc12ace6ccb135fcffc7e83524efeed5d6b11fbe8580826bcbc485cd2201f96d0798f36bad11f113f07498ba0ccf87a8fee8374f501e576b49b5
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
language: ruby
|
2
|
+
sudo: false
|
3
|
+
os:
|
4
|
+
- linux
|
5
|
+
- osx
|
6
|
+
branches:
|
7
|
+
except:
|
8
|
+
- gh-pages
|
9
|
+
- next_doc_release
|
10
|
+
env:
|
11
|
+
global:
|
12
|
+
- LOGGING=true
|
13
|
+
- secure: EVku6JcQcp69DksaZUo8Sluk2Mk45CHjs/7w2xDG6n58Px3zxG3i0DSxm5SYBaDvcIQCv+LLt/LyjTRs5t51GbQrFLZGxGLMHDkJ/eS4K2q27P8NGqM/Yj4WtxB7BYQNLQf00eCo9HeTyjY7AsAqZG8wn6W9DUF5Gtu7AUO0aiQ=
|
14
|
+
- secure: GwO5mPCUhaeC1fHuX86T3INRrrpqb2raWS+M83Ko4fnwKUryGiMOVYtzu5NSiTfN2YRHd3gvAsisempqCHndTPvuWcJhSCIGEZdIYgdxXAz4Meb78t0w/5rY7O9BOJ5+Zin098SmB/uiPvW/HDSQw+T+bBHO3ETw+h8VvFDmHM4=
|
15
|
+
- secure: TbguXDUV3wZhZ9ZHGYP2X0zo8gJejeuawmHQ6xey5gZzYrAH92r6wgddQ7KM2Vc+UCw0Gpnuy2AKM6EhEGbRShc5u6LmwAJIqZ6iG48ALOxha6JOoICuSbahRmnNSVOP1utzAXjOB20YO8lao5PGNfqyacFnr3VhqG22Kl5EJhc=
|
16
|
+
- secure: a2wb07pS1Cl6aiKN0tSVjW4Zsc0K0yQ3mI+nrR3KQpF1vaMu43gKYRGLz3NDWcaPrIgKfFyeKjucxd8o52dLn5Mi8OW7hpPWy3d9Y9umx0PpTYHl16tOw2PLpWedqGTM/eZn5Y8UtzrsAdVJKIcCXE7SA28qGmaR5KKuaJjC+3w=
|
17
|
+
- secure: JvH3Nr6N9GbSWDNc8vE0cwDZPQcUgwXB2MjtkPZ/fnfrR/x/cNmCyc2lzZqfExmDNn17YbR2fPP8zKBczEDvY8q03v42LfnpDSCR2F73yJzQe9dxHwACroAT8VgAz3o23ejgry31SzZz+IoUvZ2fzuoZs+uAt1yr48I2V03PqfM=
|
18
|
+
rvm:
|
19
|
+
- 1.9.3
|
20
|
+
- jruby-19mode
|
21
|
+
- 2.1
|
22
|
+
- 2.2
|
23
|
+
before_install:
|
24
|
+
- gem update --system
|
25
|
+
- gem update bundler
|
26
|
+
script: rake ci
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(BSD License)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Yatish Mehta & GoodData Corporation. All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided
|
6
|
+
that the following conditions are met:
|
7
|
+
|
8
|
+
* Redistributions of source code must retain the above copyright notice, this list of conditions and
|
9
|
+
the following disclaimer.
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions
|
11
|
+
and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
12
|
+
* Neither the name of the GoodData Corporation nor the names of its contributors may be used to endorse
|
13
|
+
or promote products derived from this software without specific prior written permission.
|
14
|
+
|
15
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
16
|
+
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
17
|
+
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
18
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
19
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
20
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
21
|
+
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
22
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
Salesforce Bulk Query
|
2
|
+
=====================
|
3
|
+
A library for downloading data from Salesforce Bulk API. We only focus on querying, other operations of the API aren't supported. Designed to handle a lot of data.
|
4
|
+
|
5
|
+
Derived from [Salesforce Bulk API](https://github.com/yatish27/salesforce_bulk_api)
|
6
|
+
|
7
|
+
## Status
|
8
|
+
|
9
|
+
[![Gem Version](https://badge.fury.io/rb/salesforce_bulk_query.png)](http://badge.fury.io/rb/salesforce_bulk_query)
|
10
|
+
[![Downloads](http://img.shields.io/gem/dt/salesforce_bulk_query.svg)](http://rubygems.org/gems/salesforce_bulk_query)
|
11
|
+
[![Dependency Status](https://gemnasium.com/cvengros/salesforce_bulk_query.png)](https://gemnasium.com/cvengros/salesforce_bulk_query)
|
12
|
+
[![Code Climate](https://codeclimate.com/github/cvengros/salesforce_bulk_query.png)](https://codeclimate.com/github/cvengros/salesforce_bulk_query)
|
13
|
+
[![Build Status](https://travis-ci.org/cvengros/salesforce_bulk_query.png)](https://travis-ci.org/cvengros/salesforce_bulk_query)
|
14
|
+
[![Coverage Status](https://coveralls.io/repos/cvengros/salesforce_bulk_query/badge.png)](https://coveralls.io/r/cvengros/salesforce_bulk_query)
|
15
|
+
|
16
|
+
## Basic Usage
|
17
|
+
To install, run:
|
18
|
+
|
19
|
+
gem install salesforce_bulk_query
|
20
|
+
|
21
|
+
or add
|
22
|
+
|
23
|
+
gem salesforce_bulk_query
|
24
|
+
|
25
|
+
to your Gemfile.
|
26
|
+
|
27
|
+
Before using the library, make sure you have the right account in your Salesforce organization that has access to API and that you won't run out of the [API limits](http://www.salesforce.com/us/developer/docs/api_asynchpre/Content/asynch_api_concepts_limits.htm#batch_proc_time_title).
|
28
|
+
|
29
|
+
You will also need a Salesforce connected app for the `client_id` and `client_secret` params, see [the guide](https://help.salesforce.com/HTViewHelpDoc?id=connected_app_create.htm&language=en_US). The app needs to have OAuth settings enabled (even if you plan to use just username-password-token authentication). The required permissions are _Access and manage your data (api)_, _Perform requests on your behalf at any time (refresh token, offline access)_. The other parameters such as redirect url don't need to be set.
|
30
|
+
|
31
|
+
For doing most of the API calls, the library uses [Restforce](https://github.com/ejholmes/restforce) Code example:
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
require 'restforce'
|
35
|
+
require 'salesforce_bulk_query'
|
36
|
+
|
37
|
+
# Create a restforce client instance
|
38
|
+
# with basic auth
|
39
|
+
restforce = Restforce.new(
|
40
|
+
:username => 'me',
|
41
|
+
:password => 'password',
|
42
|
+
:security_token => 'token',
|
43
|
+
:client_id => "my sfdc app client id",
|
44
|
+
:client_secret => "my sfdc app client secret"
|
45
|
+
)
|
46
|
+
|
47
|
+
# or OAuth
|
48
|
+
restforce = Restforce.new(
|
49
|
+
:refresh_token => "xyz",
|
50
|
+
:client_id => "my sfdc app client id",
|
51
|
+
:client_secret => "my sfdc app client secret"
|
52
|
+
)
|
53
|
+
|
54
|
+
bulk_api = SalesforceBulkQuery::Api.new(restforce)
|
55
|
+
|
56
|
+
# query the api
|
57
|
+
result = bulk_api.query("Task", "SELECT Id, Name FROM Task")
|
58
|
+
|
59
|
+
# the result is files
|
60
|
+
puts "All the downloaded stuff is in csvs: #{result[:filenames]}"
|
61
|
+
|
62
|
+
# query is a blocking call and can take several hours
|
63
|
+
# if you want to just start the query asynchronously, use
|
64
|
+
query = start_query("Task", "SELECT Id, Name FROM Task")
|
65
|
+
|
66
|
+
# get a coffee
|
67
|
+
sleep(1234)
|
68
|
+
|
69
|
+
# get what's available and check the status
|
70
|
+
results = query.get_available_results
|
71
|
+
if results[:succeeded]
|
72
|
+
puts "All the downloaded stuff is in csvs: #{results[:filenames]}"
|
73
|
+
else
|
74
|
+
puts "This is going to take a while, get another coffee"
|
75
|
+
end
|
76
|
+
```
|
77
|
+
|
78
|
+
## How it works
|
79
|
+
|
80
|
+
The library uses the [Salesforce Bulk API](https://www.salesforce.com/us/developer/docs/api_asynch/index_Left.htm#CSHID=asynch_api_bulk_query.htm|StartTopic=Content%2Fasynch_api_bulk_query.htm|SkinName=webhelp). The given query is divided into 15 subqueries, according to the [limits](http://www.salesforce.com/us/developer/docs/api_asynchpre/Content/asynch_api_concepts_limits.htm#batch_proc_time_title). Each subquery is an interval based on the CreatedDate Salesforce field (date field can be customized). The limits are passed to the API in SOQL queries. Subqueries are sent to the API as batches and added to a job.
|
81
|
+
|
82
|
+
The first interval starts with the date the first Salesforce object was created, we query Salesforce REST API for that. If this query times out, we use a constant. The last interval ends a few minutes before now to avoid consistency issues. Custom start and end can be passed - see Options.
|
83
|
+
|
84
|
+
Job has a fixed time limit to process all the subqueries. Batches that finish in time are downloaded to CSVs, batches that don't are divided to 15 subqueries each and added to new jobs.
|
85
|
+
|
86
|
+
CSV results are downloaded by chunks, so that we don't run into memory related issues. All other requests are made through the Restforce client that is passed when instantiating the Api class. Restforce is not in the dependencies, so theoretically you can pass another object with the same set of methods as Restforce client.
|
87
|
+
|
88
|
+
## Options
|
89
|
+
There are a few optional settings you can pass to the `Api` methods:
|
90
|
+
* `api_version`: Which Salesforce api version should be used
|
91
|
+
* `logger`: Where logs should go, see the example.
|
92
|
+
* `filename_prefix`: Prefix applied to csv files.
|
93
|
+
* `directory_path`: Custom directory path for CSVs, if omitted, a new temp directory is created.
|
94
|
+
* `check_interval`: How often the results should be checked in secs.
|
95
|
+
* `time_limit`: Maximum time the query can take. If this time limit is exceeded, available results are downloaded and the list of subqueries that didn't finish is returned. In seconds. The limit should be understood as a limit for waiting. When the limit is reached the function downloads data that is ready, which can take some additional time. If no limit is given the query runs until it finishes.
|
96
|
+
* `date_field`: Salesforce date field that will be used for splitting the query, and optionally limiting the results. Must be a date field on the queried sobject. Default is `CreatedDate`
|
97
|
+
* `date_from`, `date_to`: limits for the `date_field` (`CreatedDate` by default). Note that queries can't contain any WHERE statements as we're doing some manipulations to create subqueries and we don't want things to get too difficult. So this is the way to limit the query yourself. The format is like `"1999-01-01T00:00:00.000Z"`
|
98
|
+
* `single_batch`: If true, the queries are not divided into subqueries as described above. Instead one batch job is created with the given query. This is faster for small amount of data, but will fail with a timeout if you have a lot of data.
|
99
|
+
|
100
|
+
See specs for exact usage.
|
101
|
+
|
102
|
+
## Logging
|
103
|
+
|
104
|
+
```ruby
|
105
|
+
require 'logger'
|
106
|
+
require 'restforce'
|
107
|
+
|
108
|
+
# create the restforce client
|
109
|
+
restforce = Restforce.new(...)
|
110
|
+
|
111
|
+
# instantiate a logger and pass it to the Api constructor
|
112
|
+
logger = Logger.new(STDOUT)
|
113
|
+
bulk_api = SalesforceBulkQuery::Api.new(restforce, :logger => logger)
|
114
|
+
|
115
|
+
# switch off logging in Restforce so you don't get every message twice
|
116
|
+
Restforce.log = false
|
117
|
+
```
|
118
|
+
|
119
|
+
If you're using Restforce as a client (which you probably are) and you want to do logging, Salesforce Bulk Query will use a custom logging middleware for Restforce. This is because the original logging middleware puts all API responses to log, which is not something you would like to do for a few gigabytes CSVs. When you use the :logger parameter it's recommended you switch off the default logging in Restforce, otherwise you'll get all messages twice.
|
120
|
+
|
121
|
+
## Notes
|
122
|
+
|
123
|
+
Query (user given) -> Job (Salesforce construct that encapsulates 15 batches) -> Batch (1 SOQL with Date constraints)
|
124
|
+
|
125
|
+
At the beginning the query is divided into 15 subqueries and put into a single job. When one of the subqueries fails, a new job with 15 subqueries is created, the range of the failed query is divided into 15 sub-subqueries.
|
126
|
+
|
127
|
+
## Change policy
|
128
|
+
|
129
|
+
The gem is trying to follow [semantic versioning](http://semver.org/). All methods and options described in this readme document are considered public API. If any of these change, at least the minor version is bumped. Methods and their params not described in this document can change even in patches.
|
130
|
+
|
131
|
+
## Running tests locally
|
132
|
+
Travis CI is set up for this repository to make sure all the tests are passing with each commit.
|
133
|
+
|
134
|
+
To run the tests locally:
|
135
|
+
|
136
|
+
* Copy the env_setup-example.sh file
|
137
|
+
```
|
138
|
+
cp env_setup-example.sh env_setup.sh
|
139
|
+
```
|
140
|
+
* Setup all the params in env_setup. USERNAME, PASSWORD and TOKEN are your salesforce account credentials. You can get those by [registering for a free developer account](https://developer.salesforce.com/signup). You might need to [reset your security token](https://help.salesforce.com/apex/HTViewHelpDoc?id=user_security_token.htm) to put it to TOKEN variable. CLIENT_ID and CLIENT_SECRET belong to your Salesforce connected app. You can create one by following the steps outlined in [the tutorial](https://help.salesforce.com/apex/HTViewHelpDoc?id=connected_app_create.htm). Make sure you check the 'api' permission.
|
141
|
+
* Run the env_setup
|
142
|
+
```
|
143
|
+
. env_setup.sh
|
144
|
+
```
|
145
|
+
* Run the tests
|
146
|
+
```
|
147
|
+
bundle exec rspec
|
148
|
+
```
|
149
|
+
|
150
|
+
Note that env_setup.sh is ignored from git in .gitignore so that you don't commit your credentials by accident.
|
151
|
+
|
152
|
+
## Contributing
|
153
|
+
|
154
|
+
1. Fork it ( https://github.com/[my-github-username]/salesforce_bulk_query/fork )
|
155
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
156
|
+
3. Run the tests (see above), fix if they fail.
|
157
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
158
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
159
|
+
5. Create a new Pull Request
|
160
|
+
|
161
|
+
Make sure you run all the tests and they pass. If you create a new feature, write a test for it.
|
162
|
+
|
163
|
+
## Copyright
|
164
|
+
|
165
|
+
Copyright (c) 2014 Yatish Mehta & GoodData Corporation. See [LICENSE](LICENSE) for details.
|
166
|
+
|
167
|
+
|
168
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'coveralls/rake/task'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
|
5
|
+
Coveralls::RakeTask.new
|
6
|
+
|
7
|
+
task default: %w[ci]
|
8
|
+
|
9
|
+
desc 'Run continuous integration test'
|
10
|
+
task :ci do
|
11
|
+
Rake::Task['test:unit'].invoke
|
12
|
+
Rake::Task['coveralls:push'].invoke
|
13
|
+
end
|
14
|
+
|
15
|
+
namespace :test do
|
16
|
+
desc "Run unit tests"
|
17
|
+
RSpec::Core::RakeTask.new(:unit) do |t|
|
18
|
+
t.pattern = 'spec/**/*.rb'
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
require 'salesforce_bulk_query/connection'
|
4
|
+
require 'salesforce_bulk_query/query'
|
5
|
+
require 'salesforce_bulk_query/logger'
|
6
|
+
require 'salesforce_bulk_query/utils'
|
7
|
+
require 'salesforce_bulk_query/version'
|
8
|
+
|
9
|
+
|
10
|
+
# Module where all the stuff is happening
|
11
|
+
module SalesforceBulkQuery
|
12
|
+
|
13
|
+
# Abstracts the whole library, class the user interacts with
|
14
|
+
class Api
|
15
|
+
@@DEFAULT_API_VERSION = '29.0'
|
16
|
+
|
17
|
+
Encoding.default_external = Encoding::UTF_8
|
18
|
+
Encoding.default_internal = Encoding::UTF_8
|
19
|
+
|
20
|
+
# Constructor
|
21
|
+
# @param client [Restforce] An instance of the Restforce client, that is used internally to access Salesforce api
|
22
|
+
# @param options
|
23
|
+
def initialize(client, options={})
|
24
|
+
@logger = options[:logger]
|
25
|
+
|
26
|
+
api_version = options[:api_version] || @@DEFAULT_API_VERSION
|
27
|
+
ssl_version = options[:ssl_version]
|
28
|
+
# use our own logging middleware if logger passed
|
29
|
+
if @logger && client.respond_to?(:middleware)
|
30
|
+
client.middleware.use(SalesforceBulkQuery::Logger, @logger, options)
|
31
|
+
end
|
32
|
+
|
33
|
+
# initialize connection
|
34
|
+
@connection = SalesforceBulkQuery::Connection.new(client, api_version, @logger, options[:filename_prefix],ssl_version)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Get the Salesforce instance URL
|
38
|
+
def instance_url
|
39
|
+
# make sure it ends with /
|
40
|
+
url = @connection.client.instance_url
|
41
|
+
url += '/' if url[-1] != '/'
|
42
|
+
return url
|
43
|
+
end
|
44
|
+
|
45
|
+
CHECK_INTERVAL = 30
|
46
|
+
|
47
|
+
# Query the Salesforce API. It's a blocking method - waits until the query is resolved
|
48
|
+
# can take quite some time
|
49
|
+
# @param sobject Salesforce object, e.g. "Opportunity"
|
50
|
+
# @param soql SOQL query, e.g. "SELECT Name FROM Opportunity"
|
51
|
+
# @return hash with :filenames and other useful stuff
|
52
|
+
def query(sobject, soql, options={})
|
53
|
+
@logger.info "Running query: #{soql}. Gem version salesforce_bulk_query: #{SalesforceBulkQuery::VERSION}" if @logger
|
54
|
+
check_interval = options[:check_interval] || CHECK_INTERVAL
|
55
|
+
time_limit = options[:time_limit] # in seconds
|
56
|
+
|
57
|
+
start_time = Time.now
|
58
|
+
|
59
|
+
# start the machinery
|
60
|
+
query = start_query(sobject, soql, options)
|
61
|
+
results = nil
|
62
|
+
|
63
|
+
loop do
|
64
|
+
# get available results and check the status
|
65
|
+
results = query.get_available_results(options)
|
66
|
+
@logger.debug "get_available_results: #{results}"
|
67
|
+
|
68
|
+
# if finished get the result and we're done
|
69
|
+
if results[:succeeded]
|
70
|
+
|
71
|
+
# we're done
|
72
|
+
@logger.info "Query succeeded. Results: #{results}" if @logger
|
73
|
+
break
|
74
|
+
end
|
75
|
+
|
76
|
+
# if we've run out of time limit, go away
|
77
|
+
if time_limit && (Time.now - start_time > time_limit)
|
78
|
+
@logger.warn "Ran out of time limit, downloading what's available and terminating" if @logger
|
79
|
+
|
80
|
+
@logger.info "Downloaded the following files: #{results[:filenames]} The following didn't finish in time: #{results[:unfinished_subqueries]}." if @logger
|
81
|
+
break
|
82
|
+
end
|
83
|
+
|
84
|
+
@logger.info "Sleeping #{check_interval}" if @logger
|
85
|
+
@logger.info "Downloaded files: #{results[:filenames].length} Jobs in progress: #{query.jobs_in_progress.length}"
|
86
|
+
sleep(check_interval)
|
87
|
+
end
|
88
|
+
|
89
|
+
# nice list of files to log
|
90
|
+
if @logger && ! results[:filenames].empty?
|
91
|
+
|
92
|
+
@logger.info "Download finished. Downloaded files in #{File.dirname(results[:filenames][0])}. Filename size [line count]:"
|
93
|
+
@logger.info "\n" + results[:filenames].sort.map{|f| "#{File.basename(f)} #{File.size(f)} #{Utils.line_count(f) if options[:count_lines]}"}.join("\n")
|
94
|
+
end
|
95
|
+
return results
|
96
|
+
end
|
97
|
+
|
98
|
+
# Start the query (synchronous method)
|
99
|
+
# @params see #query
|
100
|
+
# @return Query instance with the running query
|
101
|
+
def start_query(sobject, soql, options={})
|
102
|
+
# create the query, start it and return it
|
103
|
+
query = SalesforceBulkQuery::Query.new(sobject, soql, @connection, {:logger => @logger}.merge(options))
|
104
|
+
query.start(options)
|
105
|
+
return query
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'tmpdir'
|
2
|
+
|
3
|
+
require 'salesforce_bulk_query/utils'
|
4
|
+
|
5
|
+
|
6
|
+
module SalesforceBulkQuery
|
7
|
+
# Represents a Salesforce api batch. Batch contains a single subquery.
|
8
|
+
# Many batches are contained in a Job.
|
9
|
+
class Batch
|
10
|
+
def initialize(options)
|
11
|
+
@sobject = options[:sobject]
|
12
|
+
@soql = options[:soql]
|
13
|
+
@job_id = options[:job_id]
|
14
|
+
@connection = options[:connection]
|
15
|
+
@date_field = options[:date_field] or fail "date_field must be given when creating a batch"
|
16
|
+
@start = options[:start]
|
17
|
+
@stop = options[:stop]
|
18
|
+
@logger = options[:logger]
|
19
|
+
@@directory_path ||= Dir.mktmpdir
|
20
|
+
@filename = nil
|
21
|
+
end
|
22
|
+
|
23
|
+
attr_reader :soql, :start, :stop, :filename, :fail_message, :batch_id, :csv_record_count
|
24
|
+
|
25
|
+
# Do the api request
|
26
|
+
def create
|
27
|
+
path = "job/#{@job_id}/batch/"
|
28
|
+
|
29
|
+
response_parsed = @connection.post_xml(path, @soql, {:csv_content_type => true})
|
30
|
+
|
31
|
+
@batch_id = response_parsed['id'][0]
|
32
|
+
end
|
33
|
+
|
34
|
+
# check status of the batch
|
35
|
+
# if it fails, don't throw an error now, let the job above collect all fails and raise it at once
|
36
|
+
def check_status
|
37
|
+
succeeded = nil
|
38
|
+
failed = nil
|
39
|
+
|
40
|
+
# get the status of the batch
|
41
|
+
# https://www.salesforce.com/us/developer/docs/api_asynch/Content/asynch_api_batches_get_info.htm
|
42
|
+
status_path = "job/#{@job_id}/batch/#{@batch_id}"
|
43
|
+
status_response = @connection.get_xml(status_path)
|
44
|
+
|
45
|
+
# interpret the status
|
46
|
+
@status = status_response['state'][0]
|
47
|
+
|
48
|
+
# https://www.salesforce.com/us/developer/docs/api_asynch/Content/asynch_api_batches_interpret_status.htm
|
49
|
+
case @status
|
50
|
+
when 'Failed'
|
51
|
+
failed = true
|
52
|
+
@fail_message = status_response['stateMessage']
|
53
|
+
when 'InProgress', 'Queued'
|
54
|
+
succeeded = false
|
55
|
+
when 'Completed'
|
56
|
+
succeeded = true
|
57
|
+
failed = false
|
58
|
+
else
|
59
|
+
fail "Something weird happened, #{@batch_id} has status #{@status}."
|
60
|
+
end
|
61
|
+
|
62
|
+
if succeeded
|
63
|
+
# request to get the result id
|
64
|
+
# https://www.salesforce.com/us/developer/docs/api_asynch/Content/asynch_api_batches_get_results.htm
|
65
|
+
path = "job/#{@job_id}/batch/#{@batch_id}/result"
|
66
|
+
|
67
|
+
response_parsed = @connection.get_xml(path)
|
68
|
+
|
69
|
+
@result_id = response_parsed["result"] ? response_parsed["result"][0] : nil
|
70
|
+
end
|
71
|
+
|
72
|
+
return {
|
73
|
+
:failed => failed,
|
74
|
+
:fail_message => @fail_message,
|
75
|
+
:succeeded => succeeded,
|
76
|
+
:result_id => @result_id
|
77
|
+
}
|
78
|
+
end
|
79
|
+
|
80
|
+
def get_filename
|
81
|
+
return "#{@sobject}_#{@date_field}_#{@start}_#{@stop}_#{@batch_id}.csv"
|
82
|
+
end
|
83
|
+
|
84
|
+
def get_result(options={})
|
85
|
+
# if it was already downloaded, no one should ask about it
|
86
|
+
if @filename
|
87
|
+
raise "This batch was already downloaded once: #{@filename}, #{@batch_id}"
|
88
|
+
end
|
89
|
+
|
90
|
+
directory_path = options[:directory_path]
|
91
|
+
skip_verification = options[:skip_verification]
|
92
|
+
|
93
|
+
# request to get the actual results
|
94
|
+
path = "job/#{@job_id}/batch/#{@batch_id}/result/#{@result_id}"
|
95
|
+
|
96
|
+
if !@result_id
|
97
|
+
raise "batch #{@batch_id} not finished yet, trying to get result: #{path}"
|
98
|
+
end
|
99
|
+
|
100
|
+
directory_path ||= @@directory_path
|
101
|
+
|
102
|
+
# write it to a file
|
103
|
+
@filename = File.join(directory_path, get_filename)
|
104
|
+
@connection.get_to_file(path, @filename)
|
105
|
+
|
106
|
+
# Verify the number of downloaded records is roughly the same as
|
107
|
+
# count on the soql api
|
108
|
+
# maybe also verify
|
109
|
+
@verification = true
|
110
|
+
unless skip_verification
|
111
|
+
verify
|
112
|
+
end
|
113
|
+
@logger.debug "get_result :verification : #{@verification}" if @logger
|
114
|
+
return {
|
115
|
+
:filename => @filename,
|
116
|
+
:verification => @verification
|
117
|
+
}
|
118
|
+
end
|
119
|
+
|
120
|
+
def verify
|
121
|
+
api_count = @connection.query_count(@sobject, @date_field, @start, @stop)
|
122
|
+
# if we weren't able to get the count, fail.
|
123
|
+
if api_count.nil?
|
124
|
+
return @verification = false
|
125
|
+
end
|
126
|
+
|
127
|
+
# count the records in the csv
|
128
|
+
@csv_record_count = Utils.line_count(@filename)
|
129
|
+
|
130
|
+
if @logger && @csv_record_count > 0 && @csv_record_count % 100 == 0
|
131
|
+
@logger.warn "The line count for batch id #{@batch_id} soql #{@soql} is highly suspicious: #{@csv_record_count}"
|
132
|
+
end
|
133
|
+
if @logger && (api_count - @csv_record_count).abs > 2
|
134
|
+
@logger.warn "The counts for batch id #{@batch_id}, soql #{@soql} don't match. Record count in downloaded csv #{@csv_record_count}, record count on api count(): #{api_count}"
|
135
|
+
@logger.info "verify result: #{(api_count - @csv_record_count).abs < 2}"
|
136
|
+
|
137
|
+
end
|
138
|
+
@verification = ((api_count - @csv_record_count).abs < 2)
|
139
|
+
end
|
140
|
+
|
141
|
+
def to_log
|
142
|
+
return {
|
143
|
+
:sobject => @sobject,
|
144
|
+
:soql => @soql,
|
145
|
+
:job_id => @job_id,
|
146
|
+
:connection => @connection.to_log,
|
147
|
+
:start => @start,
|
148
|
+
:stop => @stop,
|
149
|
+
:directory_path => @@directory_path
|
150
|
+
}
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|