brynq-sdk-salesforce 2.0.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 1.0
  Name: brynq_sdk_salesforce
- Version: 2.0.0
+ Version: 2.0.2
  Summary: Salesforce wrapper from BrynQ
  Home-page: UNKNOWN
  Author: BrynQ
@@ -0,0 +1 @@
+ from .salesforce import Salesforce
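
Note on the hunk above: the new __init__.py re-exports the wrapper class, so downstream code can import it from the package root. A minimal sketch (assumes the package is installed):

    from brynq_sdk_salesforce import Salesforce
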
@@ -0,0 +1,318 @@
+ from brynq_sdk_brynq import BrynQ
+ import urllib.parse
+ import warnings
+ import requests
+ import json
+ from typing import Union, List
+ import pandas as pd
+ import os
+
+
+ class Salesforce(BrynQ):
+     """
+     This class is a simple wrapper around the Salesforce API. To start using it, authorize your application in BrynQ.
+     You will receive a code which you can use to obtain a refresh token with the get_refresh_token method. Use this refresh token to refresh your access token before every data call.
+     """
+     def __init__(self, label: Union[str, List], debug: bool = False, sandbox: bool = False):
+         super().__init__()
+         if sandbox:
+             self.system = 'salesforce-sandbox'
+         else:
+             self.system = 'salesforce'
+         self.credentials = self.get_system_credential(system=self.system, label=label)
+         self.credential_id = self.credentials['id']
+         self.customer_url = self.credentials['auth']['instance_url']
+         self.debug = debug
+         self.api_version = 56.0
+         self.timeout = 3600
+
+     def __get_headers(self) -> dict:
+         credentials = self.refresh_system_credential(system=self.system, system_id=self.credential_id)
+         headers = {"Authorization": f"Bearer {credentials['access_token']}",
+                    "Content-Type": "application/json"}
+         if self.debug:
+             print(f"Headers: {headers}")
+
+         return headers
+
+     def query_data(self, query: str) -> pd.DataFrame:
+         """
+         This method is used to send raw queries to Salesforce.
+         :param query: query string with '+' as the space separator, e.g. 'select+Name,Id+from+Account'
+         :return: DataFrame with all fetched records; raises on HTTP errors
+         """
+         params = {
+             "q": query
+         }
+         if self.debug:
+             print(f"Query: {query}")
+         params_str = urllib.parse.urlencode(params, safe=':+')
+         df = pd.DataFrame()
+         done = False
+         url = f"{self.customer_url}/services/data/v37.0/query/?"
+         while not done:
+             response = requests.get(url=url, params=params_str, headers=self.__get_headers(), timeout=self.timeout)
+             response.raise_for_status()
+             response = response.json()
+             done = response['done']
+             if not done:
+                 url = f"{self.customer_url}{response['nextRecordsUrl']}"
+             df = pd.concat([df, pd.DataFrame(response['records'])])
+
+         return df
+
+     def get_data(self, fields: Union[str, List], object_name: str, filter: str = None) -> pd.DataFrame:
+         """
+         This method is used to send queries to Salesforce in a somewhat user-friendly way.
+         :param fields: fields you want to get
+         :param object_name: table or object name that the fields need to be retrieved from
+         :param filter: statement that evaluates to True or False
+         :return: DataFrame with all fetched records; raises on HTTP errors
+         """
+         fields = ",".join(fields) if isinstance(fields, List) else fields
+         params = {
+             "q": f"SELECT {fields} FROM {object_name}{' WHERE ' + filter if filter is not None else ''}"
+         }
+         if self.debug:
+             print(f"Query: {params['q']}")
+         params_str = urllib.parse.urlencode(params, safe=':+')
+         df = pd.DataFrame()
+         done = False
+         url = f"{self.customer_url}/services/data/v37.0/query/?"
+         while not done:
+             response = requests.get(url=url, params=params_str, headers=self.__get_headers(), timeout=self.timeout)
+             response.raise_for_status()
+             response = response.json()
+             done = response['done']
+             if not done:
+                 url = f"{self.customer_url}{response['nextRecordsUrl']}"
+             df = pd.concat([df, pd.DataFrame(response['records'])])
+
+         return df
+
+     def create_contact(self, data: dict) -> dict:
+         """
+         This method creates a new contact in Salesforce.
+         :param data: fields of the contact you want to create
+         :return: the JSON response of the create call; raises on HTTP errors
+         """
+         allowed_fields = {
+             'salure_customer': 'Klant_van_Salure__c',
+             # 'full_name': 'Name',
+             'first_name': 'FirstName',
+             'last_name': 'LastName',
+             'phone': 'Phone',
+             'email': 'Email',
+             'salesforce_account_id': 'AccountId',
+             'organisation_person_id': 'AFAS_persoons_ID__C'
+         }
+         required_fields = []
+
+         self.__check_fields(data=data, required_fields=required_fields, allowed_fields=list(allowed_fields.keys()))
+
+         body = {}
+
+         # Add allowed fields to the body
+         for field in (allowed_fields.keys() & data.keys()):
+             body.update({allowed_fields[field]: data[field]})
+
+         body = json.dumps(body)
+         if self.debug:
+             print(f"Payload: {body}")
+
+         response = requests.post(url=f"{self.customer_url}/services/data/v37.0/sobjects/Contact", data=body, headers=self.__get_headers(), timeout=self.timeout)
+         response.raise_for_status()
+         if self.debug:
+             print(f"Response: {response.status_code} {response.text}")
+
+         return response.json()
+
+     def update_contact(self, data: dict):
+         """
+         This method updates an existing contact in Salesforce.
+         :param data: fields you want to update; must include contact_id
+         :return: nothing is returned when the update is successful, otherwise an error is raised
+         """
+         allowed_fields = {
+             'salure_customer': 'Klant_van_Salure__c',
+             # 'full_name': 'Name',
+             'first_name': 'FirstName',
+             'last_name': 'LastName',
+             'phone': 'Phone',
+             'email': 'Email',
+             'salesforce_account_id': 'AccountId',
+             'organisation_person_id': 'AFAS_persoons_ID__C'
+         }
+         required_fields = ['contact_id']
+
+         self.__check_fields(data=data, required_fields=required_fields, allowed_fields=list(allowed_fields.keys()))
+
+         body = {}
+
+         # Add allowed fields to the body
+         for field in (allowed_fields.keys() & data.keys()):
+             body.update({allowed_fields[field]: data[field]})
+
+         body = json.dumps(body)
+         if self.debug:
+             print(f"Payload: {body}")
+
+         response = requests.patch(url=f"{self.customer_url}/services/data/v37.0/sobjects/Contact/{data['contact_id']}", data=body, headers=self.__get_headers(), timeout=self.timeout)
+         response.raise_for_status()
+         if self.debug:
+             print(f"Response: {response.status_code} {response.text}")
+
+     @staticmethod
+     def __check_fields(data: Union[dict, List], required_fields: List, allowed_fields: List):
+         if isinstance(data, dict):
+             data = data.keys()
+
+         for field in data:
+             if field not in allowed_fields and field not in required_fields:
+                 warnings.warn('Field {field} is not implemented. Optional fields are: {allowed_fields}'.format(field=field, allowed_fields=tuple(allowed_fields)))
+
+         for field in required_fields:
+             if field not in data:
+                 raise ValueError('Field {field} is required. Required fields are: {required_fields}'.format(field=field, required_fields=tuple(required_fields)))
+
+     def query_table_metadata(self, table: str) -> requests.Response:
+         """
+         This method is used to get the metadata of a table in Salesforce.
+         :param table: table or object name whose metadata should be retrieved
+         :return: the raw requests.Response of the describe call
+         """
+         url = f"{self.customer_url}/services/data/v{self.api_version}/sobjects/{table}/describe/"
+         response = requests.get(url, headers=self.__get_headers(), timeout=self.timeout)
+         return response
+
+     def query_table(self, data_dir: str, table: str, fields: Union[str, List], filter: str = None, filename: str = None) -> pd.DataFrame:
+         """
+         For information about the tables, see: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/resources_query.htm
+         With this method, you specify a table you want to retrieve data from. This function contains a list of approved tables and knows, per table, whether
+         a full or an incremental load is possible. If you want to use a table that is not in this list, you can also use the query_data method.
+         This function also checks your previously loaded data and adds new data to it. Rows that were deleted in Salesforce are removed from your dataset as well.
+         :param data_dir: directory where the data will be stored. Both the full and incremental data will be stored here
+         :param table: table you want to retrieve data from. If you call a table which is not in the approved tables, you will always get the full (not incremental) dataset
+         :param fields: fields you want to get from the table
+         :param filter: possible filter you want to apply to the table
+         :param filename: filename you want to use for the data. If not given, the table name will be used as filename
+         :return: the dataset as a pandas DataFrame
+         """
+         approved_tables = {
+             'Account': 'incremental',
+             'AccountHistory': 'full',
+             'Appliaction__c': 'incremental',
+             'Beneficiary__c': 'incremental',
+             'Campaign': 'incremental',
+             'CampaignMember': 'incremental',
+             'Case': 'incremental',
+             'Contact': 'incremental',
+             'cpm__Installment__c': 'incremental',
+             'cpm__Payment__c': 'incremental',
+             'Document__c': 'incremental',
+             'Donaction_contracts__c': 'incremental',
+             'Donor_Type_Budget__c': 'incremental',
+             'Dorcas_Exchange_Rates__c': 'incremental',
+             'Dorcas_Report__c': 'incremental',
+             'General_Ledger_Account__c': 'incremental',
+             'Lead': 'incremental',
+             'npe03__Recurring_Donation__c': 'incremental',
+             'npsp__General_Accounting_Unit__c': 'incremental',
+             'Opportunity': 'incremental',
+             'pmnc__Project__c': 'incremental',
+             'Project_Budget__c': 'incremental',
+             'Project_Budget_Line__c': 'incremental',
+             'Project_Expense__c': 'incremental',
+             'Project_Indicator__c': 'incremental',
+             'Project_Result__c': 'incremental',
+             'Reporting_Unit__c': 'incremental',
+             'Result_Framework__c': 'incremental',
+             'Stakeholder__c': 'incremental',
+             'Volunteer_Assignment__c': 'incremental',
+             'User': 'full'
+         }
+         if table not in approved_tables.keys():
+             approved_tables[table] = 'full'
+
+         # First create a folder for the raw feather files
+         os.makedirs(data_dir, exist_ok=True)
+         os.makedirs(f'{data_dir}/cache/', exist_ok=True)
+
+         # Check if there is already a file for the called table. If not, this is the first and thus a full load
+         filename = table if filename is None else filename
+         load_type = approved_tables[table]
+         initial_load = not os.path.exists(f'{data_dir}/cache/{filename}.ftr')
+
+         fields = fields.split(',') if isinstance(fields, str) else fields
+         # Add metadata fields to the fields, then use a set to avoid duplicates
+         if load_type == 'incremental':
+             fields.extend(['Id', 'CreatedDate', 'LastModifiedDate'])
+         else:
+             fields.extend(['Id'])
+         fields = ','.join(list(set(fields)))
+
+         # If it's an incremental load, fetch the records that were created or updated in the last 7 days (duplicates are removed later) and apply the filter
+         if initial_load is False and load_type == 'incremental':
+             params = {"q": f"SELECT {fields} FROM {table} WHERE LastModifiedDate >= LAST_N_DAYS:7 {'' if filter is None or filter == '*' else ' AND ' + filter}"}
+         # In all other cases, load the full dataset, including any field which is needed for incremental loads
+         else:
+             params = {"q": f"SELECT {fields} FROM {table} {'' if filter is None or filter == '*' else ' WHERE ' + filter}"}
+
+         params_str = urllib.parse.urlencode(params, safe=':+')
+         url = f'{self.customer_url}/services/data/v{self.api_version}/query/?'
+         done = False
+         df = pd.DataFrame()
+
+         # With the created URL and parameters, call the API
+         while not done:
+             response = requests.get(url=url, params=params_str, headers=self.__get_headers(), timeout=self.timeout)
+             response.raise_for_status()
+             done = response.json()['done']
+             df_temp = pd.DataFrame(response.json()['records'])
+             if 'attributes' in df_temp.columns:
+                 del df_temp['attributes']
+             if not done:
+                 url = f"{self.customer_url}{response.json()['nextRecordsUrl']}"
+             df = pd.concat([df_temp, df])
+
+         if load_type == 'incremental':
+             # Get the previously fetched data from the feather cache and concat it with the new data. Keep only the newest row per Id in case of duplicates
+             if os.path.exists(f'{data_dir}/cache/{filename}.ftr'):
+                 df_old = pd.read_feather(f'{data_dir}/cache/{filename}.ftr')
+                 df = pd.concat([df, df_old])
+                 df.sort_values(by=['Id', 'LastModifiedDate'], ascending=False, inplace=True)
+                 df = df.drop_duplicates(subset=['Id'], keep='first')
+
+             # Get the deleted rows from the table with a new call to Salesforce. Fetch all deleted records, not only recently deleted ones, because very old rows may have been deleted since the last time the data was fetched
+             params = {"q": f"SELECT+Id,isDeleted+FROM+{table}+WHERE+isDeleted+=TRUE"}
+             params_str = urllib.parse.urlencode(params, safe=':+')
+             done = False
+             df_del = pd.DataFrame()
+             url = f'{self.customer_url}/services/data/v{self.api_version}/queryAll/?'
+             while not done:
+                 response = requests.get(url=url, params=params_str, headers=self.__get_headers(), timeout=self.timeout)
+                 response.raise_for_status()
+                 done = response.json()['done']
+                 df_temp = pd.DataFrame(response.json()['records'])
+                 if not done:
+                     url = f"{self.customer_url}{response.json()['nextRecordsUrl']}"
+                 df_del = pd.concat([df_temp, df_del])
+
+             # Join the deleted rows to the dataframe and filter them out
+             if len(df_del) > 0:
+                 del df_del['attributes']
+                 df = df.merge(df_del, how='left', on='Id')
+                 df = df[df['IsDeleted'].isna()].copy()
+                 del df['IsDeleted']
+
+         # Save the final result to the cache as a feather file
+         if 'attributes' in df.columns:
+             del df['attributes']
+         df.reset_index(drop=True, inplace=True)
+         if df.empty:
+             return df
+         try:
+             df.to_feather(f'{data_dir}/cache/{filename}.ftr')
+         except Exception:
+             # Mixed-type columns cannot be serialized to feather directly; cast to str and retry
+             df = df.astype(str)
+             df.to_feather(f'{data_dir}/cache/{filename}.ftr', compression='lz4')
+
+         return df
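
To make the new module concrete, here is a minimal usage sketch. It is not part of the package; the label and data_dir values are hypothetical and depend on how credentials are configured in BrynQ. Note that queries passed to query_data must already use '+' as the space separator, because the wrapper urlencodes its params with safe=':+'.

    from brynq_sdk_salesforce import Salesforce

    # 'salesforce-prod' is a hypothetical credential label configured in BrynQ
    sf = Salesforce(label='salesforce-prod', debug=True)

    # Raw SOQL: spaces written as '+', matching the safe=':+' encoding
    accounts = sf.query_data(query='select+Name,Id+from+Account')

    # Friendlier variant: plain field names, optional WHERE clause via filter
    contacts = sf.get_data(fields=['Id', 'FirstName', 'LastName'], object_name='Contact', filter='Email != null')

    # Cached, incremental load of an approved table; './data' is a hypothetical directory
    df = sf.query_table(data_dir='./data', table='Account', fields=['Name', 'Industry'])

query_table keeps one feather file per table under data_dir/cache/, so repeated runs on an incremental table only fetch records modified in the last 7 days and merge them into the cached set.
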
@@ -1,6 +1,6 @@
  Metadata-Version: 1.0
  Name: brynq-sdk-salesforce
- Version: 2.0.0
+ Version: 2.0.2
  Summary: Salesforce wrapper from BrynQ
  Home-page: UNKNOWN
  Author: BrynQ
@@ -1,4 +1,6 @@
  setup.py
+ brynq_sdk_salesforce/__init__.py
+ brynq_sdk_salesforce/salesforce.py
  brynq_sdk_salesforce.egg-info/PKG-INFO
  brynq_sdk_salesforce.egg-info/SOURCES.txt
  brynq_sdk_salesforce.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+ brynq_sdk_salesforce
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
 
  setup(
      name='brynq_sdk_salesforce',
-     version='2.0.0',
+     version='2.0.2',
      description='Salesforce wrapper from BrynQ',
      long_description='Salesforce wrapper from BrynQ',
      author='BrynQ',
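
For reference, picking up the new release should be a plain dependency bump (assuming the sdist shown here is what is published to the registry):

    pip install brynq-sdk-salesforce==2.0.2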