castor-extractor 0.16.3__py3-none-any.whl → 0.16.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +4 -0
- castor_extractor/commands/extract_databricks.py +3 -0
- castor_extractor/commands/extract_salesforce.py +43 -0
- castor_extractor/commands/extract_salesforce_reporting.py +6 -6
- castor_extractor/utils/client/api.py +36 -27
- castor_extractor/utils/salesforce/__init__.py +3 -0
- castor_extractor/utils/salesforce/client.py +84 -0
- castor_extractor/utils/salesforce/client_test.py +21 -0
- castor_extractor/utils/salesforce/constants.py +13 -0
- castor_extractor/utils/salesforce/credentials.py +65 -0
- castor_extractor/{visualization/salesforce_reporting/client → utils/salesforce}/credentials_test.py +3 -2
- castor_extractor/visualization/salesforce_reporting/__init__.py +1 -2
- castor_extractor/visualization/salesforce_reporting/client/__init__.py +1 -2
- castor_extractor/visualization/salesforce_reporting/client/rest.py +7 -90
- castor_extractor/visualization/salesforce_reporting/extract.py +10 -8
- castor_extractor/warehouse/databricks/client.py +1 -1
- castor_extractor/warehouse/databricks/credentials.py +1 -4
- castor_extractor/warehouse/databricks/extract.py +1 -1
- castor_extractor/warehouse/salesforce/__init__.py +6 -0
- castor_extractor/warehouse/salesforce/client.py +112 -0
- castor_extractor/warehouse/salesforce/constants.py +2 -0
- castor_extractor/warehouse/salesforce/extract.py +111 -0
- castor_extractor/warehouse/salesforce/format.py +67 -0
- castor_extractor/warehouse/salesforce/format_test.py +32 -0
- castor_extractor/warehouse/salesforce/soql.py +45 -0
- {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/METADATA +1 -1
- {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/RECORD +30 -19
- {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/entry_points.txt +2 -1
- castor_extractor/visualization/salesforce_reporting/client/constants.py +0 -2
- castor_extractor/visualization/salesforce_reporting/client/credentials.py +0 -33
- {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/LICENCE +0 -0
- {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/WHEEL +0 -0
CHANGELOG.md
CHANGED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from argparse import ArgumentParser
|
|
3
|
+
|
|
4
|
+
from castor_extractor.warehouse import salesforce # type: ignore
|
|
5
|
+
|
|
6
|
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def main():
    """
    Command-line entry point: read the Salesforce connection options and
    hand them over to the warehouse `salesforce.extract_all` routine.
    """
    parser = ArgumentParser()

    # (short flag, long flag, help text), registered in this order
    value_options = (
        ("-u", "--username", "Salesforce username"),
        ("-p", "--password", "Salesforce password"),
        ("-c", "--client-id", "Salesforce client id"),
        ("-s", "--client-secret", "Salesforce client secret"),
        ("-t", "--security-token", "Salesforce security token"),
        ("-b", "--base-url", "Salesforce instance URL"),
        ("-o", "--output", "Directory to write to"),
    )
    for short_flag, long_flag, description in value_options:
        parser.add_argument(short_flag, long_flag, help=description)

    # boolean switch: present on the command line means True
    parser.add_argument(
        "--skip-existing",
        dest="skip_existing",
        action="store_true",
        help="Skips files already extracted instead of replacing them",
    )
    parser.set_defaults(skip_existing=False)

    options = parser.parse_args()

    # `--output` is forwarded under the name `output_directory`
    extraction_kwargs = {
        "username": options.username,
        "password": options.password,
        "client_id": options.client_id,
        "client_secret": options.client_secret,
        "security_token": options.security_token,
        "base_url": options.base_url,
        "output_directory": options.output,
        "skip_existing": options.skip_existing,
    }
    salesforce.extract_all(**extraction_kwargs)
|
|
@@ -11,23 +11,23 @@ def main():
|
|
|
11
11
|
|
|
12
12
|
parser.add_argument("-u", "--username", help="Salesforce username")
|
|
13
13
|
parser.add_argument("-p", "--password", help="Salesforce password")
|
|
14
|
-
parser.add_argument("-
|
|
14
|
+
parser.add_argument("-c", "--client-id", help="Salesforce client id")
|
|
15
15
|
parser.add_argument(
|
|
16
|
-
"-s", "--
|
|
16
|
+
"-s", "--client-secret", help="Salesforce client secret"
|
|
17
17
|
)
|
|
18
18
|
parser.add_argument(
|
|
19
19
|
"-t", "--security-token", help="Salesforce security token"
|
|
20
20
|
)
|
|
21
|
-
parser.add_argument("-
|
|
21
|
+
parser.add_argument("-b", "--base-url", help="Salesforce instance URL")
|
|
22
22
|
parser.add_argument("-o", "--output", help="Directory to write to")
|
|
23
23
|
|
|
24
24
|
args = parser.parse_args()
|
|
25
25
|
salesforce_reporting.extract_all(
|
|
26
26
|
username=args.username,
|
|
27
27
|
password=args.password,
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
client_id=args.client_id,
|
|
29
|
+
client_secret=args.client_secret,
|
|
30
30
|
security_token=args.security_token,
|
|
31
|
-
|
|
31
|
+
base_url=args.base_url,
|
|
32
32
|
output_directory=args.output,
|
|
33
33
|
)
|
|
@@ -1,23 +1,25 @@
|
|
|
1
|
-
import
|
|
2
|
-
from typing import Optional
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Callable, Dict, Literal, Optional
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
7
|
|
|
8
8
|
DEFAULT_TIMEOUT_MS = 30_000
|
|
9
|
-
|
|
9
|
+
|
|
10
|
+
# https://requests.readthedocs.io/en/latest/api/#requests.request
|
|
11
|
+
HttpMethod = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
|
|
10
12
|
|
|
11
13
|
|
|
12
14
|
class APIClient:
|
|
13
15
|
"""
|
|
14
16
|
API client
|
|
15
|
-
-
|
|
16
|
-
- authentication via access token for now
|
|
17
|
+
- authentication via access token
|
|
17
18
|
"""
|
|
18
19
|
|
|
19
|
-
def __init__(self,
|
|
20
|
-
self.
|
|
20
|
+
def __init__(self, host: str, token: Optional[str] = None):
|
|
21
|
+
self._host = host
|
|
22
|
+
self._token = token or ""
|
|
21
23
|
self._timeout = DEFAULT_TIMEOUT_MS
|
|
22
24
|
|
|
23
25
|
@staticmethod
|
|
@@ -26,25 +28,32 @@ class APIClient:
|
|
|
26
28
|
host = "https://" + host
|
|
27
29
|
return f"{host.strip('/')}/{path}"
|
|
28
30
|
|
|
29
|
-
def _headers(self):
|
|
30
|
-
|
|
31
|
-
"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
31
|
+
def _headers(self) -> Dict[str, str]:
|
|
32
|
+
if self._token:
|
|
33
|
+
return {"Authorization": f"Bearer {self._token}"}
|
|
34
|
+
return dict()
|
|
35
|
+
|
|
36
|
+
def _call(
|
|
37
|
+
self,
|
|
38
|
+
url: str,
|
|
39
|
+
method: HttpMethod = "GET",
|
|
40
|
+
*,
|
|
41
|
+
params: Optional[dict] = None,
|
|
42
|
+
data: Optional[dict] = None,
|
|
43
|
+
processor: Optional[Callable] = None,
|
|
44
|
+
) -> Any:
|
|
45
|
+
logger.debug(f"Calling {method} on {url}")
|
|
46
|
+
result = requests.request(
|
|
47
|
+
method, url, headers=self._headers(), params=params, json=data
|
|
45
48
|
)
|
|
49
|
+
result.raise_for_status()
|
|
46
50
|
|
|
47
|
-
if
|
|
48
|
-
return
|
|
51
|
+
if processor:
|
|
52
|
+
return processor(result)
|
|
49
53
|
|
|
50
|
-
return
|
|
54
|
+
return result.json()
|
|
55
|
+
|
|
56
|
+
def get(self, path: str, payload: Optional[dict] = None) -> dict:
|
|
57
|
+
"""path: REST API operation path, such as /api/2.0/clusters/get"""
|
|
58
|
+
url = self.build_url(self._host, path)
|
|
59
|
+
return self._call(url=url, data=payload)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Iterator, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from requests import Response
|
|
5
|
+
|
|
6
|
+
from ...utils.client.api import APIClient
|
|
7
|
+
from .constants import DEFAULT_API_VERSION, DEFAULT_PAGINATION_LIMIT
|
|
8
|
+
from .credentials import SalesforceCredentials
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SalesforceBaseClient(APIClient):
    """
    Salesforce API client.
    https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_rest.htm

    An access token is requested once, at construction time, from the
    given credentials. The class then exposes the query / tooling URLs and
    a helper to run a SOQL query across all of its result pages.
    """

    api_version = DEFAULT_API_VERSION
    pagination_limit = DEFAULT_PAGINATION_LIMIT

    PATH_TPL = "services/data/v{version}/{suffix}"

    def __init__(self, credentials: SalesforceCredentials):
        super().__init__(host=credentials.base_url)
        self._token = self._access_token(credentials)

    def _access_token(self, credentials: SalesforceCredentials) -> str:
        """Request an access token and return it."""
        url = self.build_url(self._host, "services/oauth2/token")
        # NOTE(review): the credentials payload is passed as `params`, i.e. in
        # the URL query string - confirm this is intended
        response = self._call(
            url, "POST", params=credentials.token_request_payload()
        )
        return response["access_token"]

    def _full_url(self, suffix: str) -> str:
        """Build the versioned REST URL ending with `suffix`."""
        path = self.PATH_TPL.format(version=self.api_version, suffix=suffix)
        return self.build_url(self._host, path)

    @property
    def query_url(self) -> str:
        """Returns the query API url"""
        return self._full_url("query")

    @property
    def tooling_url(self) -> str:
        """Returns the tooling API url"""
        return self._full_url("tooling/query")

    @staticmethod
    def _query_processor(response: Response) -> Tuple[list, Optional[str]]:
        """
        Split a query response into its records and the locator of the next
        page (`None` when the response carries no `nextRecordsUrl`).
        """
        results = response.json()
        return results["records"], results.get("nextRecordsUrl")

    def _has_reached_pagination_limit(self, page_number: int) -> bool:
        """True once `page_number` exceeds `pagination_limit`."""
        return page_number > self.pagination_limit

    def _query_first_page(self, query: str) -> Tuple[list, Optional[str]]:
        """Run `query` and return the first page with the next-page locator."""
        url = self.query_url
        logger.info("querying page 0")
        records, next_page_url = self._call(
            url, params={"q": query}, processor=self._query_processor
        )
        return records, next_page_url

    def _query_all(self, query: str) -> Iterator[dict]:
        """
        Run a SOQL query over salesforce API.

        Yields the records of every page, following the next-page locator
        returned by each response until there is none left or the
        pagination limit is reached.

        more: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_query.htm
        """
        records, next_page_path = self._query_first_page(query)
        yield from records

        page_count = 1
        while next_page_path and not self._has_reached_pagination_limit(
            page_count
        ):
            logger.info(f"querying page {page_count}")
            # `build_url` adds its own "/" between host and path, so the
            # locator's leading slash is dropped to avoid "host//services"
            url = self.build_url(self._host, next_page_path.lstrip("/"))
            # the locator must be refreshed from every response: otherwise
            # the same page is requested again and the loop only stops at
            # the pagination limit
            records, next_page_path = self._call(
                url, processor=self._query_processor
            )
            yield from records
            page_count += 1
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from unittest.mock import patch
|
|
2
|
+
|
|
3
|
+
from .client import SalesforceBaseClient
|
|
4
|
+
from .credentials import SalesforceCredentials
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@patch.object(SalesforceBaseClient, "_call")
def test_SalesforceBaseClient__urls(mock_call):
    """The query and tooling URLs are derived from the credentials' base url."""
    # `_call` is patched: the token request issued by the constructor gets
    # this canned answer
    mock_call.return_value = {"access_token": "the_token"}

    credential_kwargs = dict(
        username="usr",
        password="pw",
        client_id="key",
        client_secret="secret",
        security_token="token",
        base_url="url",
    )
    client = SalesforceBaseClient(SalesforceCredentials(**credential_kwargs))

    expected_query_url = "https://url/services/data/v59.0/query"
    expected_tooling_url = "https://url/services/data/v59.0/tooling/query"
    assert client.query_url == expected_query_url
    assert client.tooling_url == expected_tooling_url
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
DEFAULT_API_VERSION = 59.0
|
|
2
|
+
DEFAULT_PAGINATION_LIMIT = 100
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Keys:
    """Names under which the Salesforce credentials are looked up"""

    # user account
    USERNAME = "username"
    PASSWORD = "password"  # noqa: S105
    # client application
    CLIENT_ID = "client_id"
    CLIENT_SECRET = "client_secret"  # noqa: S105
    # extra secret and target instance
    SECURITY_TOKEN = "security_token"  # noqa: S105
    BASE_URL = "base_url"
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Dict
|
|
2
|
+
|
|
3
|
+
from ...utils import from_env
|
|
4
|
+
from .constants import Keys
|
|
5
|
+
|
|
6
|
+
_USERNAME = "CASTOR_SALESFORCE_USERNAME"
|
|
7
|
+
_PASSWORD = "CASTOR_SALESFORCE_PASSWORD" # noqa: S105
|
|
8
|
+
_SECURITY_TOKEN = "CASTOR_SALESFORCE_SECURITY_TOKEN" # noqa: S105
|
|
9
|
+
_CLIENT_ID = "CASTOR_SALESFORCE_CLIENT_ID"
|
|
10
|
+
_CLIENT_SECRET = "CASTOR_SALESFORCE_CLIENT_SECRET" # noqa: S105
|
|
11
|
+
_BASE_URL = "CASTOR_SALESFORCE_BASE_URL"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SalesforceCredentials:
    """
    Class to handle Salesforce rest API permissions

    Holds the values needed to request an access token. The security token
    is not stored on its own: it is appended to the password.
    """

    def __init__(
        self,
        *,
        username: str,
        password: str,
        security_token: str,
        client_id: str,
        client_secret: str,
        base_url: str,
    ):
        self.username = username
        # a missing token (None or "") leaves the password untouched instead
        # of failing on `str + None`
        self.password = password + (security_token or "")
        self.client_id = client_id
        self.client_secret = client_secret
        self.base_url = base_url

    def token_request_payload(self) -> Dict[str, str]:
        """
        Params to post to the API in order to retrieve the authentication token

        Returns a dict with the `grant_type` ("password"), the client id and
        secret, the username and the password (security token included).
        """
        return {
            "grant_type": "password",
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "username": self.username,
            "password": self.password,
        }
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def to_credentials(params: dict) -> SalesforceCredentials:
    """
    Build the Salesforce credentials out of `params`.

    Any entry that is missing or falsy in `params` is fetched through
    `from_env` with the matching CASTOR_SALESFORCE_* name.
    """
    # (constructor argument, key in params, fallback name), resolved in
    # this order
    sources = (
        ("username", Keys.USERNAME, _USERNAME),
        ("password", Keys.PASSWORD, _PASSWORD),
        ("security_token", Keys.SECURITY_TOKEN, _SECURITY_TOKEN),
        ("client_id", Keys.CLIENT_ID, _CLIENT_ID),
        ("client_secret", Keys.CLIENT_SECRET, _CLIENT_SECRET),
        ("base_url", Keys.BASE_URL, _BASE_URL),
    )
    resolved = {
        argument: params.get(key) or from_env(env_name)
        for argument, key, env_name in sources
    }
    return SalesforceCredentials(**resolved)
|
castor_extractor/{visualization/salesforce_reporting/client → utils/salesforce}/credentials_test.py
RENAMED
|
@@ -5,9 +5,10 @@ def test_Credentials_token_request_payload():
|
|
|
5
5
|
creds = SalesforceCredentials(
|
|
6
6
|
username="giphy",
|
|
7
7
|
password="1312",
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
client_id="degenie",
|
|
9
|
+
client_secret="fautpasledire",
|
|
10
10
|
security_token="yo",
|
|
11
|
+
base_url="man",
|
|
11
12
|
)
|
|
12
13
|
|
|
13
14
|
payload = creds.token_request_payload()
|
|
@@ -1,2 +1 @@
|
|
|
1
|
-
from .
|
|
2
|
-
from .rest import SalesforceClient
|
|
1
|
+
from .rest import SalesforceReportingClient
|
|
@@ -1,13 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import
|
|
3
|
-
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
|
|
4
|
-
|
|
5
|
-
import requests
|
|
6
|
-
from requests import Response
|
|
2
|
+
from typing import Dict, Iterator, List, Optional
|
|
7
3
|
|
|
4
|
+
from ....utils.salesforce import SalesforceBaseClient
|
|
8
5
|
from ..assets import SalesforceReportingAsset
|
|
9
|
-
from .constants import DEFAULT_API_VERSION, DEFAULT_PAGINATION_LIMIT
|
|
10
|
-
from .credentials import SalesforceCredentials
|
|
11
6
|
from .soql import queries
|
|
12
7
|
|
|
13
8
|
logger = logging.getLogger(__name__)
|
|
@@ -19,89 +14,11 @@ REQUIRING_URL_ASSETS = (
|
|
|
19
14
|
)
|
|
20
15
|
|
|
21
16
|
|
|
22
|
-
class
|
|
17
|
+
class SalesforceReportingClient(SalesforceBaseClient):
|
|
23
18
|
"""
|
|
24
|
-
Salesforce API client
|
|
25
|
-
https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_rest.htm
|
|
19
|
+
Salesforce Reporting API client
|
|
26
20
|
"""
|
|
27
21
|
|
|
28
|
-
api_version = DEFAULT_API_VERSION
|
|
29
|
-
pagination_limit = DEFAULT_PAGINATION_LIMIT
|
|
30
|
-
|
|
31
|
-
def __init__(
|
|
32
|
-
self,
|
|
33
|
-
credentials: SalesforceCredentials,
|
|
34
|
-
instance_url: str,
|
|
35
|
-
):
|
|
36
|
-
self.credentials = credentials
|
|
37
|
-
self.instance_url = instance_url
|
|
38
|
-
self._token = self._access_token()
|
|
39
|
-
|
|
40
|
-
def _access_token(self) -> Tuple[str, str]:
|
|
41
|
-
url = f"{self.instance_url}/services/oauth2/token"
|
|
42
|
-
response = self._call(
|
|
43
|
-
url, "POST", data=self.credentials.token_request_payload()
|
|
44
|
-
)
|
|
45
|
-
return response["access_token"]
|
|
46
|
-
|
|
47
|
-
def _header(self) -> Dict:
|
|
48
|
-
return {"Authorization": f"Bearer {self._token}"}
|
|
49
|
-
|
|
50
|
-
@staticmethod
|
|
51
|
-
def _call(
|
|
52
|
-
url: str,
|
|
53
|
-
method: str = "GET",
|
|
54
|
-
*,
|
|
55
|
-
header: Optional[Dict] = None,
|
|
56
|
-
params: Optional[Dict] = None,
|
|
57
|
-
data: Optional[Dict] = None,
|
|
58
|
-
processor: Optional[Callable] = None,
|
|
59
|
-
) -> Any:
|
|
60
|
-
logger.debug(f"Calling {method} on {url}")
|
|
61
|
-
result = requests.request(
|
|
62
|
-
method,
|
|
63
|
-
url,
|
|
64
|
-
headers=header,
|
|
65
|
-
params=params,
|
|
66
|
-
data=data,
|
|
67
|
-
)
|
|
68
|
-
result.raise_for_status()
|
|
69
|
-
|
|
70
|
-
if processor:
|
|
71
|
-
return processor(result)
|
|
72
|
-
|
|
73
|
-
return result.json()
|
|
74
|
-
|
|
75
|
-
@staticmethod
|
|
76
|
-
def _query_processor(response: Response) -> Tuple[dict, Optional[str]]:
|
|
77
|
-
results = response.json()
|
|
78
|
-
return results["records"], results.get("nextRecordsUrl")
|
|
79
|
-
|
|
80
|
-
def _query_all(self, query: str) -> Iterator[Dict]:
|
|
81
|
-
"""
|
|
82
|
-
Run a SOQL query over salesforce API.
|
|
83
|
-
|
|
84
|
-
more: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_query.htm
|
|
85
|
-
"""
|
|
86
|
-
url = f"{self.instance_url}/services/data/v{self.api_version}/query"
|
|
87
|
-
records, next_page = self._call(
|
|
88
|
-
url,
|
|
89
|
-
params={"q": query},
|
|
90
|
-
processor=self._query_processor,
|
|
91
|
-
header=self._header(),
|
|
92
|
-
)
|
|
93
|
-
yield from records
|
|
94
|
-
|
|
95
|
-
page_count = 0
|
|
96
|
-
while next_page and page_count <= self.pagination_limit:
|
|
97
|
-
logger.info(f"querying page {page_count}")
|
|
98
|
-
url = f"{self.instance_url}{next_page}"
|
|
99
|
-
records, next_page = self._call(
|
|
100
|
-
url, processor=self._query_processor, header=self._header()
|
|
101
|
-
)
|
|
102
|
-
yield from records
|
|
103
|
-
page_count += 1
|
|
104
|
-
|
|
105
22
|
def _get_asset_url(
|
|
106
23
|
self, asset_type: SalesforceReportingAsset, asset: dict
|
|
107
24
|
) -> Optional[str]:
|
|
@@ -111,15 +28,15 @@ class SalesforceClient:
|
|
|
111
28
|
|
|
112
29
|
if asset_type == SalesforceReportingAsset.DASHBOARDS:
|
|
113
30
|
path = f"lightning/r/Dashboard/{asset['Id']}/view"
|
|
114
|
-
return
|
|
31
|
+
return self.build_url(self._host, path)
|
|
115
32
|
|
|
116
33
|
if asset_type == SalesforceReportingAsset.FOLDERS:
|
|
117
34
|
path = asset["attributes"]["url"].lstrip("/")
|
|
118
|
-
return
|
|
35
|
+
return self.build_url(self._host, path)
|
|
119
36
|
|
|
120
37
|
if asset_type == SalesforceReportingAsset.REPORTS:
|
|
121
38
|
path = f"lightning/r/Report/{asset['Id']}/view"
|
|
122
|
-
return
|
|
39
|
+
return self.build_url(self._host, path)
|
|
123
40
|
|
|
124
41
|
return None
|
|
125
42
|
|
|
@@ -10,14 +10,15 @@ from ...utils import (
|
|
|
10
10
|
write_json,
|
|
11
11
|
write_summary,
|
|
12
12
|
)
|
|
13
|
+
from ...utils.salesforce import SalesforceCredentials
|
|
13
14
|
from .assets import SalesforceReportingAsset
|
|
14
|
-
from .client import
|
|
15
|
+
from .client import SalesforceReportingClient
|
|
15
16
|
|
|
16
17
|
logger = logging.getLogger(__name__)
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
|
-
client:
|
|
21
|
+
client: SalesforceReportingClient,
|
|
21
22
|
) -> Iterable[Tuple[str, Union[list, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Salesforce"""
|
|
23
24
|
|
|
@@ -30,10 +31,10 @@ def iterate_all_data(
|
|
|
30
31
|
def extract_all(
|
|
31
32
|
username: str,
|
|
32
33
|
password: str,
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
client_id: str,
|
|
35
|
+
client_secret: str,
|
|
35
36
|
security_token: str,
|
|
36
|
-
|
|
37
|
+
base_url: str,
|
|
37
38
|
output_directory: Optional[str] = None,
|
|
38
39
|
) -> None:
|
|
39
40
|
"""
|
|
@@ -44,11 +45,12 @@ def extract_all(
|
|
|
44
45
|
creds = SalesforceCredentials(
|
|
45
46
|
username=username,
|
|
46
47
|
password=password,
|
|
47
|
-
|
|
48
|
-
|
|
48
|
+
client_id=client_id,
|
|
49
|
+
client_secret=client_secret,
|
|
49
50
|
security_token=security_token,
|
|
51
|
+
base_url=base_url,
|
|
50
52
|
)
|
|
51
|
-
client =
|
|
53
|
+
client = SalesforceReportingClient(credentials=creds)
|
|
52
54
|
ts = current_timestamp()
|
|
53
55
|
|
|
54
56
|
for key, data in iterate_all_data(client):
|
|
@@ -31,7 +31,7 @@ class DatabricksClient(APIClient):
|
|
|
31
31
|
db_allowed: Optional[Set[str]] = None,
|
|
32
32
|
db_blocked: Optional[Set[str]] = None,
|
|
33
33
|
):
|
|
34
|
-
super().__init__(credentials)
|
|
34
|
+
super().__init__(host=credentials.host, token=credentials.token)
|
|
35
35
|
self._db_allowed = db_allowed
|
|
36
36
|
self._db_blocked = db_blocked
|
|
37
37
|
self.formatter = DatabricksFormatter()
|
|
@@ -25,7 +25,4 @@ def to_credentials(params: dict) -> DatabricksCredentials:
|
|
|
25
25
|
"""extract Databricks credentials"""
|
|
26
26
|
host = params.get("host") or from_env(_HOST)
|
|
27
27
|
token = params.get("token") or from_env(_TOKEN)
|
|
28
|
-
return DatabricksCredentials(
|
|
29
|
-
host=host,
|
|
30
|
-
token=token,
|
|
31
|
-
)
|
|
28
|
+
return DatabricksCredentials(host=host, token=token)
|
|
@@ -43,7 +43,7 @@ class DatabricksExtractionProcessor:
|
|
|
43
43
|
self._storage = storage
|
|
44
44
|
self._skip_existing = skip_existing
|
|
45
45
|
|
|
46
|
-
def _should_not_reextract(self, asset_group) -> bool:
|
|
46
|
+
def _should_not_reextract(self, asset_group: WarehouseAssetGroup) -> bool:
|
|
47
47
|
"""helper function to determine whether we need to extract"""
|
|
48
48
|
if not self._skip_existing:
|
|
49
49
|
return False
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, Iterator, List
|
|
3
|
+
|
|
4
|
+
from tqdm import tqdm # type: ignore
|
|
5
|
+
|
|
6
|
+
from ...utils.salesforce import SalesforceBaseClient, SalesforceCredentials
|
|
7
|
+
from .format import SalesforceFormatter
|
|
8
|
+
from .soql import SOBJECT_FIELDS_QUERY_TPL, SOBJECTS_QUERY_TPL
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SalesforceClient(SalesforceBaseClient):
    """
    Salesforce API client dedicated to sobjects (exposed as tables) and
    their fields (exposed as columns)
    """

    # Implicit (hard-coded in Salesforce) limitation when using SOQL of 2,000 rows
    LIMIT_RECORDS_PER_PAGE = 2000

    def __init__(self, credentials: SalesforceCredentials):
        super().__init__(credentials)
        self.formatter = SalesforceFormatter()

    @staticmethod
    def name() -> str:
        return "Salesforce"

    def _format_query(self, query_template: str, start_durable_id: str) -> str:
        """Fill the template with the starting id and the page size."""
        values = {
            "start_durable_id": start_durable_id,
            "limit": self.LIMIT_RECORDS_PER_PAGE,
        }
        return query_template.format(**values)

    def _next_records(
        self, url: str, query_template: str, start_durable_id: str = "0000"
    ) -> List[dict]:
        """Fetch one page of records, starting from `start_durable_id`."""
        soql = self._format_query(
            query_template, start_durable_id=start_durable_id
        )
        # the next-page locator returned by the processor is not used here
        page, _ = self._call(
            url, params={"q": soql}, processor=self._query_processor
        )
        return page

    def _is_last_page(self, records: List[dict]) -> bool:
        """A page holding fewer records than the page size is the last one."""
        page_is_full = len(records) >= self.LIMIT_RECORDS_PER_PAGE
        return not page_is_full

    def _should_query_next_page(
        self, records: List[dict], page_number: int
    ) -> bool:
        """Keep going while the page is full and the limit is not reached."""
        if self._is_last_page(records):
            return False
        return not self._has_reached_pagination_limit(page_number)

    def _query_all(self, query_template: str) -> Iterator[dict]:
        """
        Run a SOQL query over salesforce API

        Note, pagination is performed via a LIMIT in the SOQL query and requires
        that ids are sorted. The SOQL query must support `limit` and
        `start_durable_id` as parameters.
        """
        endpoint = self.query_url
        logger.info("querying page 0")
        page = self._next_records(endpoint, query_template)
        yield from page

        page_count = 1
        while self._should_query_next_page(page, page_count):
            logger.info(f"querying page {page_count}")
            # the next page starts from the last id seen so far
            resume_from = page[-1]["DurableId"]
            page = self._next_records(
                endpoint, query_template, start_durable_id=resume_from
            )
            yield from page
            page_count += 1

    def fetch_sobjects(self) -> List[dict]:
        """Fetch all sobjects"""
        logger.info("Extracting sobjects")
        sobjects = self._query_all(SOBJECTS_QUERY_TPL)
        return list(sobjects)

    def fetch_fields(self, sobject_name: str) -> List[dict]:
        """Fetches fields of a given sobject"""
        soql = SOBJECT_FIELDS_QUERY_TPL.format(
            entity_definition_id=sobject_name
        )
        payload = self._call(self.tooling_url, params={"q": soql})
        return payload["records"]

    def tables(self) -> List[dict]:
        """
        Get Salesforce sobjects as tables
        """
        raw_sobjects = self.fetch_sobjects()
        logger.info(f"Extracted {len(raw_sobjects)} sobjects")
        return self.formatter.tables(raw_sobjects)

    def columns(
        self, sobject_names: List[str], show_progress: bool = True
    ) -> List[dict]:
        """
        Get salesforce sobject fields as columns
        show_progress: optionally deactivate the tqdm progress bar
        """
        progress = tqdm(sobject_names, disable=not show_progress)
        fields_by_sobject: Dict[str, List[dict]] = {
            sobject_name: self.fetch_fields(sobject_name)
            for sobject_name in progress
        }
        return self.formatter.columns(fields_by_sobject)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
from ...utils import AbstractStorage, LocalStorage, write_summary
|
|
5
|
+
from ...utils.salesforce import to_credentials
|
|
6
|
+
from ..abstract import (
|
|
7
|
+
SupportedAssets,
|
|
8
|
+
WarehouseAsset,
|
|
9
|
+
WarehouseAssetGroup,
|
|
10
|
+
common_args,
|
|
11
|
+
)
|
|
12
|
+
from .client import SalesforceClient
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
Paths = Dict[str, str]
|
|
18
|
+
|
|
19
|
+
SALESFORCE_CATALOG_ASSETS: Tuple[WarehouseAsset, ...] = (
|
|
20
|
+
WarehouseAsset.TABLE,
|
|
21
|
+
WarehouseAsset.COLUMN,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
SALESFORCE_ASSETS: SupportedAssets = {
|
|
25
|
+
WarehouseAssetGroup.CATALOG: SALESFORCE_CATALOG_ASSETS
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SalesforceExtractionProcessor:
    """Salesforce API-based extraction management - warehouse part"""

    def __init__(
        self,
        client: SalesforceClient,
        storage: AbstractStorage,
        skip_existing: bool = False,
    ):
        self._client = client
        self._storage = storage
        self._skip_existing = skip_existing

    def _should_extract(self) -> bool:
        """
        Tell whether the catalog has to be extracted: always when
        `skip_existing` is off, otherwise only if a catalog file is missing.
        """
        if not self._skip_existing:
            return True

        something_missing = any(
            not self._storage.exists(asset.value)
            for asset in SALESFORCE_CATALOG_ASSETS
        )
        if something_missing:
            return True

        logger.info("Skipped, files for catalog already exist")
        return False

    def _existing_group_paths(self) -> Paths:
        """Map every catalog asset to the path given by the storage."""
        paths: Paths = {}
        for asset in SALESFORCE_CATALOG_ASSETS:
            paths[asset.value] = self._storage.path(asset.value)
        return paths

    def extract_catalog(self, show_progress: bool = True) -> Paths:
        """
        Extract the following catalog assets: tables and columns
        and return the locations of the extracted data
        """
        if not self._should_extract():
            return self._existing_group_paths()

        # tables first: their names drive the extraction of the columns
        tables = self._client.tables()
        table_key = WarehouseAsset.TABLE.value
        tables_location = self._storage.put(table_key, tables)
        logger.info(f"Extracted {len(tables)} tables to {tables_location}")

        names = [table["table_name"] for table in tables]
        columns = self._client.columns(names, show_progress)
        column_key = WarehouseAsset.COLUMN.value
        columns_location = self._storage.put(column_key, columns)
        logger.info(f"Extracted {len(columns)} columns to {columns_location}")

        return {table_key: tables_location, column_key: columns_location}

    def extract_role(self) -> Paths:
        """extract no users and return the empty file location"""
        no_users: List[dict] = []
        user_key = WarehouseAsset.USER.value
        location = self._storage.put(user_key, no_users)
        logger.info(f"Extracted {len(no_users)} users to {location}")
        return {user_key: location}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def extract_all(**kwargs) -> None:
    """
    Extract all assets from Salesforce and store the results in CSV files

    The output directory and the `skip_existing` switch come from
    `common_args(kwargs)`; the credentials come from `to_credentials(kwargs)`.
    """
    output_directory, skip_existing = common_args(kwargs)

    credentials = to_credentials(kwargs)
    client = SalesforceClient(credentials=credentials)
    storage = LocalStorage(directory=output_directory)
    processor = SalesforceExtractionProcessor(
        client=client,
        storage=storage,
        skip_existing=skip_existing,
    )

    # catalog (tables and columns) first, then the users file
    processor.extract_catalog()
    processor.extract_role()

    source_name = client.name()
    write_summary(
        output_directory,
        storage.stored_at_ts,
        client_name=source_name,
    )
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import Any, Dict, List
|
|
2
|
+
|
|
3
|
+
from .constants import SCHEMA_NAME
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _clean(raw: str) -> str:
|
|
7
|
+
return raw.strip('"')
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _field_description(field: Dict[str, Any]) -> str:
    """
    Build the multi-line description of a field.

    Each available piece of context becomes one `- <label>: <value>` line,
    in a fixed order: description, help text, compliance categorization,
    data sensitivity level. Missing or empty values are skipped; an empty
    string is returned when nothing is available.
    """
    # "FieldDefinition" may be absent or None: fall back to an empty mapping
    definition: Dict[str, str] = field.get("FieldDefinition") or {}

    labelled_values = (
        ("Description", definition.get("Description")),
        ("Help Text", field.get("InlineHelpText")),
        ("Compliance Categorization", definition.get("ComplianceGroup")),
        ("Data Sensitivity Level", definition.get("SecurityClassification")),
    )

    lines = []
    for label, value in labelled_values:
        if not value:
            continue
        lines.append(f"- {label}: {_clean(value)}")
    return "\n".join(lines)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _to_column_payload(field: dict, position: int, table_name: str) -> dict:
    """
    Format a raw field as a column row.

    The column id is `<table_name>.<QualifiedApiName>`; `position` is
    stored as-is under `ordinal_position`. A field without
    "QualifiedApiName" raises KeyError, while a missing "DataType" yields
    None.
    """
    column_name = field["QualifiedApiName"]
    column_id = f"{table_name}.{column_name}"
    description = _field_description(field)
    data_type = field.get("DataType")
    return {
        "id": column_id,
        "table_id": table_name,
        "column_name": column_name,
        "description": description,
        "data_type": data_type,
        "ordinal_position": position,
    }
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _to_table_payload(table: dict) -> dict:
    """
    Format a raw sobject as a table row.

    The qualified API name serves as both id and table name; every table
    is attached to the schema SCHEMA_NAME, with an empty description and
    no tags.
    """
    api_name = table["QualifiedApiName"]
    return {
        "id": api_name,
        "schema_id": SCHEMA_NAME,
        "table_name": api_name,
        "description": "",
        "tags": [],
        "type": "TABLE",
    }
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SalesforceFormatter:
    """
    Stateless helpers converting raw responses into the rows that are
    exported as csv.
    """

    @staticmethod
    def tables(sobjects: List[dict]) -> List[dict]:
        """Return one table row per raw sobject, in input order."""
        return list(map(_to_table_payload, sobjects))

    @staticmethod
    def columns(sobject_fields: Dict[str, List[dict]]) -> List[dict]:
        """
        Return one column row per field of every sobject.

        `sobject_fields` maps a table name to its raw fields. The ordinal
        position of a column is the zero-based index of the field within
        its table's list.
        """
        rows: List[dict] = []
        for table_name, fields in sobject_fields.items():
            for position, field in enumerate(fields):
                rows.append(_to_column_payload(field, position, table_name))
        return rows
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from .format import _field_description
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test__field_description():
    """Check the description is built up as context is added to a field."""
    # nothing to describe: neither a bare field nor an empty definition
    assert _field_description({}) == ""

    definition: dict = {}
    field: dict = {"FieldDefinition": definition}
    assert _field_description(field) == ""

    # `definition` is shared with `field`, so updates are seen through it
    definition["Description"] = "foo"
    assert "foo" in _field_description(field)

    field["InlineHelpText"] = "bar"
    assert "bar" in _field_description(field)

    definition["ComplianceGroup"] = "bim"
    assert "bim" in _field_description(field)

    definition["SecurityClassification"] = "bam"
    description = _field_description(field)
    assert "bam" in description

    expected_lines = [
        "- Description: foo",
        "- Help Text: bar",
        "- Compliance Categorization: bim",
        "- Data Sensitivity Level: bam",
    ]
    assert description == "\n".join(expected_lines)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# SOQL template selecting entity metadata from EntityDefinition, one page at
# a time: only rows whose DurableId is greater than `start_durable_id` are
# returned, ordered by DurableId and capped at `limit` rows.
# NOTE(review): the placeholders are presumably filled with str.format by the
# client, and `start_durable_id` lands inside a quoted literal — confirm that
# callers only pass trusted values.
SOBJECTS_QUERY_TPL = """
|
|
2
|
+
SELECT
|
|
3
|
+
DeploymentStatus,
|
|
4
|
+
DeveloperName,
|
|
5
|
+
DurableId,
|
|
6
|
+
ExternalSharingModel,
|
|
7
|
+
InternalSharingModel,
|
|
8
|
+
Label,
|
|
9
|
+
PluralLabel,
|
|
10
|
+
QualifiedApiName
|
|
11
|
+
FROM EntityDefinition
|
|
12
|
+
WHERE DurableId > '{start_durable_id}'
|
|
13
|
+
ORDER BY DurableId
|
|
14
|
+
LIMIT {limit}
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# SOQL template selecting the field metadata (EntityParticle rows, with some
# attributes of the related FieldDefinition) of a single entity, identified
# by `entity_definition_id`. The query carries no ORDER BY and no LIMIT.
# NOTE(review): the placeholder is presumably filled with str.format by the
# client and lands inside a quoted literal — confirm that callers only pass
# trusted values.
SOBJECT_FIELDS_QUERY_TPL = """
|
|
19
|
+
SELECT
|
|
20
|
+
DataType,
|
|
21
|
+
DeveloperName,
|
|
22
|
+
Digits,
|
|
23
|
+
FieldDefinition.BusinessOwnerId,
|
|
24
|
+
FieldDefinition.ComplianceGroup,
|
|
25
|
+
FieldDefinition.DataType,
|
|
26
|
+
FieldDefinition.Description,
|
|
27
|
+
FieldDefinition.IsIndexed,
|
|
28
|
+
FieldDefinition.LastModifiedBy.Username,
|
|
29
|
+
FieldDefinition.LastModifiedDate,
|
|
30
|
+
FieldDefinition.SecurityClassification,
|
|
31
|
+
InlineHelpText,
|
|
32
|
+
IsComponent,
|
|
33
|
+
IsCompound,
|
|
34
|
+
IsNillable,
|
|
35
|
+
IsUnique,
|
|
36
|
+
Label,
|
|
37
|
+
Length,
|
|
38
|
+
Precision,
|
|
39
|
+
QualifiedApiName,
|
|
40
|
+
ReferenceTo,
|
|
41
|
+
RelationshipName,
|
|
42
|
+
Scale
|
|
43
|
+
FROM EntityParticle
|
|
44
|
+
WHERE EntityDefinitionId='{entity_definition_id}'
|
|
45
|
+
"""
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=6ApRuFb6ZxwvpMFyBRJAU6L7teZ01KK2tD0pXFvsYdw,10026
|
|
2
2
|
Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
|
|
3
3
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
4
4
|
README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
|
|
5
5
|
castor_extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
castor_extractor/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
castor_extractor/commands/extract_bigquery.py,sha256=dU4OiYO1V0n32orvZnMh1_xtFKF_VxHNXcVsH3otY-g,1269
|
|
8
|
-
castor_extractor/commands/extract_databricks.py,sha256=
|
|
8
|
+
castor_extractor/commands/extract_databricks.py,sha256=SVKyoa-BBUQAM6HRHf1Wdg9-tpICic2yyvXQwHcNBhA,1264
|
|
9
9
|
castor_extractor/commands/extract_domo.py,sha256=lwJ7XeYOeLMF2plf5PK3cL56N9n2yjcDsyRM6UFwKTM,1208
|
|
10
10
|
castor_extractor/commands/extract_looker.py,sha256=gwjIQPOHrXevgU_o2l8vDHHQT8Sb-mGdwcceb6wJJbg,1483
|
|
11
11
|
castor_extractor/commands/extract_metabase_api.py,sha256=VPyEKO2VFXzk_OsbQnDhObE9siuBfoegechCZYPZi2k,778
|
|
@@ -16,7 +16,8 @@ castor_extractor/commands/extract_postgres.py,sha256=pX0RnCPi4nw6QQ6wiAuZ_Xt3ZbD
|
|
|
16
16
|
castor_extractor/commands/extract_powerbi.py,sha256=e6MXDNOafdp0w4ZtOnE5z5o_CxvaodUbbQFk__pDiM4,875
|
|
17
17
|
castor_extractor/commands/extract_qlik.py,sha256=mSeyGOprTyBExes-lzp___7tgBS1KeyTVpwKAqMpkiw,989
|
|
18
18
|
castor_extractor/commands/extract_redshift.py,sha256=bdLp7d7ImZoKCkWc3f3NXF1imIzMVT43_KPI-x4UVac,1155
|
|
19
|
-
castor_extractor/commands/
|
|
19
|
+
castor_extractor/commands/extract_salesforce.py,sha256=s2o799ePjQFYsVwZbrGEflzOIwJKtxUMb9pcF4-K90Y,1400
|
|
20
|
+
castor_extractor/commands/extract_salesforce_reporting.py,sha256=rmAo--dl_m2x7TtJ29w1PfsKt9tQDZocTdvwTqj-fnI,1146
|
|
20
21
|
castor_extractor/commands/extract_sigma.py,sha256=agwfKj55C81-kect3K6xSJVBv3TDuPT6fGWSTytkQ2o,703
|
|
21
22
|
castor_extractor/commands/extract_snowflake.py,sha256=vYiruxRoo--GeMemOGsSE1w9kcKTh_y4E165HtMVzkM,1982
|
|
22
23
|
castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
|
|
@@ -46,7 +47,7 @@ castor_extractor/uploader/utils.py,sha256=NCe0tkB28BVhqzOaDhDjaSfODjjcPWB17X6chn
|
|
|
46
47
|
castor_extractor/utils/__init__.py,sha256=cZbvEJ4G2IcJR2BzHwi3oOwDLqJsBx0J9gD71lWE1BQ,1149
|
|
47
48
|
castor_extractor/utils/client/__init__.py,sha256=CRE-xJKm6fVV9dB8ljzB5YoOxX4I1sCD1KSgqs3Y8_Y,161
|
|
48
49
|
castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2VTpqQAwV76wk,2048
|
|
49
|
-
castor_extractor/utils/client/api.py,sha256=
|
|
50
|
+
castor_extractor/utils/client/api.py,sha256=tHa7eC11sS_eOCXhlnvUa2haRfOLENmjKgjB09Ijt0s,1664
|
|
50
51
|
castor_extractor/utils/client/api_test.py,sha256=NSMdXg1FLc37erqHp2FZsIsogWVv6lFSs7rDXHikr-E,542
|
|
51
52
|
castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9mMqL71nyygo0,866
|
|
52
53
|
castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
|
|
@@ -82,6 +83,12 @@ castor_extractor/utils/retry.py,sha256=vYdJMiM-Nr82H1MuD7_KZdqbFz98ffQGqJ4Owbr6m
|
|
|
82
83
|
castor_extractor/utils/retry_test.py,sha256=nsMttlmyKygVcffX3Hay8U2S1BspkGPiCmzIXPpLKyk,2230
|
|
83
84
|
castor_extractor/utils/safe.py,sha256=jpfIimwdBSVUvU2DPFrhqpKC_DSYwxQqd08MlIkSODY,1967
|
|
84
85
|
castor_extractor/utils/safe_test.py,sha256=IHN1Z761tYMFslYC-2HAfkXmFPh4LYSqNLs4QZwykjk,2160
|
|
86
|
+
castor_extractor/utils/salesforce/__init__.py,sha256=VGD4vd1Se79z2PAaVCvCSL3yhgWlhQFaVDLZ5aERug0,132
|
|
87
|
+
castor_extractor/utils/salesforce/client.py,sha256=Mt9yykAPROFgme5eDqoZQv4u85hxcUoG-tmKFPwLibo,2856
|
|
88
|
+
castor_extractor/utils/salesforce/client_test.py,sha256=s6UTogjC36jrJOnYA-gFuyTQsvROCt9y_eoD2O41xCg,682
|
|
89
|
+
castor_extractor/utils/salesforce/constants.py,sha256=5sph6dbTCp0mAGWP24WTpC1wsIqeG8yI8-BsKrmV_wA,335
|
|
90
|
+
castor_extractor/utils/salesforce/credentials.py,sha256=Wwb-_BlbFBJUl3dhXz72IIqcCfj1F3Zj3JoYr3FYk0A,2045
|
|
91
|
+
castor_extractor/utils/salesforce/credentials_test.py,sha256=FQRyNk2Jsh6KtYiW20oL43CVnGjXLcAjdFATkE7jK0s,586
|
|
85
92
|
castor_extractor/utils/store.py,sha256=D_pVaPsu1MKAJC0K47O_vYTs-Afl6oejravAJdvjmGc,2040
|
|
86
93
|
castor_extractor/utils/string.py,sha256=aW6bbjqEGnh9kT5KZBnMlV6fhdgOJ0ENCkCTDon1xA0,2377
|
|
87
94
|
castor_extractor/utils/string_test.py,sha256=OmRVCJUXMcCTwY-QJDhUViYpxkvQQgNRJLCaXY0iUnk,2535
|
|
@@ -182,15 +189,12 @@ castor_extractor/visualization/qlik/client/rest.py,sha256=EkHEs3_Vrmy0Ex5b9M_klm
|
|
|
182
189
|
castor_extractor/visualization/qlik/client/rest_test.py,sha256=Z2gBTokUVv-JapBtrY2nZDJzBtOusRq6_lJutVvzqG8,1684
|
|
183
190
|
castor_extractor/visualization/qlik/constants.py,sha256=Pbd1SH3_VI_yEhoDx4PIXBUup-MqXUFjxDkDRr2V4J8,95
|
|
184
191
|
castor_extractor/visualization/qlik/extract.py,sha256=1ulrirDzoKEdsWxztR6MHcUAE8CiEMx75esdUE7PAmY,2397
|
|
185
|
-
castor_extractor/visualization/salesforce_reporting/__init__.py,sha256=
|
|
192
|
+
castor_extractor/visualization/salesforce_reporting/__init__.py,sha256=MvArD0GKNIpCDvLIYcpKrjMjFLhMyDETK6i3k0Fb6Tk,124
|
|
186
193
|
castor_extractor/visualization/salesforce_reporting/assets.py,sha256=2J-iAmJGGDufOcJUgE47M3-dEcjYXcVyVUNcmHrj79w,271
|
|
187
|
-
castor_extractor/visualization/salesforce_reporting/client/__init__.py,sha256=
|
|
188
|
-
castor_extractor/visualization/salesforce_reporting/client/
|
|
189
|
-
castor_extractor/visualization/salesforce_reporting/client/credentials.py,sha256=gJapeUKs8gZSY_YdzX-j0Iv4vcaBzCTAlXMecO8Kk5k,875
|
|
190
|
-
castor_extractor/visualization/salesforce_reporting/client/credentials_test.py,sha256=2qIYZ8QuuarHz5EZ9bU0sGEOvoDLnN0eiwsvnbtgXXY,567
|
|
191
|
-
castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=_3wa5-bxKGwaNEwh-KLke3O6lbFWzOUAe1bL8n9hH04,4429
|
|
194
|
+
castor_extractor/visualization/salesforce_reporting/client/__init__.py,sha256=DIA6f_vNJZqT89qVYxg98Le7QeDn2y0Qew03V3J9t9o,44
|
|
195
|
+
castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=hzaXWLcYt0aAHXK46DbsLmzocjRY1llwrNj8_3TObKs,1849
|
|
192
196
|
castor_extractor/visualization/salesforce_reporting/client/soql.py,sha256=DHTi058UEaZKZnxJVmsCouPfA9Lgr3gFY6vY7NiqyMM,1584
|
|
193
|
-
castor_extractor/visualization/salesforce_reporting/extract.py,sha256=
|
|
197
|
+
castor_extractor/visualization/salesforce_reporting/extract.py,sha256=5QwZwP27uXrFJSf00El7Ku592-9fhmCtTdiUGpNkHZM,1678
|
|
194
198
|
castor_extractor/visualization/sigma/__init__.py,sha256=m98AEysUsVHQAWT6m5nvrtLMs22SDQH9G78-IcUwBoY,130
|
|
195
199
|
castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
|
|
196
200
|
castor_extractor/visualization/sigma/client/__init__.py,sha256=sFqsbcwilIxu75njtSbnAIsNlPdRgB39SAInNUf-nbQ,90
|
|
@@ -262,10 +266,10 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
|
|
|
262
266
|
castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
|
|
263
267
|
castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
|
|
264
268
|
castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP1y8DK0tySiD_EXe5_xWE,200
|
|
265
|
-
castor_extractor/warehouse/databricks/client.py,sha256=
|
|
269
|
+
castor_extractor/warehouse/databricks/client.py,sha256=FIqHjlGN5EN2dvcZD2941zPAomOye91JmkgPlxGDk0g,8078
|
|
266
270
|
castor_extractor/warehouse/databricks/client_test.py,sha256=ctOQnUXosuuFjWGJKgkxjUcV4vQUBWt2BQ_f0Tyzqe4,2717
|
|
267
|
-
castor_extractor/warehouse/databricks/credentials.py,sha256=
|
|
268
|
-
castor_extractor/warehouse/databricks/extract.py,sha256
|
|
271
|
+
castor_extractor/warehouse/databricks/credentials.py,sha256=PpGv5_GP320UQjV_gvaxSpOw58AmqSznmjGhGfe6bdU,655
|
|
272
|
+
castor_extractor/warehouse/databricks/extract.py,sha256=-vJhAIxSu1lD_xGl-GXZYTmc5BGu0aXM3l-U0UghREM,5773
|
|
269
273
|
castor_extractor/warehouse/databricks/format.py,sha256=LiPGCTPzL3gQQMMl1v6DvpcTk7BWxZFq03jnHdoYnuU,4968
|
|
270
274
|
castor_extractor/warehouse/databricks/format_test.py,sha256=iPmdJof43fBYL1Sa_fBrCWDQHCHgm7IWCZag1kWkj9E,1970
|
|
271
275
|
castor_extractor/warehouse/databricks/types.py,sha256=T2SyLy9pY_olLtstdC77moPxIiikVsuQLMxh92YMJQo,78
|
|
@@ -307,6 +311,13 @@ castor_extractor/warehouse/redshift/queries/table_freshness.sql,sha256=l61_ysmTE
|
|
|
307
311
|
castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6WfwsX6VavoMb2VqYA32f6Dt-_Y,170
|
|
308
312
|
castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
|
|
309
313
|
castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
|
|
314
|
+
castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
|
|
315
|
+
castor_extractor/warehouse/salesforce/client.py,sha256=_XiQJJJfELKGmzuBv8Mr_C0FJ-oLg71KbvpehrGvJ_k,3842
|
|
316
|
+
castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
|
|
317
|
+
castor_extractor/warehouse/salesforce/extract.py,sha256=ZTb58t7mqhavNvErrnw8M0L4Uu3qJpQEIldymurbgl0,3417
|
|
318
|
+
castor_extractor/warehouse/salesforce/format.py,sha256=_BSj_G6C-kPwRubxSx1WuHg-_nYVQVNgAANqNfXL5RM,2154
|
|
319
|
+
castor_extractor/warehouse/salesforce/format_test.py,sha256=6hy0USZH7-PDQt3oZ9_3Nwlr3eHLkqNEchqIM3bIDrU,858
|
|
320
|
+
castor_extractor/warehouse/salesforce/soql.py,sha256=81lAtPpq7ccmi6o1zkwqLKC1esOfSsfNObdizkfgiSM,1089
|
|
310
321
|
castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
|
|
311
322
|
castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCwPDh2uBV5aKc1Ql914,5555
|
|
312
323
|
castor_extractor/warehouse/snowflake/client_test.py,sha256=ihWtOOAQfh8pu5JTr_EWfqefKOVIaJXznACURzaU1Qs,1432
|
|
@@ -346,8 +357,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
|
|
|
346
357
|
castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
|
|
347
358
|
castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
|
|
348
359
|
castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
|
|
349
|
-
castor_extractor-0.16.
|
|
350
|
-
castor_extractor-0.16.
|
|
351
|
-
castor_extractor-0.16.
|
|
352
|
-
castor_extractor-0.16.
|
|
353
|
-
castor_extractor-0.16.
|
|
360
|
+
castor_extractor-0.16.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
361
|
+
castor_extractor-0.16.4.dist-info/METADATA,sha256=-D39Tmu_LFDHRe3HrZ542JjZxl0puzZr0n8wMkW52P0,6370
|
|
362
|
+
castor_extractor-0.16.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
363
|
+
castor_extractor-0.16.4.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
|
|
364
|
+
castor_extractor-0.16.4.dist-info/RECORD,,
|
|
@@ -11,7 +11,8 @@ castor-extract-postgres=castor_extractor.commands.extract_postgres:main
|
|
|
11
11
|
castor-extract-powerbi=castor_extractor.commands.extract_powerbi:main
|
|
12
12
|
castor-extract-qlik=castor_extractor.commands.extract_qlik:main
|
|
13
13
|
castor-extract-redshift=castor_extractor.commands.extract_redshift:main
|
|
14
|
-
castor-extract-salesforce
|
|
14
|
+
castor-extract-salesforce=castor_extractor.commands.extract_salesforce:main
|
|
15
|
+
castor-extract-salesforce-viz=castor_extractor.commands.extract_salesforce_reporting:main
|
|
15
16
|
castor-extract-sigma=castor_extractor.commands.extract_sigma:main
|
|
16
17
|
castor-extract-snowflake=castor_extractor.commands.extract_snowflake:main
|
|
17
18
|
castor-extract-sqlserver=castor_extractor.commands.extract_sqlserver:main
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class SalesforceCredentials:
    """
    Credentials used to request a token from the Salesforce rest API.

    All constructor arguments are keyword-only. The stored `password` is
    the given password with the security token appended to it.
    """

    def __init__(
        self,
        *,
        username: str,
        password: str,
        security_token: str,
        consumer_key: str,
        consumer_secret: str,
    ):
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self.username = username
        # the concatenation is what gets sent as "password" in the payload
        self.password = password + security_token

    def token_request_payload(self) -> Dict[str, str]:
        """
        Return the form parameters to post in order to get an
        authentication token (password grant type).
        """
        payload: Dict[str, str] = {"grant_type": "password"}
        payload["client_id"] = self.consumer_key
        payload["client_secret"] = self.consumer_secret
        payload["username"] = self.username
        payload["password"] = self.password
        return payload
|
|
File without changes
|
|
File without changes
|