castor-extractor 0.16.3__py3-none-any.whl → 0.16.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (32) hide show
  1. CHANGELOG.md +4 -0
  2. castor_extractor/commands/extract_databricks.py +3 -0
  3. castor_extractor/commands/extract_salesforce.py +43 -0
  4. castor_extractor/commands/extract_salesforce_reporting.py +6 -6
  5. castor_extractor/utils/client/api.py +36 -27
  6. castor_extractor/utils/salesforce/__init__.py +3 -0
  7. castor_extractor/utils/salesforce/client.py +84 -0
  8. castor_extractor/utils/salesforce/client_test.py +21 -0
  9. castor_extractor/utils/salesforce/constants.py +13 -0
  10. castor_extractor/utils/salesforce/credentials.py +65 -0
  11. castor_extractor/{visualization/salesforce_reporting/client → utils/salesforce}/credentials_test.py +3 -2
  12. castor_extractor/visualization/salesforce_reporting/__init__.py +1 -2
  13. castor_extractor/visualization/salesforce_reporting/client/__init__.py +1 -2
  14. castor_extractor/visualization/salesforce_reporting/client/rest.py +7 -90
  15. castor_extractor/visualization/salesforce_reporting/extract.py +10 -8
  16. castor_extractor/warehouse/databricks/client.py +1 -1
  17. castor_extractor/warehouse/databricks/credentials.py +1 -4
  18. castor_extractor/warehouse/databricks/extract.py +1 -1
  19. castor_extractor/warehouse/salesforce/__init__.py +6 -0
  20. castor_extractor/warehouse/salesforce/client.py +112 -0
  21. castor_extractor/warehouse/salesforce/constants.py +2 -0
  22. castor_extractor/warehouse/salesforce/extract.py +111 -0
  23. castor_extractor/warehouse/salesforce/format.py +67 -0
  24. castor_extractor/warehouse/salesforce/format_test.py +32 -0
  25. castor_extractor/warehouse/salesforce/soql.py +45 -0
  26. {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/METADATA +1 -1
  27. {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/RECORD +30 -19
  28. {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/entry_points.txt +2 -1
  29. castor_extractor/visualization/salesforce_reporting/client/constants.py +0 -2
  30. castor_extractor/visualization/salesforce_reporting/client/credentials.py +0 -33
  31. {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/LICENCE +0 -0
  32. {castor_extractor-0.16.3.dist-info → castor_extractor-0.16.4.dist-info}/WHEEL +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.16.4 - 2024-04-25
4
+
5
+ * Salesforce: extract sobjects and fields
6
+
3
7
  ## 0.16.3 - 2024-04-24
4
8
 
5
9
  * Databricks: Extract table owners
@@ -1,7 +1,10 @@
1
+ import logging
1
2
  from argparse import ArgumentParser
2
3
 
3
4
  from castor_extractor.warehouse import databricks # type: ignore
4
5
 
6
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
7
+
5
8
 
6
9
  def main():
7
10
  parser = ArgumentParser()
@@ -0,0 +1,43 @@
1
+ import logging
2
+ from argparse import ArgumentParser
3
+
4
+ from castor_extractor.warehouse import salesforce # type: ignore
5
+
6
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
7
+
8
+
9
def main():
    """Parse the command-line options and launch the Salesforce extraction."""
    parser = ArgumentParser()

    # (short flag, long flag, help text) of every plain string option,
    # declared in the order they should appear in the usage message
    string_options = (
        ("-u", "--username", "Salesforce username"),
        ("-p", "--password", "Salesforce password"),
        ("-c", "--client-id", "Salesforce client id"),
        ("-s", "--client-secret", "Salesforce client secret"),
        ("-t", "--security-token", "Salesforce security token"),
        ("-b", "--base-url", "Salesforce instance URL"),
        ("-o", "--output", "Directory to write to"),
    )
    for short_flag, long_flag, help_text in string_options:
        parser.add_argument(short_flag, long_flag, help=help_text)

    # boolean switch, off unless explicitly passed
    parser.add_argument(
        "--skip-existing",
        dest="skip_existing",
        action="store_true",
        help="Skips files already extracted instead of replacing them",
    )
    parser.set_defaults(skip_existing=False)

    options = parser.parse_args()

    salesforce.extract_all(
        username=options.username,
        password=options.password,
        client_id=options.client_id,
        client_secret=options.client_secret,
        security_token=options.security_token,
        base_url=options.base_url,
        output_directory=options.output,
        skip_existing=options.skip_existing,
    )
@@ -11,23 +11,23 @@ def main():
11
11
 
12
12
  parser.add_argument("-u", "--username", help="Salesforce username")
13
13
  parser.add_argument("-p", "--password", help="Salesforce password")
14
- parser.add_argument("-k", "--consumer-key", help="Salesforce consumer key")
14
+ parser.add_argument("-c", "--client-id", help="Salesforce client id")
15
15
  parser.add_argument(
16
- "-s", "--consumer-secret", help="Salesforce consumer secret"
16
+ "-s", "--client-secret", help="Salesforce client secret"
17
17
  )
18
18
  parser.add_argument(
19
19
  "-t", "--security-token", help="Salesforce security token"
20
20
  )
21
- parser.add_argument("-l", "--url", help="Salesforce instance URL")
21
+ parser.add_argument("-b", "--base-url", help="Salesforce instance URL")
22
22
  parser.add_argument("-o", "--output", help="Directory to write to")
23
23
 
24
24
  args = parser.parse_args()
25
25
  salesforce_reporting.extract_all(
26
26
  username=args.username,
27
27
  password=args.password,
28
- consumer_key=args.consumer_key,
29
- consumer_secret=args.consumer_secret,
28
+ client_id=args.client_id,
29
+ client_secret=args.client_secret,
30
30
  security_token=args.security_token,
31
- instance_url=args.url,
31
+ base_url=args.base_url,
32
32
  output_directory=args.output,
33
33
  )
@@ -1,23 +1,25 @@
1
- import json
2
- from typing import Optional
1
+ import logging
2
+ from typing import Any, Callable, Dict, Literal, Optional
3
3
 
4
4
  import requests
5
5
 
6
- from ...warehouse.databricks.credentials import DatabricksCredentials
6
+ logger = logging.getLogger(__name__)
7
7
 
8
8
  DEFAULT_TIMEOUT_MS = 30_000
9
- APICredentials = DatabricksCredentials
9
+
10
+ # https://requests.readthedocs.io/en/latest/api/#requests.request
11
+ HttpMethod = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
10
12
 
11
13
 
12
14
  class APIClient:
13
15
  """
14
16
  API client
15
- - used for Databricks Unity Catalog for now
16
- - authentication via access token for now
17
+ - authentication via access token
17
18
  """
18
19
 
19
- def __init__(self, credentials: APICredentials):
20
- self.credentials = credentials
20
+ def __init__(self, host: str, token: Optional[str] = None):
21
+ self._host = host
22
+ self._token = token or ""
21
23
  self._timeout = DEFAULT_TIMEOUT_MS
22
24
 
23
25
  @staticmethod
@@ -26,25 +28,32 @@ class APIClient:
26
28
  host = "https://" + host
27
29
  return f"{host.strip('/')}/{path}"
28
30
 
29
- def _headers(self):
30
- return {
31
- "Content-type": "application/json",
32
- "Authorization": f"Bearer {self.credentials.token}",
33
- }
34
-
35
- def get(self, path: str, payload: Optional[dict] = None) -> dict:
36
- """
37
- path: REST API operation path, such as /api/2.0/clusters/get
38
- """
39
- url = self.build_url(self.credentials.host, path)
40
- response = requests.get(
41
- url,
42
- data=json.dumps(payload or dict()),
43
- headers=self._headers(),
44
- timeout=self._timeout,
31
+ def _headers(self) -> Dict[str, str]:
32
+ if self._token:
33
+ return {"Authorization": f"Bearer {self._token}"}
34
+ return dict()
35
+
36
+ def _call(
37
+ self,
38
+ url: str,
39
+ method: HttpMethod = "GET",
40
+ *,
41
+ params: Optional[dict] = None,
42
+ data: Optional[dict] = None,
43
+ processor: Optional[Callable] = None,
44
+ ) -> Any:
45
+ logger.debug(f"Calling {method} on {url}")
46
+ result = requests.request(
47
+ method, url, headers=self._headers(), params=params, json=data
45
48
  )
49
+ result.raise_for_status()
46
50
 
47
- if response.content:
48
- return json.loads(response.content)
51
+ if processor:
52
+ return processor(result)
49
53
 
50
- return {}
54
+ return result.json()
55
+
56
+ def get(self, path: str, payload: Optional[dict] = None) -> dict:
57
+ """path: REST API operation path, such as /api/2.0/clusters/get"""
58
+ url = self.build_url(self._host, path)
59
+ return self._call(url=url, data=payload)
@@ -0,0 +1,3 @@
1
+ from .client import SalesforceBaseClient
2
+ from .constants import Keys
3
+ from .credentials import SalesforceCredentials, to_credentials
@@ -0,0 +1,84 @@
1
+ import logging
2
+ from typing import Iterator, Optional, Tuple
3
+
4
+ from requests import Response
5
+
6
+ from ...utils.client.api import APIClient
7
+ from .constants import DEFAULT_API_VERSION, DEFAULT_PAGINATION_LIMIT
8
+ from .credentials import SalesforceCredentials
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class SalesforceBaseClient(APIClient):
    """
    Salesforce API client.
    https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_rest.htm

    An access token is requested at construction time and stored on the
    instance. The class also builds the versioned REST URLs and runs
    paginated SOQL queries.
    """

    api_version = DEFAULT_API_VERSION
    pagination_limit = DEFAULT_PAGINATION_LIMIT

    PATH_TPL = "services/data/v{version}/{suffix}"

    def __init__(self, credentials: SalesforceCredentials):
        # the parent is initialised without a token: it is fetched right after
        super().__init__(host=credentials.base_url)
        self._token = self._access_token(credentials)

    def _access_token(self, credentials: SalesforceCredentials) -> str:
        """Request an access token from the oauth2 endpoint and return it"""
        url = self.build_url(self._host, "services/oauth2/token")
        # NOTE(review): the credentials travel as query-string parameters
        # here - confirm this is intended rather than a form-encoded body.
        response = self._call(
            url, "POST", params=credentials.token_request_payload()
        )
        return response["access_token"]

    def _full_url(self, suffix: str) -> str:
        """Build the versioned REST URL ending with the given suffix"""
        path = self.PATH_TPL.format(version=self.api_version, suffix=suffix)
        return self.build_url(self._host, path)

    @property
    def query_url(self) -> str:
        """Returns the query API url"""
        return self._full_url("query")

    @property
    def tooling_url(self) -> str:
        """Returns the tooling API url"""
        return self._full_url("tooling/query")

    @staticmethod
    def _query_processor(response: Response) -> Tuple[dict, Optional[str]]:
        """Split a query response into its records and next-page path"""
        results = response.json()
        # "nextRecordsUrl" is read with .get(): it may be absent
        return results["records"], results.get("nextRecordsUrl")

    def _has_reached_pagination_limit(self, page_number: int) -> bool:
        """True once page_number exceeds the configured pagination limit"""
        return page_number > self.pagination_limit

    def _query_first_page(
        self, query: str
    ) -> Tuple[Iterator[dict], Optional[str]]:
        """Run the query and return the first records and next-page path"""
        url = self.query_url
        logger.info("querying page 0")
        records, next_page_url = self._call(
            url, params={"q": query}, processor=self._query_processor
        )
        return records, next_page_url

    def _query_all(self, query: str) -> Iterator[dict]:
        """
        Run a SOQL query over salesforce API.

        Yields the records of every page, following the next-page path
        returned with each response until there is none left or the
        pagination limit is reached.

        more: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_query.htm
        """
        records, next_page_path = self._query_first_page(query)
        yield from records

        page_count = 1
        while next_page_path and not self._has_reached_pagination_limit(
            page_count
        ):
            logger.info(f"querying page {page_count}")
            # build_url joins host and path with "/": drop any leading slash
            # of the returned path so the URL does not contain "//"
            url = self.build_url(self._host, next_page_path.lstrip("/"))
            # the cursor must be rebound on every iteration, otherwise the
            # same page is fetched (and yielded) again and again
            records, next_page_path = self._call(
                url, processor=self._query_processor
            )
            yield from records
            page_count += 1
@@ -0,0 +1,21 @@
1
+ from unittest.mock import patch
2
+
3
+ from .client import SalesforceBaseClient
4
+ from .credentials import SalesforceCredentials
5
+
6
+
7
@patch.object(SalesforceBaseClient, "_call")
def test_SalesforceBaseClient__urls(mock_call):
    """The query and tooling URLs are derived from the base URL and version"""
    # the constructor fetches its access token through `_call`
    mock_call.return_value = {"access_token": "the_token"}
    fake_credentials = dict(
        username="usr",
        password="pw",
        client_id="key",
        client_secret="secret",
        security_token="token",
        base_url="url",
    )
    client = SalesforceBaseClient(SalesforceCredentials(**fake_credentials))

    expected_query_url = "https://url/services/data/v59.0/query"
    expected_tooling_url = "https://url/services/data/v59.0/tooling/query"
    assert client.query_url == expected_query_url
    assert client.tooling_url == expected_tooling_url
@@ -0,0 +1,13 @@
1
+ DEFAULT_API_VERSION = 59.0
2
+ DEFAULT_PAGINATION_LIMIT = 100
3
+
4
+
5
class Keys:
    """Names of the parameters holding each Salesforce credential"""

    # account identification
    USERNAME = "username"
    PASSWORD = "password"  # noqa: S105

    # client application
    CLIENT_ID = "client_id"
    CLIENT_SECRET = "client_secret"  # noqa: S105

    SECURITY_TOKEN = "security_token"  # noqa: S105
    BASE_URL = "base_url"
@@ -0,0 +1,65 @@
1
+ from typing import Dict
2
+
3
+ from ...utils import from_env
4
+ from .constants import Keys
5
+
6
+ _USERNAME = "CASTOR_SALESFORCE_USERNAME"
7
+ _PASSWORD = "CASTOR_SALESFORCE_PASSWORD" # noqa: S105
8
+ _SECURITY_TOKEN = "CASTOR_SALESFORCE_SECURITY_TOKEN" # noqa: S105
9
+ _CLIENT_ID = "CASTOR_SALESFORCE_CLIENT_ID"
10
+ _CLIENT_SECRET = "CASTOR_SALESFORCE_CLIENT_SECRET" # noqa: S105
11
+ _BASE_URL = "CASTOR_SALESFORCE_BASE_URL"
12
+
13
+
14
class SalesforceCredentials:
    """
    Holds the values needed to authenticate against the Salesforce rest API
    """

    def __init__(
        self,
        *,
        username: str,
        password: str,
        security_token: str,
        client_id: str,
        client_secret: str,
        base_url: str,
    ):
        self.base_url = base_url
        self.client_id = client_id
        self.client_secret = client_secret
        self.username = username
        # the security token is appended to the password: the two are
        # stored, and later posted, as one single value
        self.password = password + security_token

    def token_request_payload(self) -> Dict[str, str]:
        """
        Build the parameters of the request retrieving the authentication
        token
        """
        return dict(
            grant_type="password",
            client_id=self.client_id,
            client_secret=self.client_secret,
            username=self.username,
            password=self.password,
        )
46
+
47
+
48
def to_credentials(params: dict) -> SalesforceCredentials:
    """
    Build the Salesforce credentials from the given parameters, reading
    each value that is missing (or empty) through `from_env`
    """

    def _resolve(key: str, env_name: str):
        # explicit parameter first, environment lookup as a fallback
        return params.get(key) or from_env(env_name)

    # values are resolved from top to bottom
    return SalesforceCredentials(
        username=_resolve(Keys.USERNAME, _USERNAME),
        password=_resolve(Keys.PASSWORD, _PASSWORD),
        security_token=_resolve(Keys.SECURITY_TOKEN, _SECURITY_TOKEN),
        client_id=_resolve(Keys.CLIENT_ID, _CLIENT_ID),
        client_secret=_resolve(Keys.CLIENT_SECRET, _CLIENT_SECRET),
        base_url=_resolve(Keys.BASE_URL, _BASE_URL),
    )
@@ -5,9 +5,10 @@ def test_Credentials_token_request_payload():
5
5
  creds = SalesforceCredentials(
6
6
  username="giphy",
7
7
  password="1312",
8
- consumer_key="degenie",
9
- consumer_secret="fautpasledire",
8
+ client_id="degenie",
9
+ client_secret="fautpasledire",
10
10
  security_token="yo",
11
+ base_url="man",
11
12
  )
12
13
 
13
14
  payload = creds.token_request_payload()
@@ -1,4 +1,3 @@
1
1
  from .assets import SalesforceReportingAsset
2
- from .client import SalesforceClient
3
- from .client.credentials import SalesforceCredentials
2
+ from .client import SalesforceReportingClient
4
3
  from .extract import extract_all
@@ -1,2 +1 @@
1
- from .credentials import SalesforceCredentials
2
- from .rest import SalesforceClient
1
+ from .rest import SalesforceReportingClient
@@ -1,13 +1,8 @@
1
1
  import logging
2
- import os
3
- from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
4
-
5
- import requests
6
- from requests import Response
2
+ from typing import Dict, Iterator, List, Optional
7
3
 
4
+ from ....utils.salesforce import SalesforceBaseClient
8
5
  from ..assets import SalesforceReportingAsset
9
- from .constants import DEFAULT_API_VERSION, DEFAULT_PAGINATION_LIMIT
10
- from .credentials import SalesforceCredentials
11
6
  from .soql import queries
12
7
 
13
8
  logger = logging.getLogger(__name__)
@@ -19,89 +14,11 @@ REQUIRING_URL_ASSETS = (
19
14
  )
20
15
 
21
16
 
22
- class SalesforceClient:
17
+ class SalesforceReportingClient(SalesforceBaseClient):
23
18
  """
24
- Salesforce API client.
25
- https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_rest.htm
19
+ Salesforce Reporting API client
26
20
  """
27
21
 
28
- api_version = DEFAULT_API_VERSION
29
- pagination_limit = DEFAULT_PAGINATION_LIMIT
30
-
31
- def __init__(
32
- self,
33
- credentials: SalesforceCredentials,
34
- instance_url: str,
35
- ):
36
- self.credentials = credentials
37
- self.instance_url = instance_url
38
- self._token = self._access_token()
39
-
40
- def _access_token(self) -> Tuple[str, str]:
41
- url = f"{self.instance_url}/services/oauth2/token"
42
- response = self._call(
43
- url, "POST", data=self.credentials.token_request_payload()
44
- )
45
- return response["access_token"]
46
-
47
- def _header(self) -> Dict:
48
- return {"Authorization": f"Bearer {self._token}"}
49
-
50
- @staticmethod
51
- def _call(
52
- url: str,
53
- method: str = "GET",
54
- *,
55
- header: Optional[Dict] = None,
56
- params: Optional[Dict] = None,
57
- data: Optional[Dict] = None,
58
- processor: Optional[Callable] = None,
59
- ) -> Any:
60
- logger.debug(f"Calling {method} on {url}")
61
- result = requests.request(
62
- method,
63
- url,
64
- headers=header,
65
- params=params,
66
- data=data,
67
- )
68
- result.raise_for_status()
69
-
70
- if processor:
71
- return processor(result)
72
-
73
- return result.json()
74
-
75
- @staticmethod
76
- def _query_processor(response: Response) -> Tuple[dict, Optional[str]]:
77
- results = response.json()
78
- return results["records"], results.get("nextRecordsUrl")
79
-
80
- def _query_all(self, query: str) -> Iterator[Dict]:
81
- """
82
- Run a SOQL query over salesforce API.
83
-
84
- more: https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_query.htm
85
- """
86
- url = f"{self.instance_url}/services/data/v{self.api_version}/query"
87
- records, next_page = self._call(
88
- url,
89
- params={"q": query},
90
- processor=self._query_processor,
91
- header=self._header(),
92
- )
93
- yield from records
94
-
95
- page_count = 0
96
- while next_page and page_count <= self.pagination_limit:
97
- logger.info(f"querying page {page_count}")
98
- url = f"{self.instance_url}{next_page}"
99
- records, next_page = self._call(
100
- url, processor=self._query_processor, header=self._header()
101
- )
102
- yield from records
103
- page_count += 1
104
-
105
22
  def _get_asset_url(
106
23
  self, asset_type: SalesforceReportingAsset, asset: dict
107
24
  ) -> Optional[str]:
@@ -111,15 +28,15 @@ class SalesforceClient:
111
28
 
112
29
  if asset_type == SalesforceReportingAsset.DASHBOARDS:
113
30
  path = f"lightning/r/Dashboard/{asset['Id']}/view"
114
- return os.path.join(self.instance_url, path)
31
+ return self.build_url(self._host, path)
115
32
 
116
33
  if asset_type == SalesforceReportingAsset.FOLDERS:
117
34
  path = asset["attributes"]["url"].lstrip("/")
118
- return os.path.join(self.instance_url, path)
35
+ return self.build_url(self._host, path)
119
36
 
120
37
  if asset_type == SalesforceReportingAsset.REPORTS:
121
38
  path = f"lightning/r/Report/{asset['Id']}/view"
122
- return os.path.join(self.instance_url, path)
39
+ return self.build_url(self._host, path)
123
40
 
124
41
  return None
125
42
 
@@ -10,14 +10,15 @@ from ...utils import (
10
10
  write_json,
11
11
  write_summary,
12
12
  )
13
+ from ...utils.salesforce import SalesforceCredentials
13
14
  from .assets import SalesforceReportingAsset
14
- from .client import SalesforceClient, SalesforceCredentials
15
+ from .client import SalesforceReportingClient
15
16
 
16
17
  logger = logging.getLogger(__name__)
17
18
 
18
19
 
19
20
  def iterate_all_data(
20
- client: SalesforceClient,
21
+ client: SalesforceReportingClient,
21
22
  ) -> Iterable[Tuple[str, Union[list, dict]]]:
22
23
  """Iterate over the extracted data from Salesforce"""
23
24
 
@@ -30,10 +31,10 @@ def iterate_all_data(
30
31
  def extract_all(
31
32
  username: str,
32
33
  password: str,
33
- consumer_key: str,
34
- consumer_secret: str,
34
+ client_id: str,
35
+ client_secret: str,
35
36
  security_token: str,
36
- instance_url: str,
37
+ base_url: str,
37
38
  output_directory: Optional[str] = None,
38
39
  ) -> None:
39
40
  """
@@ -44,11 +45,12 @@ def extract_all(
44
45
  creds = SalesforceCredentials(
45
46
  username=username,
46
47
  password=password,
47
- consumer_key=consumer_key,
48
- consumer_secret=consumer_secret,
48
+ client_id=client_id,
49
+ client_secret=client_secret,
49
50
  security_token=security_token,
51
+ base_url=base_url,
50
52
  )
51
- client = SalesforceClient(credentials=creds, instance_url=instance_url)
53
+ client = SalesforceReportingClient(credentials=creds)
52
54
  ts = current_timestamp()
53
55
 
54
56
  for key, data in iterate_all_data(client):
@@ -31,7 +31,7 @@ class DatabricksClient(APIClient):
31
31
  db_allowed: Optional[Set[str]] = None,
32
32
  db_blocked: Optional[Set[str]] = None,
33
33
  ):
34
- super().__init__(credentials)
34
+ super().__init__(host=credentials.host, token=credentials.token)
35
35
  self._db_allowed = db_allowed
36
36
  self._db_blocked = db_blocked
37
37
  self.formatter = DatabricksFormatter()
@@ -25,7 +25,4 @@ def to_credentials(params: dict) -> DatabricksCredentials:
25
25
  """extract Databricks credentials"""
26
26
  host = params.get("host") or from_env(_HOST)
27
27
  token = params.get("token") or from_env(_TOKEN)
28
- return DatabricksCredentials(
29
- host=host,
30
- token=token,
31
- )
28
+ return DatabricksCredentials(host=host, token=token)
@@ -43,7 +43,7 @@ class DatabricksExtractionProcessor:
43
43
  self._storage = storage
44
44
  self._skip_existing = skip_existing
45
45
 
46
- def _should_not_reextract(self, asset_group) -> bool:
46
+ def _should_not_reextract(self, asset_group: WarehouseAssetGroup) -> bool:
47
47
  """helper function to determine whether we need to extract"""
48
48
  if not self._skip_existing:
49
49
  return False
@@ -0,0 +1,6 @@
1
+ from .client import SalesforceClient
2
+ from .extract import (
3
+ SALESFORCE_ASSETS,
4
+ SalesforceExtractionProcessor,
5
+ extract_all,
6
+ )
@@ -0,0 +1,112 @@
1
+ import logging
2
+ from typing import Dict, Iterator, List
3
+
4
+ from tqdm import tqdm # type: ignore
5
+
6
+ from ...utils.salesforce import SalesforceBaseClient, SalesforceCredentials
7
+ from .format import SalesforceFormatter
8
+ from .soql import SOBJECT_FIELDS_QUERY_TPL, SOBJECTS_QUERY_TPL
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class SalesforceClient(SalesforceBaseClient):
    """
    Salesforce API client fetching sobjects and their fields, and exposing
    them as tables and columns
    """

    # Implicit (hard-coded in Salesforce) limitation when using SOQL of 2,000 rows
    LIMIT_RECORDS_PER_PAGE = 2000

    def __init__(self, credentials: SalesforceCredentials):
        super().__init__(credentials)
        self.formatter = SalesforceFormatter()

    @staticmethod
    def name() -> str:
        """Display name of the client"""
        return "Salesforce"

    def _format_query(self, query_template: str, start_durable_id: str) -> str:
        """Fill the template with the starting id and the page size"""
        placeholders = {
            "start_durable_id": start_durable_id,
            "limit": self.LIMIT_RECORDS_PER_PAGE,
        }
        return query_template.format(**placeholders)

    def _next_records(
        self, url: str, query_template: str, start_durable_id: str = "0000"
    ) -> List[dict]:
        """Fetch one page of records, starting after the given durable id"""
        soql = self._format_query(
            query_template, start_durable_id=start_durable_id
        )
        # the next-page path of the response is not used by this client
        page, _ = self._call(
            url, params={"q": soql}, processor=self._query_processor
        )
        return page

    def _is_last_page(self, records: List[dict]) -> bool:
        """A page holding fewer records than the page size is the last one"""
        page_size = len(records)
        return page_size < self.LIMIT_RECORDS_PER_PAGE

    def _should_query_next_page(
        self, records: List[dict], page_number: int
    ) -> bool:
        """Continue unless the page was the last or the limit is reached"""
        if self._is_last_page(records):
            return False
        return not self._has_reached_pagination_limit(page_number)

    def _query_all(self, query_template: str) -> Iterator[dict]:
        """
        Run a SOQL query over salesforce API

        Note, pagination is performed via a LIMIT in the SOQL query and requires
        that ids are sorted. The SOQL query must support `limit` and
        `start_durable_id` as parameters.
        """
        endpoint = self.query_url
        logger.info("querying page 0")
        page = self._next_records(endpoint, query_template)
        yield from page

        page_number = 1
        while self._should_query_next_page(page, page_number):
            logger.info(f"querying page {page_number}")
            # the next page starts right after the last id received so far
            cursor = page[-1]["DurableId"]
            page = self._next_records(
                endpoint, query_template, start_durable_id=cursor
            )
            yield from page
            page_number += 1

    def fetch_sobjects(self) -> List[dict]:
        """Fetch all sobjects"""
        logger.info("Extracting sobjects")
        sobjects = self._query_all(SOBJECTS_QUERY_TPL)
        return list(sobjects)

    def fetch_fields(self, sobject_name: str) -> List[dict]:
        """Fetches fields of a given sobject"""
        soql = SOBJECT_FIELDS_QUERY_TPL.format(
            entity_definition_id=sobject_name
        )
        # fields are queried through the tooling API, in a single call
        payload = self._call(self.tooling_url, params={"q": soql})
        return payload["records"]

    def tables(self) -> List[dict]:
        """
        Get Salesforce sobjects as tables
        """
        raw_sobjects = self.fetch_sobjects()
        logger.info(f"Extracted {len(raw_sobjects)} sobjects")
        return self.formatter.tables(raw_sobjects)

    def columns(
        self, sobject_names: List[str], show_progress: bool = True
    ) -> List[dict]:
        """
        Get salesforce sobject fields as columns
        show_progress: optionally deactivate the tqdm progress bar
        """
        names_with_progress = tqdm(sobject_names, disable=not show_progress)
        fields_by_sobject: Dict[str, List[dict]] = {
            name: self.fetch_fields(name) for name in names_with_progress
        }
        return self.formatter.columns(fields_by_sobject)
@@ -0,0 +1,2 @@
1
+ DATABASE_NAME = "salesforce"
2
+ SCHEMA_NAME = "schema"
@@ -0,0 +1,111 @@
1
+ import logging
2
+ from typing import Dict, List, Tuple
3
+
4
+ from ...utils import AbstractStorage, LocalStorage, write_summary
5
+ from ...utils.salesforce import to_credentials
6
+ from ..abstract import (
7
+ SupportedAssets,
8
+ WarehouseAsset,
9
+ WarehouseAssetGroup,
10
+ common_args,
11
+ )
12
+ from .client import SalesforceClient
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ Paths = Dict[str, str]
18
+
19
+ SALESFORCE_CATALOG_ASSETS: Tuple[WarehouseAsset, ...] = (
20
+ WarehouseAsset.TABLE,
21
+ WarehouseAsset.COLUMN,
22
+ )
23
+
24
+ SALESFORCE_ASSETS: SupportedAssets = {
25
+ WarehouseAssetGroup.CATALOG: SALESFORCE_CATALOG_ASSETS
26
+ }
27
+
28
+
29
class SalesforceExtractionProcessor:
    """Salesforce API-based extraction management - warehouse part"""

    def __init__(
        self,
        client: SalesforceClient,
        storage: AbstractStorage,
        skip_existing: bool = False,
    ):
        self._client = client
        self._storage = storage
        self._skip_existing = skip_existing

    def _should_extract(self) -> bool:
        """
        Tell whether the catalog must be extracted: always, unless
        skip_existing is set and every catalog asset is already stored
        """
        if self._skip_existing and all(
            self._storage.exists(asset.value)
            for asset in SALESFORCE_CATALOG_ASSETS
        ):
            logger.info("Skipped, files for catalog already exist")
            return False
        return True

    def _existing_group_paths(self) -> Paths:
        """Map every catalog asset to its path in the storage"""
        paths: Paths = dict()
        for asset in SALESFORCE_CATALOG_ASSETS:
            paths[asset.value] = self._storage.path(asset.value)
        return paths

    def extract_catalog(self, show_progress: bool = True) -> Paths:
        """
        Extract the following catalog assets: tables and columns
        and return the locations of the extracted data
        """
        if not self._should_extract():
            return self._existing_group_paths()

        table_key = WarehouseAsset.TABLE.value
        column_key = WarehouseAsset.COLUMN.value

        tables = self._client.tables()
        tables_location = self._storage.put(table_key, tables)
        logger.info(f"Extracted {len(tables)} tables to {tables_location}")

        # columns are fetched table by table, hence after the tables
        names = [table["table_name"] for table in tables]
        columns = self._client.columns(names, show_progress)
        columns_location = self._storage.put(column_key, columns)
        logger.info(f"Extracted {len(columns)} columns to {columns_location}")

        return {table_key: tables_location, column_key: columns_location}

    def extract_role(self) -> Paths:
        """extract no users and return the empty file location"""
        user_key = WarehouseAsset.USER.value
        no_users: List[dict] = []
        location = self._storage.put(user_key, no_users)
        logger.info(f"Extracted {len(no_users)} users to {location}")
        return {user_key: location}
88
+
89
+
90
def extract_all(**kwargs) -> None:
    """
    Extract the Salesforce catalog (tables and columns) and an empty list
    of users, store them in the output directory, then write the summary
    """
    output_directory, skip_existing = common_args(kwargs)

    # credentials are resolved from the very same keyword arguments
    credentials = to_credentials(kwargs)
    client = SalesforceClient(credentials=credentials)
    storage = LocalStorage(directory=output_directory)

    processor = SalesforceExtractionProcessor(
        client=client, storage=storage, skip_existing=skip_existing
    )
    processor.extract_catalog()
    processor.extract_role()

    write_summary(
        output_directory, storage.stored_at_ts, client_name=client.name()
    )
@@ -0,0 +1,67 @@
1
+ from typing import Any, Dict, List
2
+
3
+ from .constants import SCHEMA_NAME
4
+
5
+
6
+ def _clean(raw: str) -> str:
7
+ return raw.strip('"')
8
+
9
+
10
def _field_description(field: Dict[str, Any]) -> str:
    """
    Build a bullet list (one line per item) out of the description, help
    text, compliance group and security classification of the field,
    skipping the items that are missing or empty
    """
    definition: Dict[str, str] = field.get("FieldDefinition") or {}

    # label -> raw value, in the order the lines are written
    labelled_values = (
        ("Description", definition.get("Description")),
        ("Help Text", field.get("InlineHelpText")),
        ("Compliance Categorization", definition.get("ComplianceGroup")),
        ("Data Sensitivity Level", definition.get("SecurityClassification")),
    )

    lines = [
        f"- {label}: {_clean(value)}"
        for label, value in labelled_values
        if value
    ]
    return "\n".join(lines)
24
+
25
+
26
def _to_column_payload(field: dict, position: int, table_name: str) -> dict:
    """Turn a raw sobject field into a column of the given table"""
    column_name = field["QualifiedApiName"]
    # the column id is the table name and column name joined by a dot
    column_id = f"{table_name}.{column_name}"
    return dict(
        id=column_id,
        table_id=table_name,
        column_name=column_name,
        description=_field_description(field),
        data_type=field.get("DataType"),
        ordinal_position=position,
    )
36
+
37
+
38
def _to_table_payload(table: dict) -> dict:
    """Turn a raw sobject into a table, identified by its qualified api name"""
    api_name = table["QualifiedApiName"]
    return dict(
        id=api_name,
        schema_id=SCHEMA_NAME,
        table_name=api_name,
        description="",
        tags=[],
        type="TABLE",
    )
47
+
48
+
49
class SalesforceFormatter:
    """
    Converts the raw Salesforce responses into the table and column
    payloads to be exported as csv.
    """

    @staticmethod
    def tables(sobjects: List[dict]) -> List[dict]:
        """formats the raw list of sobjects to tables"""
        return list(map(_to_table_payload, sobjects))

    @staticmethod
    def columns(sobject_fields: Dict[str, List[dict]]) -> List[dict]:
        """formats the raw list of sobject fields to columns"""
        formatted: List[dict] = []
        for table_name, fields in sobject_fields.items():
            # the position restarts from 0 for every table
            for position, field in enumerate(fields):
                formatted.append(
                    _to_column_payload(field, position, table_name)
                )
        return formatted
@@ -0,0 +1,32 @@
1
+ from .format import _field_description
2
+
3
+
4
def test__field_description():
    """
    `_field_description` returns "" when nothing is set, then one
    "- <label>: <value>" line per populated attribute, in a fixed order.
    """
    # a field without any attribute has no description
    field = {}
    assert _field_description(field) == ""

    # an empty definition does not bring anything either
    definition = {}
    field = {"FieldDefinition": definition}
    assert _field_description(field) == ""

    # `definition` is the very object nested in `field`: enriching it in
    # place is visible on the next call
    definition["Description"] = "foo"
    assert "foo" in _field_description(field)

    field["InlineHelpText"] = "bar"
    assert "bar" in _field_description(field)

    definition["ComplianceGroup"] = "bim"
    assert "bim" in _field_description(field)

    definition["SecurityClassification"] = "bam"
    description = _field_description(field)
    assert "bam" in description

    # the full output, with every attribute set
    expected_lines = (
        "- Description: foo",
        "- Help Text: bar",
        "- Compliance Categorization: bim",
        "- Data Sensitivity Level: bam",
    )
    assert description == "\n".join(expected_lines)
@@ -0,0 +1,45 @@
1
# SOQL template selecting EntityDefinition rows whose DurableId is strictly
# greater than `{start_durable_id}`, ordered by DurableId and capped at
# `{limit}` rows.
# NOTE(review): filter + ordering + limit look like keyset pagination on
# DurableId; confirm against the caller that fills the two placeholders.
+ SOBJECTS_QUERY_TPL = """
2
+ SELECT
3
+ DeploymentStatus,
4
+ DeveloperName,
5
+ DurableId,
6
+ ExternalSharingModel,
7
+ InternalSharingModel,
8
+ Label,
9
+ PluralLabel,
10
+ QualifiedApiName
11
+ FROM EntityDefinition
12
+ WHERE DurableId > '{start_durable_id}'
13
+ ORDER BY DurableId
14
+ LIMIT {limit}
15
+ """
16
+
17
+
18
# SOQL template selecting the EntityParticle rows attached to one entity:
# `{entity_definition_id}` is matched against EntityDefinitionId.
# Besides the particle's own attributes, it pulls several attributes through
# the FieldDefinition relationship (description, compliance group, security
# classification, last modification info, ...).
# NOTE(review): the placeholder is quoted but not escaped here; presumably it
# always comes from a previous API response — confirm against the caller.
+ SOBJECT_FIELDS_QUERY_TPL = """
19
+ SELECT
20
+ DataType,
21
+ DeveloperName,
22
+ Digits,
23
+ FieldDefinition.BusinessOwnerId,
24
+ FieldDefinition.ComplianceGroup,
25
+ FieldDefinition.DataType,
26
+ FieldDefinition.Description,
27
+ FieldDefinition.IsIndexed,
28
+ FieldDefinition.LastModifiedBy.Username,
29
+ FieldDefinition.LastModifiedDate,
30
+ FieldDefinition.SecurityClassification,
31
+ InlineHelpText,
32
+ IsComponent,
33
+ IsCompound,
34
+ IsNillable,
35
+ IsUnique,
36
+ Label,
37
+ Length,
38
+ Precision,
39
+ QualifiedApiName,
40
+ ReferenceTo,
41
+ RelationshipName,
42
+ Scale
43
+ FROM EntityParticle
44
+ WHERE EntityDefinitionId='{entity_definition_id}'
45
+ """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.16.3
3
+ Version: 0.16.4
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -1,11 +1,11 @@
1
- CHANGELOG.md,sha256=eRvmcZqJY1G4yDR2CzrA5wzf6xpeZM80HzVBw1tUynw,9959
1
+ CHANGELOG.md,sha256=6ApRuFb6ZxwvpMFyBRJAU6L7teZ01KK2tD0pXFvsYdw,10026
2
2
  Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
3
3
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
4
4
  README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
5
5
  castor_extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  castor_extractor/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  castor_extractor/commands/extract_bigquery.py,sha256=dU4OiYO1V0n32orvZnMh1_xtFKF_VxHNXcVsH3otY-g,1269
8
- castor_extractor/commands/extract_databricks.py,sha256=gun8VK7bKMv5T198Z0wDihY83J2Y7xzKFEoxtXJ5O6s,1170
8
+ castor_extractor/commands/extract_databricks.py,sha256=SVKyoa-BBUQAM6HRHf1Wdg9-tpICic2yyvXQwHcNBhA,1264
9
9
  castor_extractor/commands/extract_domo.py,sha256=lwJ7XeYOeLMF2plf5PK3cL56N9n2yjcDsyRM6UFwKTM,1208
10
10
  castor_extractor/commands/extract_looker.py,sha256=gwjIQPOHrXevgU_o2l8vDHHQT8Sb-mGdwcceb6wJJbg,1483
11
11
  castor_extractor/commands/extract_metabase_api.py,sha256=VPyEKO2VFXzk_OsbQnDhObE9siuBfoegechCZYPZi2k,778
@@ -16,7 +16,8 @@ castor_extractor/commands/extract_postgres.py,sha256=pX0RnCPi4nw6QQ6wiAuZ_Xt3ZbD
16
16
  castor_extractor/commands/extract_powerbi.py,sha256=e6MXDNOafdp0w4ZtOnE5z5o_CxvaodUbbQFk__pDiM4,875
17
17
  castor_extractor/commands/extract_qlik.py,sha256=mSeyGOprTyBExes-lzp___7tgBS1KeyTVpwKAqMpkiw,989
18
18
  castor_extractor/commands/extract_redshift.py,sha256=bdLp7d7ImZoKCkWc3f3NXF1imIzMVT43_KPI-x4UVac,1155
19
- castor_extractor/commands/extract_salesforce_reporting.py,sha256=Nzb0cn9HlPxvZDQNTFk7r3rBpVpGO9a2WhwbPpKGa2M,1160
19
+ castor_extractor/commands/extract_salesforce.py,sha256=s2o799ePjQFYsVwZbrGEflzOIwJKtxUMb9pcF4-K90Y,1400
20
+ castor_extractor/commands/extract_salesforce_reporting.py,sha256=rmAo--dl_m2x7TtJ29w1PfsKt9tQDZocTdvwTqj-fnI,1146
20
21
  castor_extractor/commands/extract_sigma.py,sha256=agwfKj55C81-kect3K6xSJVBv3TDuPT6fGWSTytkQ2o,703
21
22
  castor_extractor/commands/extract_snowflake.py,sha256=vYiruxRoo--GeMemOGsSE1w9kcKTh_y4E165HtMVzkM,1982
22
23
  castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
@@ -46,7 +47,7 @@ castor_extractor/uploader/utils.py,sha256=NCe0tkB28BVhqzOaDhDjaSfODjjcPWB17X6chn
46
47
  castor_extractor/utils/__init__.py,sha256=cZbvEJ4G2IcJR2BzHwi3oOwDLqJsBx0J9gD71lWE1BQ,1149
47
48
  castor_extractor/utils/client/__init__.py,sha256=CRE-xJKm6fVV9dB8ljzB5YoOxX4I1sCD1KSgqs3Y8_Y,161
48
49
  castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2VTpqQAwV76wk,2048
49
- castor_extractor/utils/client/api.py,sha256=rRTp7brx-Wv-ky604kITdmLXcNQNZYpyedjaDicXM7Y,1324
50
+ castor_extractor/utils/client/api.py,sha256=tHa7eC11sS_eOCXhlnvUa2haRfOLENmjKgjB09Ijt0s,1664
50
51
  castor_extractor/utils/client/api_test.py,sha256=NSMdXg1FLc37erqHp2FZsIsogWVv6lFSs7rDXHikr-E,542
51
52
  castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9mMqL71nyygo0,866
52
53
  castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
@@ -82,6 +83,12 @@ castor_extractor/utils/retry.py,sha256=vYdJMiM-Nr82H1MuD7_KZdqbFz98ffQGqJ4Owbr6m
82
83
  castor_extractor/utils/retry_test.py,sha256=nsMttlmyKygVcffX3Hay8U2S1BspkGPiCmzIXPpLKyk,2230
83
84
  castor_extractor/utils/safe.py,sha256=jpfIimwdBSVUvU2DPFrhqpKC_DSYwxQqd08MlIkSODY,1967
84
85
  castor_extractor/utils/safe_test.py,sha256=IHN1Z761tYMFslYC-2HAfkXmFPh4LYSqNLs4QZwykjk,2160
86
+ castor_extractor/utils/salesforce/__init__.py,sha256=VGD4vd1Se79z2PAaVCvCSL3yhgWlhQFaVDLZ5aERug0,132
87
+ castor_extractor/utils/salesforce/client.py,sha256=Mt9yykAPROFgme5eDqoZQv4u85hxcUoG-tmKFPwLibo,2856
88
+ castor_extractor/utils/salesforce/client_test.py,sha256=s6UTogjC36jrJOnYA-gFuyTQsvROCt9y_eoD2O41xCg,682
89
+ castor_extractor/utils/salesforce/constants.py,sha256=5sph6dbTCp0mAGWP24WTpC1wsIqeG8yI8-BsKrmV_wA,335
90
+ castor_extractor/utils/salesforce/credentials.py,sha256=Wwb-_BlbFBJUl3dhXz72IIqcCfj1F3Zj3JoYr3FYk0A,2045
91
+ castor_extractor/utils/salesforce/credentials_test.py,sha256=FQRyNk2Jsh6KtYiW20oL43CVnGjXLcAjdFATkE7jK0s,586
85
92
  castor_extractor/utils/store.py,sha256=D_pVaPsu1MKAJC0K47O_vYTs-Afl6oejravAJdvjmGc,2040
86
93
  castor_extractor/utils/string.py,sha256=aW6bbjqEGnh9kT5KZBnMlV6fhdgOJ0ENCkCTDon1xA0,2377
87
94
  castor_extractor/utils/string_test.py,sha256=OmRVCJUXMcCTwY-QJDhUViYpxkvQQgNRJLCaXY0iUnk,2535
@@ -182,15 +189,12 @@ castor_extractor/visualization/qlik/client/rest.py,sha256=EkHEs3_Vrmy0Ex5b9M_klm
182
189
  castor_extractor/visualization/qlik/client/rest_test.py,sha256=Z2gBTokUVv-JapBtrY2nZDJzBtOusRq6_lJutVvzqG8,1684
183
190
  castor_extractor/visualization/qlik/constants.py,sha256=Pbd1SH3_VI_yEhoDx4PIXBUup-MqXUFjxDkDRr2V4J8,95
184
191
  castor_extractor/visualization/qlik/extract.py,sha256=1ulrirDzoKEdsWxztR6MHcUAE8CiEMx75esdUE7PAmY,2397
185
- castor_extractor/visualization/salesforce_reporting/__init__.py,sha256=8Y_gnyPj-oYaz9Wz1fTf6jmDhofPnNPHlGoKd47KaeM,169
192
+ castor_extractor/visualization/salesforce_reporting/__init__.py,sha256=MvArD0GKNIpCDvLIYcpKrjMjFLhMyDETK6i3k0Fb6Tk,124
186
193
  castor_extractor/visualization/salesforce_reporting/assets.py,sha256=2J-iAmJGGDufOcJUgE47M3-dEcjYXcVyVUNcmHrj79w,271
187
- castor_extractor/visualization/salesforce_reporting/client/__init__.py,sha256=RNQTeNLxVVR6MScvHfRekhOPsQjHKH1y4WogemVu4tc,82
188
- castor_extractor/visualization/salesforce_reporting/client/constants.py,sha256=7yPmUeyn4IHQiHLDutXE0L_OBd41E5080vFxqA_s4Dc,58
189
- castor_extractor/visualization/salesforce_reporting/client/credentials.py,sha256=gJapeUKs8gZSY_YdzX-j0Iv4vcaBzCTAlXMecO8Kk5k,875
190
- castor_extractor/visualization/salesforce_reporting/client/credentials_test.py,sha256=2qIYZ8QuuarHz5EZ9bU0sGEOvoDLnN0eiwsvnbtgXXY,567
191
- castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=_3wa5-bxKGwaNEwh-KLke3O6lbFWzOUAe1bL8n9hH04,4429
194
+ castor_extractor/visualization/salesforce_reporting/client/__init__.py,sha256=DIA6f_vNJZqT89qVYxg98Le7QeDn2y0Qew03V3J9t9o,44
195
+ castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=hzaXWLcYt0aAHXK46DbsLmzocjRY1llwrNj8_3TObKs,1849
192
196
  castor_extractor/visualization/salesforce_reporting/client/soql.py,sha256=DHTi058UEaZKZnxJVmsCouPfA9Lgr3gFY6vY7NiqyMM,1584
193
- castor_extractor/visualization/salesforce_reporting/extract.py,sha256=6cUMNrCz46DUS7xyF6eqKRKGP5VB0y_qwqhvVYfKSPE,1639
197
+ castor_extractor/visualization/salesforce_reporting/extract.py,sha256=5QwZwP27uXrFJSf00El7Ku592-9fhmCtTdiUGpNkHZM,1678
194
198
  castor_extractor/visualization/sigma/__init__.py,sha256=m98AEysUsVHQAWT6m5nvrtLMs22SDQH9G78-IcUwBoY,130
195
199
  castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
196
200
  castor_extractor/visualization/sigma/client/__init__.py,sha256=sFqsbcwilIxu75njtSbnAIsNlPdRgB39SAInNUf-nbQ,90
@@ -262,10 +266,10 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
262
266
  castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
263
267
  castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
264
268
  castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP1y8DK0tySiD_EXe5_xWE,200
265
- castor_extractor/warehouse/databricks/client.py,sha256=u1KpiG16IlFbaEVAIzBlxnzTk_bARGh-D0sZBXtgF4c,8043
269
+ castor_extractor/warehouse/databricks/client.py,sha256=FIqHjlGN5EN2dvcZD2941zPAomOye91JmkgPlxGDk0g,8078
266
270
  castor_extractor/warehouse/databricks/client_test.py,sha256=ctOQnUXosuuFjWGJKgkxjUcV4vQUBWt2BQ_f0Tyzqe4,2717
267
- castor_extractor/warehouse/databricks/credentials.py,sha256=sMpOAKhBklcmTpcr3mi3o8qLud__8PTZbQUT3K_TRY8,678
268
- castor_extractor/warehouse/databricks/extract.py,sha256=mgl1_b9Mlir9ZU3R5HV689YlhzzhlyVN8IaBHaNwY54,5752
271
+ castor_extractor/warehouse/databricks/credentials.py,sha256=PpGv5_GP320UQjV_gvaxSpOw58AmqSznmjGhGfe6bdU,655
272
+ castor_extractor/warehouse/databricks/extract.py,sha256=-vJhAIxSu1lD_xGl-GXZYTmc5BGu0aXM3l-U0UghREM,5773
269
273
  castor_extractor/warehouse/databricks/format.py,sha256=LiPGCTPzL3gQQMMl1v6DvpcTk7BWxZFq03jnHdoYnuU,4968
270
274
  castor_extractor/warehouse/databricks/format_test.py,sha256=iPmdJof43fBYL1Sa_fBrCWDQHCHgm7IWCZag1kWkj9E,1970
271
275
  castor_extractor/warehouse/databricks/types.py,sha256=T2SyLy9pY_olLtstdC77moPxIiikVsuQLMxh92YMJQo,78
@@ -307,6 +311,13 @@ castor_extractor/warehouse/redshift/queries/table_freshness.sql,sha256=l61_ysmTE
307
311
  castor_extractor/warehouse/redshift/queries/user.sql,sha256=sEXveJAuNvZacvpI6WfwsX6VavoMb2VqYA32f6Dt-_Y,170
308
312
  castor_extractor/warehouse/redshift/queries/view_ddl.sql,sha256=Pkyh_QT6d4rhTeyiVcqw6O8CRl7NEhk2p7eM5YIn5kg,719
309
313
  castor_extractor/warehouse/redshift/query.py,sha256=0C81rkt2cpkWrJIxxwALDyqr-49vlqQM04y_N6wwStc,540
314
+ castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29deeN84CkN0_D_Z7CLQdJvFY,137
315
+ castor_extractor/warehouse/salesforce/client.py,sha256=_XiQJJJfELKGmzuBv8Mr_C0FJ-oLg71KbvpehrGvJ_k,3842
316
+ castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
317
+ castor_extractor/warehouse/salesforce/extract.py,sha256=ZTb58t7mqhavNvErrnw8M0L4Uu3qJpQEIldymurbgl0,3417
318
+ castor_extractor/warehouse/salesforce/format.py,sha256=_BSj_G6C-kPwRubxSx1WuHg-_nYVQVNgAANqNfXL5RM,2154
319
+ castor_extractor/warehouse/salesforce/format_test.py,sha256=6hy0USZH7-PDQt3oZ9_3Nwlr3eHLkqNEchqIM3bIDrU,858
320
+ castor_extractor/warehouse/salesforce/soql.py,sha256=81lAtPpq7ccmi6o1zkwqLKC1esOfSsfNObdizkfgiSM,1089
310
321
  castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
311
322
  castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCwPDh2uBV5aKc1Ql914,5555
312
323
  castor_extractor/warehouse/snowflake/client_test.py,sha256=ihWtOOAQfh8pu5JTr_EWfqefKOVIaJXznACURzaU1Qs,1432
@@ -346,8 +357,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
346
357
  castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
347
358
  castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
348
359
  castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
349
- castor_extractor-0.16.3.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
350
- castor_extractor-0.16.3.dist-info/METADATA,sha256=CaGi5itpnLSjjCv5PpKayJ2Oi859ewvcyrPFzHNIdYM,6370
351
- castor_extractor-0.16.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
352
- castor_extractor-0.16.3.dist-info/entry_points.txt,sha256=EQUCoNjSHevxmY5ZathX_fLZPcuBHng23rj0SSUrLtI,1345
353
- castor_extractor-0.16.3.dist-info/RECORD,,
360
+ castor_extractor-0.16.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
361
+ castor_extractor-0.16.4.dist-info/METADATA,sha256=-D39Tmu_LFDHRe3HrZ542JjZxl0puzZr0n8wMkW52P0,6370
362
+ castor_extractor-0.16.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
363
+ castor_extractor-0.16.4.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
364
+ castor_extractor-0.16.4.dist-info/RECORD,,
@@ -11,7 +11,8 @@ castor-extract-postgres=castor_extractor.commands.extract_postgres:main
11
11
  castor-extract-powerbi=castor_extractor.commands.extract_powerbi:main
12
12
  castor-extract-qlik=castor_extractor.commands.extract_qlik:main
13
13
  castor-extract-redshift=castor_extractor.commands.extract_redshift:main
14
- castor-extract-salesforce-viz=castor_extractor.commands.extract_salesforce_viz:main
14
+ castor-extract-salesforce=castor_extractor.commands.extract_salesforce:main
15
+ castor-extract-salesforce-viz=castor_extractor.commands.extract_salesforce_reporting:main
15
16
  castor-extract-sigma=castor_extractor.commands.extract_sigma:main
16
17
  castor-extract-snowflake=castor_extractor.commands.extract_snowflake:main
17
18
  castor-extract-sqlserver=castor_extractor.commands.extract_sqlserver:main
@@ -1,2 +0,0 @@
1
# Module-level defaults of the removed constants file.
# NOTE(review): presumably the Salesforce API version and the page size used
# by the REST client — confirm against the code that reads them.
- DEFAULT_API_VERSION = 59.0
2
- DEFAULT_PAGINATION_LIMIT = 100
@@ -1,33 +0,0 @@
1
- from typing import Dict
2
-
3
-
4
class SalesforceCredentials:
    """
    Secrets required to request an authentication token from the Salesforce
    rest API
    """

    def __init__(
        self,
        *,
        username: str,
        password: str,
        security_token: str,
        consumer_key: str,
        consumer_secret: str,
    ):
        """
        All arguments are keyword-only. The stored password is the given
        password immediately followed by the security token.
        """
        self.username = username
        self.password = password + security_token
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret

    def token_request_payload(self) -> Dict[str, str]:
        """
        Body to post to the API in order to retrieve the authentication
        token, using the "password" grant type
        """
        payload: Dict[str, str] = {"grant_type": "password"}
        payload["client_id"] = self.consumer_key
        payload["client_secret"] = self.consumer_secret
        payload["username"] = self.username
        payload["password"] = self.password
        return payload