castor-extractor 0.19.0__py3-none-any.whl → 0.19.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +29 -2
- castor_extractor/file_checker/templates/generic_warehouse.py +1 -1
- castor_extractor/knowledge/notion/client/client.py +44 -80
- castor_extractor/knowledge/notion/client/client_test.py +9 -4
- castor_extractor/knowledge/notion/client/constants.py +1 -0
- castor_extractor/knowledge/notion/client/endpoints.py +1 -1
- castor_extractor/knowledge/notion/client/pagination.py +9 -5
- castor_extractor/quality/soda/assets.py +1 -1
- castor_extractor/quality/soda/client/client.py +30 -83
- castor_extractor/quality/soda/client/credentials.py +0 -11
- castor_extractor/quality/soda/client/endpoints.py +3 -6
- castor_extractor/quality/soda/client/pagination.py +25 -0
- castor_extractor/utils/__init__.py +13 -2
- castor_extractor/utils/client/__init__.py +14 -0
- castor_extractor/utils/client/api/__init__.py +5 -0
- castor_extractor/utils/client/api/auth.py +76 -0
- castor_extractor/utils/client/api/auth_test.py +49 -0
- castor_extractor/utils/client/api/client.py +153 -0
- castor_extractor/utils/client/api/client_test.py +47 -0
- castor_extractor/utils/client/api/pagination.py +83 -0
- castor_extractor/utils/client/api/pagination_test.py +51 -0
- castor_extractor/utils/{safe_request_test.py → client/api/safe_request_test.py} +4 -1
- castor_extractor/utils/client/api/utils.py +9 -0
- castor_extractor/utils/client/api/utils_test.py +16 -0
- castor_extractor/utils/collection.py +34 -2
- castor_extractor/utils/collection_test.py +17 -3
- castor_extractor/utils/pager/__init__.py +0 -1
- castor_extractor/utils/retry.py +44 -0
- castor_extractor/utils/retry_test.py +26 -1
- castor_extractor/utils/salesforce/client.py +44 -49
- castor_extractor/utils/salesforce/client_test.py +2 -2
- castor_extractor/utils/salesforce/pagination.py +33 -0
- castor_extractor/visualization/domo/client/client.py +10 -5
- castor_extractor/visualization/domo/client/credentials.py +1 -1
- castor_extractor/visualization/domo/client/endpoints.py +19 -7
- castor_extractor/visualization/looker/api/credentials.py +1 -1
- castor_extractor/visualization/metabase/client/api/client.py +26 -11
- castor_extractor/visualization/metabase/client/api/credentials.py +1 -1
- castor_extractor/visualization/metabase/client/db/credentials.py +1 -1
- castor_extractor/visualization/mode/client/credentials.py +1 -1
- castor_extractor/visualization/qlik/client/engine/credentials.py +1 -1
- castor_extractor/visualization/salesforce_reporting/client/rest.py +4 -3
- castor_extractor/visualization/sigma/client/client.py +106 -111
- castor_extractor/visualization/sigma/client/credentials.py +11 -1
- castor_extractor/visualization/sigma/client/endpoints.py +1 -1
- castor_extractor/visualization/sigma/client/pagination.py +22 -18
- castor_extractor/visualization/tableau/tests/unit/rest_api/auth_test.py +0 -1
- castor_extractor/visualization/tableau/tests/unit/rest_api/credentials_test.py +0 -3
- castor_extractor/visualization/tableau_revamp/assets.py +11 -0
- castor_extractor/visualization/tableau_revamp/client/client.py +71 -151
- castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +95 -0
- castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +128 -0
- castor_extractor/visualization/tableau_revamp/client/client_tsc.py +66 -0
- castor_extractor/visualization/tableau_revamp/client/{tsc_fields.py → rest_fields.py} +15 -2
- castor_extractor/visualization/tableau_revamp/constants.py +0 -2
- castor_extractor/visualization/tableau_revamp/extract.py +5 -11
- castor_extractor/warehouse/databricks/api_client.py +239 -0
- castor_extractor/warehouse/databricks/api_client_test.py +15 -0
- castor_extractor/warehouse/databricks/client.py +37 -490
- castor_extractor/warehouse/databricks/client_test.py +1 -99
- castor_extractor/warehouse/databricks/endpoints.py +28 -0
- castor_extractor/warehouse/databricks/lineage.py +141 -0
- castor_extractor/warehouse/databricks/lineage_test.py +34 -0
- castor_extractor/warehouse/databricks/pagination.py +22 -0
- castor_extractor/warehouse/databricks/sql_client.py +90 -0
- castor_extractor/warehouse/databricks/utils.py +44 -1
- castor_extractor/warehouse/databricks/utils_test.py +58 -1
- castor_extractor/warehouse/mysql/client.py +0 -2
- castor_extractor/warehouse/salesforce/client.py +12 -59
- castor_extractor/warehouse/salesforce/pagination.py +34 -0
- castor_extractor/warehouse/sqlserver/client.py +0 -1
- castor_extractor-0.19.6.dist-info/METADATA +903 -0
- {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/RECORD +77 -60
- castor_extractor/utils/client/api.py +0 -87
- castor_extractor/utils/client/api_test.py +0 -24
- castor_extractor/utils/pager/pager_on_token.py +0 -52
- castor_extractor/utils/pager/pager_on_token_test.py +0 -73
- castor_extractor/visualization/sigma/client/client_test.py +0 -54
- castor_extractor-0.19.0.dist-info/METADATA +0 -207
- /castor_extractor/utils/{safe_request.py → client/api/safe_request.py} +0 -0
- {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/LICENCE +0 -0
- {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/WHEEL +0 -0
- {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,14 +1,41 @@
|
|
|
1
|
+
|
|
1
2
|
# Changelog
|
|
2
3
|
|
|
4
|
+
## 0.19.6 - 2024-09-03
|
|
5
|
+
|
|
6
|
+
* Metabase: Adding error handler on API call
|
|
7
|
+
|
|
8
|
+
## 0.19.5 - 2024-09-02
|
|
9
|
+
|
|
10
|
+
* Databricks/Salesforce: Remove deprecated client dependencies
|
|
11
|
+
|
|
12
|
+
## 0.19.4 - 2024-08-29
|
|
13
|
+
|
|
14
|
+
* Tableau Pulse: extract Metrics and Subscriptions
|
|
15
|
+
|
|
16
|
+
## 0.19.3 - 2024-08-27
|
|
17
|
+
|
|
18
|
+
* Sigma: Add SafeMode to SigmaClient
|
|
19
|
+
|
|
20
|
+
## 0.19.2 - 2024-08-23
|
|
21
|
+
|
|
22
|
+
* Reworked APIClient to unify clients' behaviours
|
|
23
|
+
|
|
24
|
+
Impacted Technologies: Sigma, Soda, Notion
|
|
25
|
+
|
|
26
|
+
## 0.19.1 - 2024-08-23
|
|
27
|
+
|
|
28
|
+
* Domo: extract datasources via cards metadata endpoint
|
|
29
|
+
|
|
3
30
|
## 0.19.0 - 2024-08-21
|
|
4
31
|
|
|
5
|
-
* Breaking change Looker CLI:
|
|
32
|
+
* Breaking change Looker CLI:
|
|
6
33
|
|
|
7
34
|
`-u` and `--username` changed to `-c` `--client-id`
|
|
8
35
|
|
|
9
36
|
`-p` and `--password` changed to `-s` `--client-secret`
|
|
10
37
|
|
|
11
|
-
* Breaking change Metabase CLI:
|
|
38
|
+
* Breaking change Metabase CLI:
|
|
12
39
|
|
|
13
40
|
`-u` and `--username` changed to `-u` `--user`
|
|
14
41
|
|
|
@@ -1,24 +1,13 @@
|
|
|
1
|
-
import
|
|
1
|
+
from functools import partial
|
|
2
2
|
from http import HTTPStatus
|
|
3
|
-
from typing import Dict, Iterator
|
|
3
|
+
from typing import Dict, Iterator, Optional
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
|
|
7
|
-
from ....utils import RequestSafeMode, empty_iterator
|
|
8
|
-
from ....utils.client.api import APIClient, HttpMethod
|
|
5
|
+
from ....utils import APIClient, BearerAuth, RequestSafeMode, fetch_all_pages
|
|
9
6
|
from ..assets import NotionAsset
|
|
10
|
-
from .constants import
|
|
11
|
-
CASTOR_NOTION_USER_AGENT,
|
|
12
|
-
NOTION_BASE_URL,
|
|
13
|
-
NOTION_TIMEOUT_MS,
|
|
14
|
-
)
|
|
7
|
+
from .constants import CASTOR_NOTION_USER_AGENT, NOTION_BASE_URL, NOTION_VERSION
|
|
15
8
|
from .credentials import NotionCredentials
|
|
16
|
-
from .endpoints import
|
|
17
|
-
from .pagination import
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger("__name__")
|
|
20
|
-
|
|
21
|
-
NOTION_VERSION = "2021-08-16"
|
|
9
|
+
from .endpoints import NotionEndpointFactory
|
|
10
|
+
from .pagination import NotionPagination
|
|
22
11
|
|
|
23
12
|
VOLUME_IGNORED = 10
|
|
24
13
|
IGNORED_ERROR_CODES = (HTTPStatus.BAD_GATEWAY,)
|
|
@@ -26,83 +15,64 @@ NOTION_SAFE_MODE = RequestSafeMode(
|
|
|
26
15
|
max_errors=VOLUME_IGNORED,
|
|
27
16
|
status_codes=IGNORED_ERROR_CODES,
|
|
28
17
|
)
|
|
18
|
+
NOTION_BASE_HEADERS = {
|
|
19
|
+
"Notion-Version": NOTION_VERSION,
|
|
20
|
+
"User-Agent": CASTOR_NOTION_USER_AGENT,
|
|
21
|
+
}
|
|
29
22
|
|
|
30
23
|
|
|
31
24
|
def _search_filter(asset: str) -> Dict[str, Dict[str, str]]:
|
|
32
25
|
return {"filter": {"value": asset, "property": "object"}}
|
|
33
26
|
|
|
34
27
|
|
|
35
|
-
class
|
|
36
|
-
|
|
28
|
+
class NotionAuth(BearerAuth):
|
|
29
|
+
def __init__(self, token: str):
|
|
30
|
+
self.token = token
|
|
37
31
|
|
|
32
|
+
def fetch_token(self):
|
|
33
|
+
return self.token
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class NotionClient(APIClient):
|
|
38
37
|
def __init__(
|
|
39
38
|
self,
|
|
40
39
|
credentials: NotionCredentials,
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
safe_mode: RequestSafeMode = NOTION_SAFE_MODE,
|
|
45
|
-
) -> None:
|
|
46
|
-
base_headers = {
|
|
47
|
-
"Notion-Version": NOTION_VERSION,
|
|
48
|
-
"User-Agent": user_agent,
|
|
49
|
-
}
|
|
50
|
-
|
|
40
|
+
safe_mode: Optional[RequestSafeMode] = None,
|
|
41
|
+
):
|
|
42
|
+
auth = NotionAuth(token=credentials.token)
|
|
51
43
|
super().__init__(
|
|
52
|
-
host=
|
|
53
|
-
|
|
54
|
-
headers=
|
|
55
|
-
|
|
56
|
-
safe_mode=safe_mode,
|
|
44
|
+
host=NOTION_BASE_URL,
|
|
45
|
+
auth=auth,
|
|
46
|
+
headers=NOTION_BASE_HEADERS,
|
|
47
|
+
safe_mode=safe_mode or NOTION_SAFE_MODE,
|
|
57
48
|
)
|
|
58
49
|
|
|
59
|
-
def
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
"""
|
|
63
|
-
API call to Notion:
|
|
64
|
-
- If result is paginated, yield all pages
|
|
65
|
-
- If not, yield only the response payload
|
|
66
|
-
"""
|
|
67
|
-
built_url = self.build_url(self._host, endpoint)
|
|
68
|
-
response_payload = self._call(
|
|
69
|
-
method=method,
|
|
70
|
-
url=built_url,
|
|
71
|
-
params=params if params and method == "GET" else None,
|
|
72
|
-
data=params if params and method == "POST" else None,
|
|
73
|
-
)
|
|
74
|
-
try:
|
|
75
|
-
paginated_response = PaginatedResponse(**response_payload)
|
|
76
|
-
yield from paginated_response.results
|
|
77
|
-
|
|
78
|
-
if not paginated_response.has_more:
|
|
79
|
-
return empty_iterator()
|
|
80
|
-
|
|
81
|
-
yield from self._request(
|
|
82
|
-
method=method,
|
|
83
|
-
endpoint=endpoint,
|
|
84
|
-
params=paginated_response.start_cursor,
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
except ValidationError:
|
|
88
|
-
yield response_payload
|
|
50
|
+
def users(self) -> Iterator[dict]:
|
|
51
|
+
request = partial(self._get, endpoint=NotionEndpointFactory.users())
|
|
52
|
+
yield from fetch_all_pages(request, NotionPagination)
|
|
89
53
|
|
|
90
54
|
def _page_listing(self) -> Iterator[dict]:
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
endpoint=
|
|
94
|
-
|
|
55
|
+
request = partial(
|
|
56
|
+
self._post,
|
|
57
|
+
endpoint=NotionEndpointFactory.search(),
|
|
58
|
+
data=_search_filter("page"),
|
|
95
59
|
)
|
|
60
|
+
yield from fetch_all_pages(request, NotionPagination)
|
|
96
61
|
|
|
97
62
|
def _blocks(self, block_id: str) -> Iterator[dict]:
|
|
98
|
-
|
|
63
|
+
request = partial(
|
|
64
|
+
self._get,
|
|
65
|
+
endpoint=NotionEndpointFactory.blocks(block_id),
|
|
66
|
+
)
|
|
67
|
+
yield from fetch_all_pages(request, NotionPagination)
|
|
99
68
|
|
|
100
69
|
def databases(self) -> Iterator[dict]:
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
endpoint=
|
|
104
|
-
|
|
70
|
+
request = partial(
|
|
71
|
+
self._post,
|
|
72
|
+
endpoint=NotionEndpointFactory.search(),
|
|
73
|
+
data=_search_filter("database"),
|
|
105
74
|
)
|
|
75
|
+
yield from fetch_all_pages(request, NotionPagination)
|
|
106
76
|
|
|
107
77
|
def recursive_blocks(self, block_id: str) -> Iterator[dict]:
|
|
108
78
|
"""Fetch recursively all children blocks of a given block or page"""
|
|
@@ -116,9 +86,7 @@ class NotionClient(APIClient):
|
|
|
116
86
|
|
|
117
87
|
def pages(self) -> Iterator[dict]:
|
|
118
88
|
"""Fetch all pages with its whole content"""
|
|
119
|
-
|
|
120
|
-
logger.info(f"Extracting {len(pages)} pages ...")
|
|
121
|
-
for page in pages:
|
|
89
|
+
for page in self._page_listing():
|
|
122
90
|
if page.get("object") == "database":
|
|
123
91
|
# Notion Search API filter for page doesn't work
|
|
124
92
|
continue
|
|
@@ -126,10 +94,6 @@ class NotionClient(APIClient):
|
|
|
126
94
|
page["child_blocks"] = content
|
|
127
95
|
yield page
|
|
128
96
|
|
|
129
|
-
def users(self) -> Iterator[dict]:
|
|
130
|
-
"""Fetch all users"""
|
|
131
|
-
return self._request("GET", EndpointFactory.users(), {})
|
|
132
|
-
|
|
133
97
|
def fetch(self, asset: NotionAsset) -> Iterator[dict]:
|
|
134
98
|
"""Returns the needed metadata for the queried asset"""
|
|
135
99
|
if asset == NotionAsset.PAGES:
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
+
from functools import partial
|
|
1
2
|
from unittest.mock import patch
|
|
2
3
|
|
|
4
|
+
from ....utils import fetch_all_pages
|
|
3
5
|
from .client import NotionClient
|
|
4
6
|
from .credentials import NotionCredentials
|
|
7
|
+
from .pagination import NotionPagination
|
|
5
8
|
|
|
6
9
|
MOCK_PAGINATED_RESPONSE = {
|
|
7
10
|
"results": [{"result_id": 1}],
|
|
@@ -16,7 +19,7 @@ MOCK_PAGINATED_RESPONSE_2 = {
|
|
|
16
19
|
MOCK_RESPONSE = {"result_id": 3}
|
|
17
20
|
|
|
18
21
|
|
|
19
|
-
@patch.object(NotionClient, "
|
|
22
|
+
@patch.object(NotionClient, "_get")
|
|
20
23
|
def test_NotionClient__request(mock_call):
|
|
21
24
|
mock_call.side_effect = [
|
|
22
25
|
MOCK_PAGINATED_RESPONSE,
|
|
@@ -25,11 +28,13 @@ def test_NotionClient__request(mock_call):
|
|
|
25
28
|
]
|
|
26
29
|
|
|
27
30
|
client = NotionClient(NotionCredentials(token="MockToken"))
|
|
28
|
-
|
|
31
|
+
get_call = partial(client._get, endpoint="FAKE")
|
|
32
|
+
|
|
33
|
+
response = list(fetch_all_pages(get_call, NotionPagination))
|
|
29
34
|
assert response == [{"result_id": 1}, {"result_id": 2}]
|
|
30
35
|
|
|
31
|
-
response =
|
|
32
|
-
assert response ==
|
|
36
|
+
response = get_call()
|
|
37
|
+
assert response == {"result_id": 3}
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
MOCK_BLOCK = [{"object": "block", "id": "1", "has_children": True}]
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from ....utils import PaginationModel
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
class PaginatedResponse:
|
|
6
|
+
class NotionPagination(PaginationModel):
|
|
8
7
|
"""Class to handle paginated results"""
|
|
9
8
|
|
|
10
9
|
results: list
|
|
11
10
|
next_cursor: Optional[str]
|
|
12
11
|
has_more: bool
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
def is_last(self) -> bool:
|
|
14
|
+
return not self.has_more
|
|
15
|
+
|
|
16
|
+
def next_page_payload(self) -> dict:
|
|
16
17
|
return {"start_cursor": self.next_cursor}
|
|
18
|
+
|
|
19
|
+
def page_results(self) -> list:
|
|
20
|
+
return self.results
|
|
@@ -1,99 +1,46 @@
|
|
|
1
|
-
from
|
|
2
|
-
from typing import
|
|
3
|
-
|
|
4
|
-
import
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
from functools import partial
|
|
2
|
+
from typing import Iterator
|
|
3
|
+
|
|
4
|
+
from ....utils import (
|
|
5
|
+
APIClient,
|
|
6
|
+
BasicAuth,
|
|
7
|
+
fetch_all_pages,
|
|
8
|
+
)
|
|
8
9
|
from ..assets import SodaAsset
|
|
9
10
|
from .credentials import SodaCredentials
|
|
10
11
|
from .endpoints import SodaEndpointFactory
|
|
12
|
+
from .pagination import SodaCloudPagination
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
_CLOUD_PAGE_SIZE = 100
|
|
14
|
+
_CLOUD_API = "https://cloud.soda.io/api/v1/"
|
|
14
15
|
_REQUESTS_PER_MINUTE = 10
|
|
15
16
|
_SECONDS_PER_MINUTE = 60
|
|
16
|
-
|
|
17
|
+
_RATE_LIMIT_MS = (_SECONDS_PER_MINUTE // _REQUESTS_PER_MINUTE) + 1
|
|
17
18
|
|
|
19
|
+
HEADERS = {"Content-Type": "application/json"}
|
|
18
20
|
|
|
19
|
-
class SodaClient:
|
|
20
|
-
def __init__(
|
|
21
|
-
self,
|
|
22
|
-
credentials: SodaCredentials,
|
|
23
|
-
):
|
|
24
|
-
self._timeout = DEFAULT_TIMEOUT_S
|
|
25
|
-
self._reporting_headers = credentials.reporting_headers
|
|
26
|
-
self._auth = (credentials.api_key, credentials.secret)
|
|
27
21
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
method: HttpMethod = "GET",
|
|
33
|
-
*,
|
|
34
|
-
params: Optional[dict] = None,
|
|
35
|
-
data: Optional[dict] = None,
|
|
36
|
-
auth: Optional[tuple] = None,
|
|
37
|
-
) -> Any:
|
|
38
|
-
response = requests.request(
|
|
39
|
-
method,
|
|
40
|
-
url,
|
|
41
|
-
headers=headers,
|
|
42
|
-
params=params,
|
|
43
|
-
json=data,
|
|
44
|
-
timeout=self._timeout,
|
|
45
|
-
auth=auth,
|
|
22
|
+
class SodaClient(APIClient):
|
|
23
|
+
def __init__(self, credentials: SodaCredentials):
|
|
24
|
+
cloud_auth = BasicAuth(
|
|
25
|
+
username=credentials.api_key, password=credentials.secret
|
|
46
26
|
)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
return response.json()
|
|
27
|
+
super().__init__(host=_CLOUD_API, auth=cloud_auth, headers=HEADERS)
|
|
50
28
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
**{"page": page_number, "size": _REPORTING_PAGE_SIZE},
|
|
57
|
-
**additional,
|
|
58
|
-
}
|
|
59
|
-
_check_results_page = self._call(
|
|
60
|
-
url=url,
|
|
61
|
-
method="POST",
|
|
62
|
-
data=json_data,
|
|
63
|
-
headers=self._reporting_headers,
|
|
64
|
-
)
|
|
65
|
-
yield from _check_results_page["data"]
|
|
66
|
-
|
|
67
|
-
next_page = len(_check_results_page["data"]) == _REPORTING_PAGE_SIZE
|
|
68
|
-
page_number += 1
|
|
69
|
-
|
|
70
|
-
def _datasets(self) -> Iterator[dict]:
|
|
71
|
-
url = SodaEndpointFactory.datasets()
|
|
72
|
-
next_page = True
|
|
73
|
-
page_number = 0
|
|
74
|
-
while next_page:
|
|
75
|
-
data = self._call(
|
|
76
|
-
url=url,
|
|
77
|
-
method="GET",
|
|
78
|
-
headers={},
|
|
79
|
-
params={"size": _CLOUD_PAGE_SIZE, "page": page_number},
|
|
80
|
-
auth=self._auth,
|
|
81
|
-
)
|
|
82
|
-
yield from data["content"]
|
|
83
|
-
next_page = not data["last"]
|
|
84
|
-
page_number += 1
|
|
85
|
-
sleep(_RATE_LIMIT_S)
|
|
29
|
+
def datasets(self) -> Iterator[dict]:
|
|
30
|
+
request = partial(self._get, endpoint=SodaEndpointFactory.datasets())
|
|
31
|
+
yield from fetch_all_pages(
|
|
32
|
+
request, SodaCloudPagination, rate_limit=_RATE_LIMIT_MS
|
|
33
|
+
)
|
|
86
34
|
|
|
87
|
-
def
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
url, additional={"from_datetime": _date}
|
|
35
|
+
def checks(self) -> Iterator[dict]:
|
|
36
|
+
request = partial(self._get, endpoint=SodaEndpointFactory.checks())
|
|
37
|
+
yield from fetch_all_pages(
|
|
38
|
+
request, SodaCloudPagination, rate_limit=_RATE_LIMIT_MS
|
|
92
39
|
)
|
|
93
40
|
|
|
94
|
-
def fetch(self, asset: SodaAsset) -> Iterator
|
|
41
|
+
def fetch(self, asset: SodaAsset) -> Iterator:
|
|
95
42
|
if asset == SodaAsset.DATASETS:
|
|
96
|
-
|
|
97
|
-
if asset == SodaAsset.
|
|
98
|
-
|
|
43
|
+
return self.datasets()
|
|
44
|
+
if asset == SodaAsset.CHECKS:
|
|
45
|
+
return self.checks()
|
|
99
46
|
raise ValueError(f"The asset {asset}, is not supported")
|
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
1
|
from pydantic import Field
|
|
4
2
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
-
from requests.auth import HTTPBasicAuth
|
|
6
3
|
|
|
7
4
|
SODA_ENV_PREFIX = "CASTOR_SODA_"
|
|
8
5
|
|
|
@@ -18,11 +15,3 @@ class SodaCredentials(BaseSettings):
|
|
|
18
15
|
|
|
19
16
|
api_key: str = Field(repr=False)
|
|
20
17
|
secret: str = Field(repr=False)
|
|
21
|
-
|
|
22
|
-
@property
|
|
23
|
-
def reporting_headers(self) -> Dict[str, str]:
|
|
24
|
-
return {
|
|
25
|
-
"Content-Type": "application/json",
|
|
26
|
-
"API_KEY_ID": self.api_key,
|
|
27
|
-
"API_KEY_SECRET": self.secret,
|
|
28
|
-
}
|
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
class SodaEndpointFactory:
|
|
2
2
|
"""Wrapper class around all endpoints we're using"""
|
|
3
3
|
|
|
4
|
-
CLOUD_API = "https://cloud.soda.io/api/v1"
|
|
5
|
-
REPORTING_API = "https://reporting.cloud.soda.io/v1"
|
|
6
|
-
|
|
7
4
|
@classmethod
|
|
8
5
|
def datasets(cls) -> str:
|
|
9
|
-
return
|
|
6
|
+
return "datasets"
|
|
10
7
|
|
|
11
8
|
@classmethod
|
|
12
|
-
def
|
|
13
|
-
return
|
|
9
|
+
def checks(cls) -> str:
|
|
10
|
+
return "checks"
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from ....utils import PaginationModel
|
|
4
|
+
|
|
5
|
+
_CLOUD_PAGE_SIZE = 100
|
|
6
|
+
_CLOUD_FIRST_PAGE = 0
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SodaCloudPagination(PaginationModel):
    """Pagination model for page-numbered responses exposing `content` and `last`."""

    content: List[dict]
    last: bool

    def is_last(self) -> bool:
        """Tell whether the response flags itself as the final page."""
        return self.last

    def next_page_payload(self) -> dict:
        """Build the `page`/`size` payload to send with the following request."""
        previous_payload = self.current_page_payload
        if previous_payload:
            page_number = previous_payload[self.fetch_by.value]["page"]
        else:
            # No payload accompanied the current request: count from the first page
            page_number = _CLOUD_FIRST_PAGE
        return {"page": page_number + 1, "size": _CLOUD_PAGE_SIZE}

    def page_results(self) -> list:
        """Return the items carried by this page."""
        return self.content
|
|
@@ -1,12 +1,24 @@
|
|
|
1
1
|
from .argument_parser import parse_filled_arguments
|
|
2
2
|
from .client import (
|
|
3
3
|
AbstractSourceClient,
|
|
4
|
+
APIClient,
|
|
5
|
+
Auth,
|
|
6
|
+
BasicAuth,
|
|
7
|
+
BearerAuth,
|
|
8
|
+
CustomAuth,
|
|
4
9
|
ExtractionQuery,
|
|
10
|
+
FetchNextPageBy,
|
|
11
|
+
PaginationModel,
|
|
5
12
|
PostgresClient,
|
|
13
|
+
RequestSafeMode,
|
|
14
|
+
ResponseJson,
|
|
6
15
|
SqlalchemyClient,
|
|
16
|
+
build_url,
|
|
17
|
+
fetch_all_pages,
|
|
18
|
+
handle_response,
|
|
7
19
|
uri_encode,
|
|
8
20
|
)
|
|
9
|
-
from .collection import empty_iterator, group_by, mapping_from_rows
|
|
21
|
+
from .collection import deduplicate, empty_iterator, group_by, mapping_from_rows
|
|
10
22
|
from .constants import OUTPUT_DIR
|
|
11
23
|
from .deprecate import deprecate_python
|
|
12
24
|
from .env import from_env
|
|
@@ -24,7 +36,6 @@ from .pager import (
|
|
|
24
36
|
)
|
|
25
37
|
from .retry import RetryStrategy, retry
|
|
26
38
|
from .safe import SafeMode, safe_mode
|
|
27
|
-
from .safe_request import RequestSafeMode, ResponseJson, handle_response
|
|
28
39
|
from .store import AbstractStorage, LocalStorage
|
|
29
40
|
from .string import decode_when_bytes, string_to_tuple
|
|
30
41
|
from .time import (
|
|
@@ -1,4 +1,18 @@
|
|
|
1
1
|
from .abstract import AbstractSourceClient, SqlalchemyClient
|
|
2
|
+
from .api import (
|
|
3
|
+
APIClient,
|
|
4
|
+
Auth,
|
|
5
|
+
BasicAuth,
|
|
6
|
+
BearerAuth,
|
|
7
|
+
CustomAuth,
|
|
8
|
+
FetchNextPageBy,
|
|
9
|
+
PaginationModel,
|
|
10
|
+
RequestSafeMode,
|
|
11
|
+
ResponseJson,
|
|
12
|
+
build_url,
|
|
13
|
+
fetch_all_pages,
|
|
14
|
+
handle_response,
|
|
15
|
+
)
|
|
2
16
|
from .postgres import PostgresClient
|
|
3
17
|
from .query import ExtractionQuery
|
|
4
18
|
from .uri import uri_encode
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Dict, Optional, Union
|
|
4
|
+
|
|
5
|
+
from requests.auth import AuthBase, HTTPBasicAuth
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BasicAuth(HTTPBasicAuth):
    """
    Basic (username / password) authentication for API clients

    Usage:
    - build the instance from a username and a password
    - hand it over to the APIClient
    """

    def refresh_token(self) -> None:
        """Do nothing: this class defines no token to renew."""
        return None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CustomAuth(AuthBase, ABC):
    """
    Authentication for API using custom auth method

    You need to:
    - implement the `_authentication_header()` method
    - pass the Auth class to the APIClient
    """

    def refresh_token(self):
        """
        Method to refresh the token if token expires

        The base implementation does nothing; override it when needed.
        """
        pass

    @abstractmethod
    def _authentication_header(self) -> Dict[str, str]:
        """Return the header names and values to add to the request."""
        pass

    def __call__(self, r):
        """Add the authentication header(s) to request `r` and return it."""
        # Update the existing mapping in place instead of rebinding
        # `r.headers` to a plain dict: this keeps whatever mapping type the
        # request object carries and matches what BearerAuth does.
        r.headers.update(self._authentication_header())
        return r
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BearerAuth(ABC):
    """
    Authentication for API using Bearer tokens

    You need to:
    - implement the `fetch_token()` method
    - pass the Auth class to the APIClient

    The token is fetched on first use, then kept until `refresh_token()`
    is called.
    """

    # Raw token kept between requests; None until the first fetch
    _token: Optional[str] = None
    # Name of the request header that receives the "Bearer ..." value
    authentication_key = "Authorization"

    @abstractmethod
    def fetch_token(self) -> Optional[str]:
        """Method that should return the bearer token"""
        pass

    def refresh_token(self) -> None:
        """Method to refresh the token if token expires"""
        self._fetch_token(force_refresh=True)

    def _fetch_token(self, force_refresh: bool = False) -> Optional[str]:
        """
        Return the header value `Bearer <token>`.

        `fetch_token()` is called only when no token is stored yet or when
        `force_refresh` is True.
        """
        if force_refresh or not self._token:
            logger.info("Refreshing authentication token...")
            self._token = self.fetch_token()
        # NOTE: when `fetch_token()` returns None this yields "Bearer None"
        return f"Bearer {self._token}"

    def __call__(self, r):
        """Set the bearer header on request `r` and return it."""
        r.headers[self.authentication_key] = self._fetch_token()
        return r
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
Auth = Union[BasicAuth, CustomAuth, BearerAuth]
|