castor-extractor 0.24.34__py3-none-any.whl → 0.24.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +10 -0
- castor_extractor/transformation/coalesce/client/client.py +92 -88
- castor_extractor/transformation/coalesce/client/pagination.py +26 -0
- castor_extractor/utils/client/api/pagination.py +5 -2
- castor_extractor/visualization/sigma/client/client.py +24 -0
- {castor_extractor-0.24.34.dist-info → castor_extractor-0.24.36.dist-info}/METADATA +11 -1
- {castor_extractor-0.24.34.dist-info → castor_extractor-0.24.36.dist-info}/RECORD +10 -12
- castor_extractor/transformation/coalesce/client/type.py +0 -1
- castor_extractor/transformation/coalesce/client/utils.py +0 -52
- castor_extractor/transformation/coalesce/client/utils_test.py +0 -54
- {castor_extractor-0.24.34.dist-info → castor_extractor-0.24.36.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.34.dist-info → castor_extractor-0.24.36.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.34.dist-info → castor_extractor-0.24.36.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.36 - 2025-08-04
|
|
4
|
+
|
|
5
|
+
* Sigma:
|
|
6
|
+
* Refresh token before lineage extraction
|
|
7
|
+
* Disregard 403 errors during lineage extraction
|
|
8
|
+
|
|
9
|
+
## 0.24.35 - 2025-07-29
|
|
10
|
+
|
|
11
|
+
* Coalesce - Fix pagination issue
|
|
12
|
+
|
|
3
13
|
## 0.24.34 - 2025-07-02
|
|
4
14
|
|
|
5
15
|
* SQLServer: multiple databases
|
|
@@ -1,31 +1,47 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from functools import partial
|
|
2
3
|
from http import HTTPStatus
|
|
3
|
-
from typing import
|
|
4
|
+
from typing import Callable, Optional
|
|
4
5
|
|
|
5
|
-
from
|
|
6
|
+
from pydantic import ValidationError
|
|
6
7
|
|
|
7
8
|
from ....utils import (
|
|
8
9
|
APIClient,
|
|
9
10
|
BearerAuth,
|
|
10
11
|
RequestSafeMode,
|
|
11
12
|
SerializedAsset,
|
|
13
|
+
fetch_all_pages,
|
|
12
14
|
)
|
|
13
15
|
from ..assets import CoalesceAsset, CoalesceQualityAsset
|
|
14
16
|
from .credentials import CoalesceCredentials
|
|
15
17
|
from .endpoint import (
|
|
16
18
|
CoalesceEndpointFactory,
|
|
17
19
|
)
|
|
18
|
-
from .type import NodeIDToNamesMapping
|
|
19
|
-
|
|
20
|
+
from .pagination import CoalescePagination
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
COALESCE_PAGE_SIZE = 300
|
|
26
|
+
COALESCE_PAGE_SIZE_RUN_RESULTS = 1_000
|
|
27
|
+
|
|
28
|
+
COALESCE_TIMEOUT_SECONDS = 90
|
|
20
29
|
|
|
21
|
-
_LIMIT_MAX = 1_000
|
|
22
30
|
_MAX_ERRORS = 200
|
|
23
31
|
|
|
24
|
-
|
|
32
|
+
COALESCE_SAFE_MODE = RequestSafeMode(
|
|
33
|
+
status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
|
|
34
|
+
max_errors=_MAX_ERRORS,
|
|
35
|
+
)
|
|
25
36
|
|
|
26
37
|
|
|
27
|
-
def _run_result_payload(result: dict, query_result: dict) -> dict:
|
|
38
|
+
def _run_result_payload(
|
|
39
|
+
environment_id: str,
|
|
40
|
+
result: dict,
|
|
41
|
+
query_result: dict,
|
|
42
|
+
) -> dict:
|
|
28
43
|
return {
|
|
44
|
+
"environment_id": environment_id,
|
|
29
45
|
"node_id": result["nodeID"],
|
|
30
46
|
"node_name": result["name"],
|
|
31
47
|
"test_name": query_result["name"],
|
|
@@ -37,13 +53,6 @@ def _run_result_payload(result: dict, query_result: dict) -> dict:
|
|
|
37
53
|
}
|
|
38
54
|
|
|
39
55
|
|
|
40
|
-
COALESCE_SAFE_MODE = RequestSafeMode(
|
|
41
|
-
status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
|
|
42
|
-
max_errors=_MAX_ERRORS,
|
|
43
|
-
)
|
|
44
|
-
COALESCE_TIMEOUT_SECONDS = 90
|
|
45
|
-
|
|
46
|
-
|
|
47
56
|
class CoalesceBearerAuth(BearerAuth):
|
|
48
57
|
"""Bearer Authentication for Coalesce"""
|
|
49
58
|
|
|
@@ -69,65 +78,74 @@ class CoalesceClient(APIClient):
|
|
|
69
78
|
timeout=COALESCE_TIMEOUT_SECONDS,
|
|
70
79
|
)
|
|
71
80
|
|
|
72
|
-
def
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
81
|
+
def _get_paginated(
|
|
82
|
+
self,
|
|
83
|
+
endpoint: str,
|
|
84
|
+
limit: int = COALESCE_PAGE_SIZE,
|
|
85
|
+
params: Optional[dict] = None,
|
|
86
|
+
) -> Callable:
|
|
87
|
+
return partial(
|
|
88
|
+
self._get,
|
|
89
|
+
retry_on_timeout=False, # explained in the docstring
|
|
90
|
+
endpoint=endpoint,
|
|
91
|
+
params={
|
|
92
|
+
"limit": limit,
|
|
93
|
+
**(params or dict()),
|
|
94
|
+
},
|
|
80
95
|
)
|
|
81
|
-
|
|
96
|
+
|
|
97
|
+
def _fetch_environments(self) -> SerializedAsset:
|
|
98
|
+
endpoint = CoalesceEndpointFactory.environments()
|
|
99
|
+
request = self._get_paginated(endpoint=endpoint)
|
|
100
|
+
result = fetch_all_pages(request, CoalescePagination)
|
|
101
|
+
return list(result)
|
|
82
102
|
|
|
83
103
|
def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
|
|
84
104
|
endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
105
|
+
request = self._get_paginated(
|
|
106
|
+
endpoint=endpoint,
|
|
107
|
+
params={"detail": "true"},
|
|
108
|
+
)
|
|
109
|
+
result = fetch_all_pages(request, CoalescePagination)
|
|
110
|
+
return [
|
|
111
|
+
{
|
|
112
|
+
**node,
|
|
113
|
+
"environment_id": environment_id,
|
|
114
|
+
}
|
|
115
|
+
for node in result
|
|
116
|
+
]
|
|
97
117
|
|
|
98
118
|
def _fetch_all_nodes(self) -> SerializedAsset:
|
|
119
|
+
environments = self._fetch_environments()
|
|
120
|
+
total = len(environments)
|
|
99
121
|
nodes: list[dict] = []
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
122
|
+
|
|
123
|
+
for index, env in enumerate(environments):
|
|
124
|
+
env_id = env["id"]
|
|
125
|
+
logger.info(f"Fetching nodes for env #{env_id} - {index}/{total}")
|
|
126
|
+
try:
|
|
127
|
+
nodes.extend(self._fetch_env_nodes(env_id))
|
|
128
|
+
except ValidationError as e:
|
|
129
|
+
# 500 Server Error: Internal Server Error on Coalesce API
|
|
130
|
+
logger.warning(
|
|
131
|
+
f"Skipping nodes for {env_id} due to the following Error: {e}"
|
|
132
|
+
)
|
|
133
|
+
logger.info(f"{len(nodes)} nodes extracted so far")
|
|
103
134
|
return nodes
|
|
104
135
|
|
|
105
136
|
def _fetch_runs(self, starting_from: str) -> SerializedAsset:
|
|
106
|
-
"""
|
|
107
|
-
fetch runs, per environment;
|
|
108
|
-
we break per environment to lower the chance of exceeding the 1k limit
|
|
109
|
-
"""
|
|
110
|
-
runs: list[dict] = []
|
|
111
|
-
for environment in self._fetch_environments():
|
|
112
|
-
environment_id = environment["id"]
|
|
113
|
-
runs.extend(
|
|
114
|
-
self._fetch_recent_runs_per_env(environment_id, starting_from)
|
|
115
|
-
)
|
|
116
|
-
return runs
|
|
117
|
-
|
|
118
|
-
def _fetch_recent_runs_per_env(
|
|
119
|
-
self, environment_id: int, starting_from: str
|
|
120
|
-
) -> SerializedAsset:
|
|
121
137
|
endpoint = CoalesceEndpointFactory.runs()
|
|
122
138
|
params = {
|
|
123
|
-
"environmentID": environment_id,
|
|
124
|
-
"limit": _LIMIT_MAX,
|
|
125
139
|
"orderBy": "runEndTime",
|
|
126
140
|
"orderByDirection": "asc",
|
|
127
141
|
"startingFrom": starting_from,
|
|
128
142
|
}
|
|
129
|
-
|
|
130
|
-
|
|
143
|
+
request = self._get_paginated(
|
|
144
|
+
endpoint=endpoint,
|
|
145
|
+
params=params,
|
|
146
|
+
limit=COALESCE_PAGE_SIZE_RUN_RESULTS,
|
|
147
|
+
)
|
|
148
|
+
return list(fetch_all_pages(request, CoalescePagination))
|
|
131
149
|
|
|
132
150
|
def _fetch_run_results(self, run_id: str) -> SerializedAsset:
|
|
133
151
|
endpoint = CoalesceEndpointFactory.run_results(run_id)
|
|
@@ -136,51 +154,37 @@ class CoalesceClient(APIClient):
|
|
|
136
154
|
|
|
137
155
|
def _run_results_by_run(
|
|
138
156
|
self,
|
|
157
|
+
environment_id: str,
|
|
139
158
|
run_id: str,
|
|
140
|
-
test_names: NodeIDToNamesMapping,
|
|
141
|
-
column_names: NodeIDToNamesMapping,
|
|
142
159
|
) -> SerializedAsset:
|
|
143
160
|
run_results: list[dict] = []
|
|
144
161
|
for result in self._fetch_run_results(run_id):
|
|
145
|
-
node_id = result["nodeID"]
|
|
146
162
|
for query_result in result["queryResults"]:
|
|
147
|
-
|
|
163
|
+
if query_result["type"] != "sqlTest":
|
|
164
|
+
continue
|
|
165
|
+
run_result = _run_result_payload(
|
|
166
|
+
environment_id,
|
|
167
|
+
result,
|
|
148
168
|
query_result,
|
|
149
|
-
node_id,
|
|
150
|
-
test_names,
|
|
151
|
-
column_names,
|
|
152
169
|
)
|
|
153
|
-
if not _is_test:
|
|
154
|
-
continue
|
|
155
|
-
run_result = _run_result_payload(result, query_result)
|
|
156
170
|
run_results.append(run_result)
|
|
157
171
|
return run_results
|
|
158
172
|
|
|
159
|
-
def _run_results_by_env(
|
|
160
|
-
self,
|
|
173
|
+
def _fetch_all_run_results(
|
|
174
|
+
self,
|
|
175
|
+
starting_from: str,
|
|
161
176
|
) -> SerializedAsset:
|
|
162
177
|
run_results: list[dict] = []
|
|
163
|
-
nodes = self._fetch_env_nodes(environment_id)
|
|
164
|
-
test_names = test_names_per_node(nodes)
|
|
165
|
-
column_names = column_names_per_node(nodes)
|
|
166
|
-
runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
|
|
167
178
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
_results = self._run_results_by_run(
|
|
171
|
-
run_id, test_names, column_names
|
|
172
|
-
)
|
|
173
|
-
run_results.extend(_results)
|
|
174
|
-
return run_results
|
|
175
|
-
|
|
176
|
-
def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
|
|
177
|
-
run_results: list[dict] = []
|
|
178
|
-
|
|
179
|
-
for environment in self._fetch_environments():
|
|
180
|
-
environment_id = environment["id"]
|
|
181
|
-
_results = self._run_results_by_env(environment_id, starting_from)
|
|
182
|
-
run_results.extend(_results)
|
|
179
|
+
runs = self._fetch_runs(starting_from)
|
|
180
|
+
total = len(runs)
|
|
183
181
|
|
|
182
|
+
for index, run in enumerate(runs):
|
|
183
|
+
logger.info(f"Extracting run results ({index}/{total})")
|
|
184
|
+
run_id = run["id"]
|
|
185
|
+
environment_id = run["environmentID"]
|
|
186
|
+
current_results = self._run_results_by_run(environment_id, run_id)
|
|
187
|
+
run_results.extend(current_results)
|
|
184
188
|
return run_results
|
|
185
189
|
|
|
186
190
|
def fetch(
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import Optional, Union
|
|
2
|
+
|
|
3
|
+
from ....utils import PaginationModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CoalescePagination(PaginationModel):
|
|
7
|
+
"""
|
|
8
|
+
Class to handle paginated results for Coalesce
|
|
9
|
+
See their documentation here
|
|
10
|
+
https://docs.coalesce.io/docs/api
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
data: list
|
|
14
|
+
next: Union[Optional[str], Optional[int]] = None
|
|
15
|
+
|
|
16
|
+
def is_last(self) -> bool:
|
|
17
|
+
"""Stopping condition for the pagination"""
|
|
18
|
+
return self.next is None
|
|
19
|
+
|
|
20
|
+
def next_page_payload(self):
|
|
21
|
+
"""Payload enabling to generate the request for the next page"""
|
|
22
|
+
return {"startingFrom": self.next}
|
|
23
|
+
|
|
24
|
+
def page_results(self) -> list:
|
|
25
|
+
"""List of results of the current page"""
|
|
26
|
+
return self.data
|
|
@@ -66,16 +66,19 @@ def fetch_all_pages(
|
|
|
66
66
|
"""
|
|
67
67
|
page_number = 1
|
|
68
68
|
response_payload = request()
|
|
69
|
+
|
|
69
70
|
paginated_response = pagination_model(**response_payload)
|
|
71
|
+
|
|
70
72
|
while not paginated_response.is_last():
|
|
71
73
|
logger.debug(f"Fetching page number {page_number}")
|
|
72
74
|
yield from paginated_response.page_results()
|
|
73
75
|
next_page_parameters = paginated_response.next_page_parameters()
|
|
74
|
-
|
|
76
|
+
request_with_pagination = partial(request, **next_page_parameters)
|
|
75
77
|
if rate_limit:
|
|
76
78
|
sleep(rate_limit)
|
|
77
79
|
paginated_response = pagination_model(
|
|
78
|
-
current_page_payload=next_page_parameters,
|
|
80
|
+
current_page_payload=next_page_parameters,
|
|
81
|
+
**request_with_pagination(),
|
|
79
82
|
)
|
|
80
83
|
page_number += 1
|
|
81
84
|
|
|
@@ -54,6 +54,13 @@ SIGMA_SAFE_MODE = RequestSafeMode(
|
|
|
54
54
|
max_errors=_VOLUME_IGNORED,
|
|
55
55
|
status_codes=_IGNORED_ERROR_CODES,
|
|
56
56
|
)
|
|
57
|
+
SIGMA_SAFE_MODE_LINEAGE = RequestSafeMode(
|
|
58
|
+
max_errors=_VOLUME_IGNORED,
|
|
59
|
+
status_codes=(
|
|
60
|
+
*_IGNORED_ERROR_CODES,
|
|
61
|
+
HTTPStatus.FORBIDDEN,
|
|
62
|
+
),
|
|
63
|
+
)
|
|
57
64
|
_THREADS_LINEAGE = 10 # empirically found; hit the rate limit with 20 workers
|
|
58
65
|
_RETRY_NUMBER = 1
|
|
59
66
|
_RETRY_BASE_MS = 60_000
|
|
@@ -210,18 +217,35 @@ class SigmaClient(APIClient):
|
|
|
210
217
|
return contexts
|
|
211
218
|
|
|
212
219
|
def _get_all_lineages(self, elements: list[dict]) -> Iterator[dict]:
|
|
220
|
+
"""
|
|
221
|
+
The safe mode is temporarily modified to include 403 errors.
|
|
222
|
+
|
|
223
|
+
Due to concurrency issues, we force a refresh of the token in hopes that
|
|
224
|
+
the lineage extraction takes less than the token expiration time of
|
|
225
|
+
1 hour.
|
|
226
|
+
"""
|
|
227
|
+
safe_mode = self._safe_mode
|
|
228
|
+
self._safe_mode = SIGMA_SAFE_MODE_LINEAGE
|
|
229
|
+
|
|
213
230
|
lineage_context = self._lineage_context(elements)
|
|
214
231
|
|
|
232
|
+
self._auth.refresh_token()
|
|
233
|
+
|
|
215
234
|
with ThreadPoolExecutor(max_workers=_THREADS_LINEAGE) as executor:
|
|
216
235
|
results = executor.map(self._get_lineage, lineage_context)
|
|
217
236
|
|
|
218
237
|
for lineage in results:
|
|
238
|
+
if not lineage.lineage:
|
|
239
|
+
continue
|
|
240
|
+
|
|
219
241
|
yield {
|
|
220
242
|
**lineage.lineage,
|
|
221
243
|
"workbook_id": lineage.context.workbook_id,
|
|
222
244
|
"element_id": lineage.context.element_id,
|
|
223
245
|
}
|
|
224
246
|
|
|
247
|
+
self._safe_mode = safe_mode
|
|
248
|
+
|
|
225
249
|
@staticmethod
|
|
226
250
|
def _yield_deduplicated_queries(
|
|
227
251
|
queries: Iterable[dict], workbook_id: str
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.34
|
|
3
|
+
Version: 0.24.36
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -215,6 +215,16 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.36 - 2025-08-04
|
|
219
|
+
|
|
220
|
+
* Sigma:
|
|
221
|
+
* Refresh token before lineage extraction
|
|
222
|
+
* Disregard 403 errors during lineage extraction
|
|
223
|
+
|
|
224
|
+
## 0.24.35 - 2025-07-29
|
|
225
|
+
|
|
226
|
+
* Coalesce - Fix pagination issue
|
|
227
|
+
|
|
218
228
|
## 0.24.34 - 2025-07-02
|
|
219
229
|
|
|
220
230
|
* SQLServer: multiple databases
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=HAHFgRYnv-pbsKwbHrRCrWoLpsqr8mg7Fp7tDsBsN9E,19030
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -76,12 +76,10 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
76
76
|
castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
|
|
77
77
|
castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
|
|
78
78
|
castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
|
|
79
|
-
castor_extractor/transformation/coalesce/client/client.py,sha256=
|
|
79
|
+
castor_extractor/transformation/coalesce/client/client.py,sha256=3YB82ibaumeSRd510mlrPXKsWefV3lHQQVis9oEK-LQ,6133
|
|
80
80
|
castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
|
|
81
81
|
castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
|
|
82
|
-
castor_extractor/transformation/coalesce/client/
|
|
83
|
-
castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
|
|
84
|
-
castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
|
|
82
|
+
castor_extractor/transformation/coalesce/client/pagination.py,sha256=zynyWCMEzUQ7HA1Q5AP4BAOmxRQI6NA5jCPEo0lHn44,705
|
|
85
83
|
castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
|
|
86
84
|
castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
|
|
87
85
|
castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZA2DrgjP7Gi5epAPE,5684
|
|
@@ -108,7 +106,7 @@ castor_extractor/utils/client/api/auth.py,sha256=lq0K3UEl1vwIIa_vKTdlpIQPdE5K1-5
|
|
|
108
106
|
castor_extractor/utils/client/api/auth_test.py,sha256=LlyXytnatg6ZzR4Zkvzk0BH99FYhHX7qn_nyr2MSnDI,1305
|
|
109
107
|
castor_extractor/utils/client/api/client.py,sha256=qmj7KoNqt6F-cmpdaMiz_aVxzwMCgbDNcgzXSbCdu1Y,5183
|
|
110
108
|
castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
|
|
111
|
-
castor_extractor/utils/client/api/pagination.py,sha256=
|
|
109
|
+
castor_extractor/utils/client/api/pagination.py,sha256=tNL89bvgnMJd0ajJA07wTTReH3PJOQm3xsa93SKHFss,2499
|
|
112
110
|
castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
|
|
113
111
|
castor_extractor/utils/client/api/safe_request.py,sha256=5pvI2WPRDtitX9F1aYcXTIMPNmDikRK9dKTD3ctoeoQ,1774
|
|
114
112
|
castor_extractor/utils/client/api/safe_request_test.py,sha256=LqS5FBxs6lLLcTkcgxIoLb6OinxShHXR5y4CWZpwmwg,2005
|
|
@@ -273,7 +271,7 @@ castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLG
|
|
|
273
271
|
castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
|
|
274
272
|
castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
|
|
275
273
|
castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
|
|
276
|
-
castor_extractor/visualization/sigma/client/client.py,sha256=
|
|
274
|
+
castor_extractor/visualization/sigma/client/client.py,sha256=ifCxhZ8-p9u7MnJRE8EYF_YP_G3REr_PELTSrtHiZwk,10099
|
|
277
275
|
castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44jnrJ-0_A5Y6ZGyDkMf9Ml3eEP8dNkY,581
|
|
278
276
|
castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
|
|
279
277
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
|
|
@@ -430,8 +428,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=4RgeSkHDWTWRyU2iLx
|
|
|
430
428
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
431
429
|
castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
|
|
432
430
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
433
|
-
castor_extractor-0.24.34.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
434
|
-
castor_extractor-0.24.
|
|
435
|
-
castor_extractor-0.24.34.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
436
|
-
castor_extractor-0.24.34.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
437
|
-
castor_extractor-0.24.34.dist-info/RECORD,,
|
|
431
|
+
castor_extractor-0.24.36.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
432
|
+
castor_extractor-0.24.36.dist-info/METADATA,sha256=m14Hk_AYJo9_bZE7IOb6U_LdhG8JfXnVqisiJHjgMS4,26483
|
|
433
|
+
castor_extractor-0.24.36.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
434
|
+
castor_extractor-0.24.36.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
435
|
+
castor_extractor-0.24.36.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
NodeIDToNamesMapping = dict[str, set[str]]
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
from ....utils import SerializedAsset
|
|
2
|
-
from .type import NodeIDToNamesMapping
|
|
3
|
-
|
|
4
|
-
_NULL_SUFFIX = ": Null"
|
|
5
|
-
_UNIQUE_SUFFIX = ": Unique"
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def is_test(
|
|
9
|
-
query_result: dict,
|
|
10
|
-
node_id: str,
|
|
11
|
-
test_names: NodeIDToNamesMapping,
|
|
12
|
-
column_names: NodeIDToNamesMapping,
|
|
13
|
-
) -> bool:
|
|
14
|
-
"""
|
|
15
|
-
checks whether a query result is a test result or not.
|
|
16
|
-
|
|
17
|
-
all this implementation can soon be replaced by checking whether
|
|
18
|
-
query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
|
|
19
|
-
"""
|
|
20
|
-
# test scoped on the node (table)
|
|
21
|
-
result_name = query_result["name"]
|
|
22
|
-
if result_name in test_names.get(node_id, {}):
|
|
23
|
-
return True
|
|
24
|
-
|
|
25
|
-
# test scoped on the column
|
|
26
|
-
if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
|
|
27
|
-
_UNIQUE_SUFFIX
|
|
28
|
-
):
|
|
29
|
-
column_name = result_name.split(":")[0]
|
|
30
|
-
if column_name in column_names.get(node_id, {}):
|
|
31
|
-
return True
|
|
32
|
-
return False
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
|
|
36
|
-
"""mapping nodeID: set(testName)"""
|
|
37
|
-
mapping: dict[str, set[str]] = {}
|
|
38
|
-
for node in nodes:
|
|
39
|
-
node_id = node["id"]
|
|
40
|
-
tests = node.get("metadata", {}).get("appliedNodeTests", [])
|
|
41
|
-
mapping[node_id] = {test["name"] for test in tests}
|
|
42
|
-
return mapping
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
|
|
46
|
-
"""mapping nodeID: set(columnNames)"""
|
|
47
|
-
mapping: dict[str, set[str]] = {}
|
|
48
|
-
for node in nodes:
|
|
49
|
-
node_id = node["id"]
|
|
50
|
-
columns = node.get("metadata", {}).get("columns", [])
|
|
51
|
-
mapping[node_id] = {column["name"] for column in columns}
|
|
52
|
-
return mapping
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
from .utils import is_test
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def test_is_test():
|
|
5
|
-
test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
|
|
6
|
-
column_names = {"some-uuid": {"carthago", "delenda", "est"}}
|
|
7
|
-
|
|
8
|
-
happy_node_test = is_test(
|
|
9
|
-
query_result={"name": "check-mirrors"},
|
|
10
|
-
node_id="some-uuid",
|
|
11
|
-
test_names=test_names,
|
|
12
|
-
column_names=column_names,
|
|
13
|
-
)
|
|
14
|
-
assert happy_node_test is True
|
|
15
|
-
|
|
16
|
-
unknown_node_test = is_test(
|
|
17
|
-
query_result={"name": "check-engine"},
|
|
18
|
-
node_id="some-uuid",
|
|
19
|
-
test_names=test_names,
|
|
20
|
-
column_names=column_names,
|
|
21
|
-
)
|
|
22
|
-
assert unknown_node_test is False
|
|
23
|
-
|
|
24
|
-
happy_column_test_unique = is_test(
|
|
25
|
-
query_result={"name": "carthago: Unique"},
|
|
26
|
-
node_id="some-uuid",
|
|
27
|
-
test_names=test_names,
|
|
28
|
-
column_names=column_names,
|
|
29
|
-
)
|
|
30
|
-
assert happy_column_test_unique is True
|
|
31
|
-
|
|
32
|
-
happy_column_test_null = is_test(
|
|
33
|
-
query_result={"name": "carthago: Null"},
|
|
34
|
-
node_id="some-uuid",
|
|
35
|
-
test_names=test_names,
|
|
36
|
-
column_names=column_names,
|
|
37
|
-
)
|
|
38
|
-
assert happy_column_test_null is True
|
|
39
|
-
|
|
40
|
-
unknown_column_test = is_test(
|
|
41
|
-
query_result={"name": "rome: Unique"},
|
|
42
|
-
node_id="some-uuid",
|
|
43
|
-
test_names=test_names,
|
|
44
|
-
column_names=column_names,
|
|
45
|
-
)
|
|
46
|
-
assert unknown_column_test is False
|
|
47
|
-
|
|
48
|
-
unknown_node_id_test = is_test(
|
|
49
|
-
query_result={"name": "whatever: Unique"},
|
|
50
|
-
node_id="unknown-uuid",
|
|
51
|
-
test_names=test_names,
|
|
52
|
-
column_names=column_names,
|
|
53
|
-
)
|
|
54
|
-
assert unknown_node_id_test is False
|
|
File without changes
|
|
File without changes
|
|
File without changes
|