castor_extractor-0.24.34-py3-none-any.whl → castor_extractor-0.24.36-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
 # Changelog
 
+## 0.24.36 - 2025-08-04
+
+* Sigma:
+  * Refresh token before lineage extraction
+  * Disregard 403 errors during lineage extraction
+
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
 ## 0.24.34 - 2025-07-02
 
 * SQLServer: multiple databases

castor_extractor/transformation/coalesce/client/client.py CHANGED
@@ -1,31 +1,47 @@
 import logging
+from functools import partial
 from http import HTTPStatus
-from typing import Iterator, Optional
+from typing import Callable, Optional
 
-from requests import ConnectionError
+from pydantic import ValidationError
 
 from ....utils import (
     APIClient,
     BearerAuth,
     RequestSafeMode,
     SerializedAsset,
+    fetch_all_pages,
 )
 from ..assets import CoalesceAsset, CoalesceQualityAsset
 from .credentials import CoalesceCredentials
 from .endpoint import (
     CoalesceEndpointFactory,
 )
-from .type import NodeIDToNamesMapping
-from .utils import column_names_per_node, is_test, test_names_per_node
+from .pagination import CoalescePagination
+
+logger = logging.getLogger(__name__)
+
+
+COALESCE_PAGE_SIZE = 300
+COALESCE_PAGE_SIZE_RUN_RESULTS = 1_000
+
+COALESCE_TIMEOUT_SECONDS = 90
 
-_LIMIT_MAX = 1_000
 _MAX_ERRORS = 200
 
-logger = logging.getLogger(__name__)
+COALESCE_SAFE_MODE = RequestSafeMode(
+    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
+    max_errors=_MAX_ERRORS,
+)
 
 
-def _run_result_payload(result: dict, query_result: dict) -> dict:
+def _run_result_payload(
+    environment_id: str,
+    result: dict,
+    query_result: dict,
+) -> dict:
     return {
+        "environment_id": environment_id,
         "node_id": result["nodeID"],
         "node_name": result["name"],
         "test_name": query_result["name"],
@@ -37,13 +53,6 @@ def _run_result_payload(result: dict, query_result: dict) -> dict:
     }
 
 
-COALESCE_SAFE_MODE = RequestSafeMode(
-    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
-    max_errors=_MAX_ERRORS,
-)
-COALESCE_TIMEOUT_SECONDS = 90
-
-
 class CoalesceBearerAuth(BearerAuth):
     """Bearer Authentication for Coalesce"""
 
@@ -69,65 +78,74 @@ class CoalesceClient(APIClient):
             timeout=COALESCE_TIMEOUT_SECONDS,
         )
 
-    def _fetch_environments(self) -> Iterator[dict]:
-        endpoint = CoalesceEndpointFactory.environments()
-        result = self._get(endpoint=endpoint)
-        return result["data"]
-
-    def _node_details(self, environment_id: int, node_id: str) -> dict:
-        endpoint = CoalesceEndpointFactory.nodes(
-            environment_id=environment_id, node_id=node_id
+    def _get_paginated(
+        self,
+        endpoint: str,
+        limit: int = COALESCE_PAGE_SIZE,
+        params: Optional[dict] = None,
+    ) -> Callable:
+        return partial(
+            self._get,
+            retry_on_timeout=False,  # explained in the docstring
+            endpoint=endpoint,
+            params={
+                "limit": limit,
+                **(params or dict()),
+            },
         )
-        return self._get(endpoint=endpoint)
+
+    def _fetch_environments(self) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.environments()
+        request = self._get_paginated(endpoint=endpoint)
+        result = fetch_all_pages(request, CoalescePagination)
+        return list(result)
 
     def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
-        result = self._get(endpoint=endpoint)
-        nodes: list[dict] = []
-        for node in result["data"]:
-            try:
-                details = self._node_details(environment_id, node["id"])
-                nodes.append({**node, **details})
-            except ConnectionError as e:
-                node_id = node["id"]
-                message = f"ConnectionError, environment: {environment_id}, node: {node_id}"
-                logger.warning(message)
-                raise e
-        return nodes
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params={"detail": "true"},
+        )
+        result = fetch_all_pages(request, CoalescePagination)
+        return [
+            {
+                **node,
+                "environment_id": environment_id,
+            }
+            for node in result
+        ]
 
     def _fetch_all_nodes(self) -> SerializedAsset:
+        environments = self._fetch_environments()
+        total = len(environments)
         nodes: list[dict] = []
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            nodes.extend(self._fetch_env_nodes(environment_id))
+
+        for index, env in enumerate(environments):
+            env_id = env["id"]
+            logger.info(f"Fetching nodes for env #{env_id} - {index}/{total}")
+            try:
+                nodes.extend(self._fetch_env_nodes(env_id))
+            except ValidationError as e:
+                # 500 Server Error: Internal Server Error on Coalesce API
+                logger.warning(
+                    f"Skipping nodes for {env_id} due to the following Error: {e}"
+                )
+            logger.info(f"{len(nodes)} nodes extracted so far")
         return nodes
 
     def _fetch_runs(self, starting_from: str) -> SerializedAsset:
-        """
-        fetch runs, per environment;
-        we break per environment to lower the chance of exceeding the 1k limit
-        """
-        runs: list[dict] = []
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            runs.extend(
-                self._fetch_recent_runs_per_env(environment_id, starting_from)
-            )
-        return runs
-
-    def _fetch_recent_runs_per_env(
-        self, environment_id: int, starting_from: str
-    ) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.runs()
         params = {
-            "environmentID": environment_id,
-            "limit": _LIMIT_MAX,
             "orderBy": "runEndTime",
             "orderByDirection": "asc",
             "startingFrom": starting_from,
         }
-        result = self._get(endpoint=endpoint, params=params)
-        return result["data"]
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params=params,
+            limit=COALESCE_PAGE_SIZE_RUN_RESULTS,
+        )
+        return list(fetch_all_pages(request, CoalescePagination))
 
     def _fetch_run_results(self, run_id: str) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.run_results(run_id)
@@ -136,51 +154,37 @@ class CoalesceClient(APIClient):
 
     def _run_results_by_run(
         self,
+        environment_id: str,
         run_id: str,
-        test_names: NodeIDToNamesMapping,
-        column_names: NodeIDToNamesMapping,
     ) -> SerializedAsset:
         run_results: list[dict] = []
         for result in self._fetch_run_results(run_id):
-            node_id = result["nodeID"]
             for query_result in result["queryResults"]:
-                _is_test = is_test(
+                if query_result["type"] != "sqlTest":
+                    continue
+                run_result = _run_result_payload(
+                    environment_id,
+                    result,
                     query_result,
-                    node_id,
-                    test_names,
-                    column_names,
                 )
-                if not _is_test:
-                    continue
-                run_result = _run_result_payload(result, query_result)
                 run_results.append(run_result)
         return run_results
 
-    def _run_results_by_env(
-        self, environment_id: int, starting_from: str
+    def _fetch_all_run_results(
+        self,
+        starting_from: str,
     ) -> SerializedAsset:
         run_results: list[dict] = []
-        nodes = self._fetch_env_nodes(environment_id)
-        test_names = test_names_per_node(nodes)
-        column_names = column_names_per_node(nodes)
-        runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
 
-        for run in runs:
-            run_id = run["id"]
-            _results = self._run_results_by_run(
-                run_id, test_names, column_names
-            )
-            run_results.extend(_results)
-        return run_results
-
-    def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
-        run_results: list[dict] = []
-
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            _results = self._run_results_by_env(environment_id, starting_from)
-            run_results.extend(_results)
+        runs = self._fetch_runs(starting_from)
+        total = len(runs)
 
+        for index, run in enumerate(runs):
+            logger.info(f"Extracting run results ({index}/{total})")
+            run_id = run["id"]
+            environment_id = run["environmentID"]
+            current_results = self._run_results_by_run(environment_id, run_id)
+            run_results.extend(current_results)
         return run_results
 
     def fetch(
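
Note on the refactor above: `_get_paginated` freezes the endpoint and base query parameters into a `functools.partial`, and `fetch_all_pages` then re-invokes that partial with the next-page cursor merged in as an extra keyword argument. A minimal, self-contained sketch of that interaction (fake payloads and names, not the library's API):

    from functools import partial

    # Fake two-page payloads in the Coalesce response shape, keyed by cursor.
    _PAGES = {
        None: {"data": [{"id": 1}, {"id": 2}], "next": "cursor-1"},
        "cursor-1": {"data": [{"id": 3}], "next": None},
    }

    def _get(endpoint, params, startingFrom=None):
        # Stand-in for APIClient._get; the real method issues an HTTP request.
        return _PAGES[startingFrom]

    # Equivalent of _get_paginated: endpoint and limit are frozen up front,
    # so the pagination loop only has to supply the cursor.
    request = partial(_get, endpoint="environments", params={"limit": 300})

    payload = request()                      # first page
    rows = list(payload["data"])
    while payload["next"] is not None:       # CoalescePagination.is_last()
        payload = request(startingFrom=payload["next"])  # next_page_payload()
        rows.extend(payload["data"])

    assert [row["id"] for row in rows] == [1, 2, 3]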

castor_extractor/transformation/coalesce/client/pagination.py ADDED
@@ -0,0 +1,26 @@
+from typing import Optional, Union
+
+from ....utils import PaginationModel
+
+
+class CoalescePagination(PaginationModel):
+    """
+    Class to handle paginated results for Coalesce
+    See their documentation here
+    https://docs.coalesce.io/docs/api
+    """
+
+    data: list
+    next: Union[Optional[str], Optional[int]] = None
+
+    def is_last(self) -> bool:
+        """Stopping condition for the pagination"""
+        return self.next is None
+
+    def next_page_payload(self):
+        """Payload enabling to generate the request for the next page"""
+        return {"startingFrom": self.next}
+
+    def page_results(self) -> list:
+        """List of results of the current page"""
+        return self.data
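
Assuming `PaginationModel` behaves like a pydantic `BaseModel` (the `ValidationError` handling in client.py points that way), the new model can be exercised on its own; the payload shape below is inferred from the class fields:

    # Hedged example: payload shape inferred from the model fields.
    page = CoalescePagination(data=[{"id": "n1"}], next="abc123")
    assert not page.is_last()                     # a cursor remains
    assert page.next_page_payload() == {"startingFrom": "abc123"}

    last_page = CoalescePagination(data=[{"id": "n2"}])  # next defaults to None
    assert last_page.is_last()                    # stopping condition reached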

castor_extractor/utils/client/api/pagination.py CHANGED
@@ -66,16 +66,19 @@ def fetch_all_pages(
     """
     page_number = 1
    response_payload = request()
+
     paginated_response = pagination_model(**response_payload)
+
     while not paginated_response.is_last():
         logger.debug(f"Fetching page number {page_number}")
         yield from paginated_response.page_results()
         next_page_parameters = paginated_response.next_page_parameters()
-        new_request = partial(request, **next_page_parameters)
+        request_with_pagination = partial(request, **next_page_parameters)
         if rate_limit:
             sleep(rate_limit)
         paginated_response = pagination_model(
-            current_page_payload=next_page_parameters, **new_request()
+            current_page_payload=next_page_parameters,
+            **request_with_pagination(),
         )
         page_number += 1
 
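
For context, the contract `fetch_all_pages` expects from its `pagination_model` argument can be read off the loop above; a stub satisfying it (hypothetical, shown only to make the interface explicit):

    from typing import Optional

    class CursorPagination:
        """Hypothetical stub; the method names are the ones the loop calls."""

        def __init__(self, data: list, next: Optional[str] = None, **_extra):
            # **_extra absorbs current_page_payload, which the loop passes
            # on every page after the first.
            self.data = data
            self.next = next

        def is_last(self) -> bool:
            return self.next is None            # no cursor means last page

        def page_results(self) -> list:
            return self.data                    # rows yielded to the caller

        def next_page_parameters(self) -> dict:
            return {"startingFrom": self.next}  # merged into the next request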

castor_extractor/visualization/sigma/client/client.py CHANGED
@@ -54,6 +54,13 @@ SIGMA_SAFE_MODE = RequestSafeMode(
     max_errors=_VOLUME_IGNORED,
     status_codes=_IGNORED_ERROR_CODES,
 )
+SIGMA_SAFE_MODE_LINEAGE = RequestSafeMode(
+    max_errors=_VOLUME_IGNORED,
+    status_codes=(
+        *_IGNORED_ERROR_CODES,
+        HTTPStatus.FORBIDDEN,
+    ),
+)
 _THREADS_LINEAGE = 10  # empirically found; hit the rate limit with 20 workers
 _RETRY_NUMBER = 1
 _RETRY_BASE_MS = 60_000
@@ -210,18 +217,35 @@ class SigmaClient(APIClient):
         return contexts
 
     def _get_all_lineages(self, elements: list[dict]) -> Iterator[dict]:
+        """
+        The safe mode is temporarily modified to include 403 errors.
+
+        Due to concurrency issues, we force a refresh of the token in hopes that
+        the lineage extraction takes less than the token expiration time of
+        1 hour.
+        """
+        safe_mode = self._safe_mode
+        self._safe_mode = SIGMA_SAFE_MODE_LINEAGE
+
         lineage_context = self._lineage_context(elements)
 
+        self._auth.refresh_token()
+
         with ThreadPoolExecutor(max_workers=_THREADS_LINEAGE) as executor:
             results = executor.map(self._get_lineage, lineage_context)
 
             for lineage in results:
+                if not lineage.lineage:
+                    continue
+
                 yield {
                     **lineage.lineage,
                     "workbook_id": lineage.context.workbook_id,
                     "element_id": lineage.context.element_id,
                 }
 
+        self._safe_mode = safe_mode
+
     @staticmethod
     def _yield_deduplicated_queries(
         queries: Iterable[dict], workbook_id: str
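
The lineage pass above swaps `self._safe_mode` for `SIGMA_SAFE_MODE_LINEAGE` and restores it after the loop; since `_get_all_lineages` is a generator, that restore only runs if the generator is consumed to completion. A context-manager variant of the same swap-and-restore pattern (a sketch, not the library's API) restores the mode even on early exit:

    from contextlib import contextmanager

    @contextmanager
    def temporary_safe_mode(client, mode):
        """Swap a client's safe mode for the duration of a block."""
        previous = client._safe_mode
        client._safe_mode = mode
        try:
            yield client
        finally:
            client._safe_mode = previous  # restored even if an error escapes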

castor_extractor-0.24.34.dist-info/METADATA → castor_extractor-0.24.36.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.34
+Version: 0.24.36
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -215,6 +215,16 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
 
 # Changelog
 
+## 0.24.36 - 2025-08-04
+
+* Sigma:
+  * Refresh token before lineage extraction
+  * Disregard 403 errors during lineage extraction
+
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
 ## 0.24.34 - 2025-07-02
 
 * SQLServer: multiple databases

castor_extractor-0.24.34.dist-info/RECORD → castor_extractor-0.24.36.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=4PQMZjH-5BKSERREUHivWM7KKl_PpIDieFYH2PeRmGQ,18840
+CHANGELOG.md,sha256=HAHFgRYnv-pbsKwbHrRCrWoLpsqr8mg7Fp7tDsBsN9E,19030
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -76,12 +76,10 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
 castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
 castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
-castor_extractor/transformation/coalesce/client/client.py,sha256=7EVJDDxnIm5_uMHLFZ2PD6JzfebVglKST9IiURwn4vs,6524
+castor_extractor/transformation/coalesce/client/client.py,sha256=3YB82ibaumeSRd510mlrPXKsWefV3lHQQVis9oEK-LQ,6133
 castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
 castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
-castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
-castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
-castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
+castor_extractor/transformation/coalesce/client/pagination.py,sha256=zynyWCMEzUQ7HA1Q5AP4BAOmxRQI6NA5jCPEo0lHn44,705
 castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
 castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
 castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZA2DrgjP7Gi5epAPE,5684
@@ -108,7 +106,7 @@ castor_extractor/utils/client/api/auth.py,sha256=lq0K3UEl1vwIIa_vKTdlpIQPdE5K1-5
 castor_extractor/utils/client/api/auth_test.py,sha256=LlyXytnatg6ZzR4Zkvzk0BH99FYhHX7qn_nyr2MSnDI,1305
 castor_extractor/utils/client/api/client.py,sha256=qmj7KoNqt6F-cmpdaMiz_aVxzwMCgbDNcgzXSbCdu1Y,5183
 castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
-castor_extractor/utils/client/api/pagination.py,sha256=ph5TYqPiyFGgygsIhCATAHPIQ9UJNZyiTcqlyRdGEno,2460
+castor_extractor/utils/client/api/pagination.py,sha256=tNL89bvgnMJd0ajJA07wTTReH3PJOQm3xsa93SKHFss,2499
 castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
 castor_extractor/utils/client/api/safe_request.py,sha256=5pvI2WPRDtitX9F1aYcXTIMPNmDikRK9dKTD3ctoeoQ,1774
 castor_extractor/utils/client/api/safe_request_test.py,sha256=LqS5FBxs6lLLcTkcgxIoLb6OinxShHXR5y4CWZpwmwg,2005
@@ -273,7 +271,7 @@ castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLG
 castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
 castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
 castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
-castor_extractor/visualization/sigma/client/client.py,sha256=ZE44k5klBVnc5lld3tpjuKGeSdFmlJ0wr5DOB4pEfco,9446
+castor_extractor/visualization/sigma/client/client.py,sha256=ifCxhZ8-p9u7MnJRE8EYF_YP_G3REr_PELTSrtHiZwk,10099
 castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44jnrJ-0_A5Y6ZGyDkMf9Ml3eEP8dNkY,581
 castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
 castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
@@ -430,8 +428,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=4RgeSkHDWTWRyU2iLx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.34.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
-castor_extractor-0.24.34.dist-info/METADATA,sha256=-xB8vdjxDHFkDYbyAlL8L-nEbQMqs44GVzN5wgvKfjs,26293
-castor_extractor-0.24.34.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-castor_extractor-0.24.34.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
-castor_extractor-0.24.34.dist-info/RECORD,,
+castor_extractor-0.24.36.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.36.dist-info/METADATA,sha256=m14Hk_AYJo9_bZE7IOb6U_LdhG8JfXnVqisiJHjgMS4,26483
+castor_extractor-0.24.36.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.36.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
+castor_extractor-0.24.36.dist-info/RECORD,,

castor_extractor/transformation/coalesce/client/type.py DELETED
@@ -1 +0,0 @@
-NodeIDToNamesMapping = dict[str, set[str]]

castor_extractor/transformation/coalesce/client/utils.py DELETED
@@ -1,52 +0,0 @@
-from ....utils import SerializedAsset
-from .type import NodeIDToNamesMapping
-
-_NULL_SUFFIX = ": Null"
-_UNIQUE_SUFFIX = ": Unique"
-
-
-def is_test(
-    query_result: dict,
-    node_id: str,
-    test_names: NodeIDToNamesMapping,
-    column_names: NodeIDToNamesMapping,
-) -> bool:
-    """
-    checks whether a query result is a test result or not.
-
-    all this implementation can soon be replaced by checking whether
-    query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
-    """
-    # test scoped on the node (table)
-    result_name = query_result["name"]
-    if result_name in test_names.get(node_id, {}):
-        return True
-
-    # test scoped on the column
-    if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
-        _UNIQUE_SUFFIX
-    ):
-        column_name = result_name.split(":")[0]
-        if column_name in column_names.get(node_id, {}):
-            return True
-    return False
-
-
-def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(testName)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        tests = node.get("metadata", {}).get("appliedNodeTests", [])
-        mapping[node_id] = {test["name"] for test in tests}
-    return mapping
-
-
-def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(columnNames)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        columns = node.get("metadata", {}).get("columns", [])
-        mapping[node_id] = {column["name"] for column in columns}
-    return mapping

castor_extractor/transformation/coalesce/client/utils_test.py DELETED
@@ -1,54 +0,0 @@
-from .utils import is_test
-
-
-def test_is_test():
-    test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
-    column_names = {"some-uuid": {"carthago", "delenda", "est"}}
-
-    happy_node_test = is_test(
-        query_result={"name": "check-mirrors"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_node_test is True
-
-    unknown_node_test = is_test(
-        query_result={"name": "check-engine"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_test is False
-
-    happy_column_test_unique = is_test(
-        query_result={"name": "carthago: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_unique is True
-
-    happy_column_test_null = is_test(
-        query_result={"name": "carthago: Null"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_null is True
-
-    unknown_column_test = is_test(
-        query_result={"name": "rome: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_column_test is False
-
-    unknown_node_id_test = is_test(
-        query_result={"name": "whatever: Unique"},
-        node_id="unknown-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_id_test is False