castor-extractor 0.24.7__py3-none-any.whl → 0.24.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of castor-extractor might be problematic.
- CHANGELOG.md +8 -0
- castor_extractor/transformation/__init__.py +0 -0
- castor_extractor/transformation/coalesce/__init__.py +2 -0
- castor_extractor/transformation/coalesce/assets.py +18 -0
- castor_extractor/transformation/coalesce/client/__init__.py +2 -0
- castor_extractor/transformation/coalesce/client/client.py +180 -0
- castor_extractor/transformation/coalesce/client/credentials.py +23 -0
- castor_extractor/transformation/coalesce/client/endpoint.py +42 -0
- castor_extractor/transformation/coalesce/client/type.py +1 -0
- castor_extractor/transformation/coalesce/client/utils.py +52 -0
- castor_extractor/transformation/coalesce/client/utils_test.py +54 -0
- castor_extractor/visualization/tableau/client/client_metadata_api.py +29 -1
- castor_extractor/visualization/tableau/client/client_metadata_api_test.py +31 -0
- castor_extractor/warehouse/databricks/format.py +1 -1
- {castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/METADATA +9 -1
- {castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/RECORD +19 -8
- {castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.24.9 - 2025-04-16
+
+* Introduce API client for **Coalesce**
+
+## 0.24.8 - 2025-04-16
+
+* Tableau - remove duplicates introduced by `offset` pagination
+
 ## 0.24.7 - 2025-04-07
 
 * Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
castor_extractor/transformation/__init__.py
File without changes
castor_extractor/transformation/coalesce/assets.py
ADDED
@@ -0,0 +1,18 @@
+from ...types import ExternalAsset
+
+
+class CoalesceAsset(ExternalAsset):
+    """Coalesce assets"""
+
+    NODES = "nodes"
+
+
+class CoalesceQualityAsset(ExternalAsset):
+    """
+    Coalesce Quality Assets
+    Remark: having a dedicated Enum for Quality simplifies the process of
+    searching pushed files
+    """
+
+    NODES = "nodes"
+    RUN_RESULTS = "run_results"
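`ExternalAsset` itself is not shown in this diff; assuming it behaves like a standard string-valued `Enum` (which the docstring's mention of "a dedicated Enum" suggests), the members enumerate as in this minimal sketch:

```python
# A sketch, assuming CoalesceQualityAsset iterates like a regular Enum;
# names and values are taken from the definitions above.
for asset in CoalesceQualityAsset:
    print(asset.name, asset.value)
# NODES nodes
# RUN_RESULTS run_results
```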
castor_extractor/transformation/coalesce/client/client.py
ADDED
@@ -0,0 +1,180 @@
+from http import HTTPStatus
+from typing import Iterator, Optional
+
+from ....utils import APIClient, BearerAuth, RequestSafeMode, SerializedAsset
+from ..assets import CoalesceAsset, CoalesceQualityAsset
+from .credentials import CoalesceCredentials
+from .endpoint import (
+    CoalesceEndpointFactory,
+)
+from .type import NodeIDToNamesMapping
+from .utils import column_names_per_node, is_test, test_names_per_node
+
+_LIMIT_MAX = 1_000
+_MAX_ERRORS = 50
+
+
+def _run_result_payload(result: dict, query_result: dict) -> dict:
+    return {
+        "node_id": result["nodeID"],
+        "node_name": result["name"],
+        "test_name": query_result["name"],
+        "start_time": query_result["startTime"],
+        "end_time": query_result["endTime"],
+        "status": query_result["status"],
+        "success": query_result["success"],
+        "isRunning": query_result["isRunning"],
+    }
+
+
+COALESCE_SAFE_MODE = RequestSafeMode(
+    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
+    max_errors=_MAX_ERRORS,
+)
+COALESCE_TIMEOUT_SECONDS = 90
+
+
+class CoalesceBearerAuth(BearerAuth):
+    """Bearer Authentication for Coalesce"""
+
+    def fetch_token(self) -> Optional[str]:
+        pass
+
+    def __init__(self, token: str):
+        self._token = token
+
+
+class CoalesceClient(APIClient):
+    """REST API client to extract data from Coalesce"""
+
+    def __init__(
+        self,
+        credentials: CoalesceCredentials,
+    ):
+        auth = CoalesceBearerAuth(token=credentials.token)
+        super().__init__(
+            host=credentials.host,
+            auth=auth,
+            safe_mode=COALESCE_SAFE_MODE,
+            timeout=COALESCE_TIMEOUT_SECONDS,
+        )
+
+    def _fetch_environments(self) -> Iterator[dict]:
+        endpoint = CoalesceEndpointFactory.environments()
+        result = self._get(endpoint=endpoint)
+        return result["data"]
+
+    def _node_details(self, environment_id: int, node_id: str) -> dict:
+        endpoint = CoalesceEndpointFactory.nodes(
+            environment_id=environment_id, node_id=node_id
+        )
+        return self._get(endpoint=endpoint)
+
+    def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
+        result = self._get(endpoint=endpoint)
+        nodes: list[dict] = []
+        for node in result["data"]:
+            details = self._node_details(environment_id, node["id"])
+            nodes.append({**node, **details})
+        return nodes
+
+    def _fetch_all_nodes(self) -> SerializedAsset:
+        nodes: list[dict] = []
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            nodes.extend(self._fetch_env_nodes(environment_id))
+        return nodes
+
+    def _fetch_runs(self, starting_from: str) -> SerializedAsset:
+        """
+        fetch runs, per environment;
+        we break per environment to lower the chance of exceeding the 1k limit
+        """
+        runs: list[dict] = []
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            runs.extend(
+                self._fetch_recent_runs_per_env(environment_id, starting_from)
+            )
+        return runs
+
+    def _fetch_recent_runs_per_env(
+        self, environment_id: int, starting_from: str
+    ) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.runs()
+        params = {
+            "environmentID": environment_id,
+            "limit": _LIMIT_MAX,
+            "orderBy": "runEndTime",
+            "orderByDirection": "asc",
+            "startingFrom": starting_from,
+        }
+        result = self._get(endpoint=endpoint, params=params)
+        return result["data"]
+
+    def _fetch_run_results(self, run_id: str) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.run_results(run_id)
+        result = self._get(endpoint=endpoint)
+        return result["data"]
+
+    def _run_results_by_run(
+        self,
+        run_id: str,
+        test_names: NodeIDToNamesMapping,
+        column_names: NodeIDToNamesMapping,
+    ) -> SerializedAsset:
+        run_results: list[dict] = []
+        for result in self._fetch_run_results(run_id):
+            node_id = result["nodeID"]
+            for query_result in result["queryResults"]:
+                _is_test = is_test(
+                    query_result,
+                    node_id,
+                    test_names,
+                    column_names,
+                )
+                if not _is_test:
+                    continue
+                run_result = _run_result_payload(result, query_result)
+                run_results.append(run_result)
+        return run_results
+
+    def _run_results_by_env(
+        self, environment_id: int, starting_from: str
+    ) -> SerializedAsset:
+        run_results: list[dict] = []
+        nodes = self._fetch_env_nodes(environment_id)
+        test_names = test_names_per_node(nodes)
+        column_names = column_names_per_node(nodes)
+        runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
+
+        for run in runs:
+            run_id = run["id"]
+            _results = self._run_results_by_run(
+                run_id, test_names, column_names
+            )
+            run_results.extend(_results)
+        return run_results
+
+    def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
+        run_results: list[dict] = []
+
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            _results = self._run_results_by_env(environment_id, starting_from)
+            run_results.extend(_results)
+
+        return run_results
+
+    def fetch(
+        self, asset: CoalesceAsset, starting_from=None
+    ) -> SerializedAsset:
+        """Extract the given Coalesce Asset"""
+        if asset in (CoalesceAsset.NODES, CoalesceQualityAsset.NODES):
+            return self._fetch_all_nodes()
+        elif asset == CoalesceQualityAsset.RUN_RESULTS:
+            return self._fetch_all_run_results(starting_from=starting_from)
+        raise AssertionError(
+            f"Asset {asset} is not supported by CoalesceClient"
+        )
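Put together, extracting with the new client could look like the following sketch; the import path, host, token, and date are illustrative assumptions, not values from this diff:

```python
# Hypothetical import path: the exact re-exports of the coalesce
# __init__ modules are not visible in this diff.
from castor_extractor.transformation.coalesce import (
    CoalesceAsset,
    CoalesceClient,
    CoalesceCredentials,
    CoalesceQualityAsset,
)

credentials = CoalesceCredentials(
    host="app.coalescesoftware.io",  # placeholder host
    token="<api-token>",  # placeholder token
)
client = CoalesceClient(credentials=credentials)

# All nodes, across every environment
nodes = client.fetch(CoalesceAsset.NODES)

# Test run results since a given date (placeholder value)
run_results = client.fetch(
    CoalesceQualityAsset.RUN_RESULTS,
    starting_from="2025-04-01",
)
```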
castor_extractor/transformation/coalesce/client/credentials.py
ADDED
@@ -0,0 +1,23 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+CASTOR_ENV_PREFIX = "CASTOR_COALESCE_"
+
+
+class CoalesceCredentials(BaseSettings):
+    """Class to handle Coalesce rest API permissions"""
+
+    model_config = SettingsConfigDict(
+        env_prefix=CASTOR_ENV_PREFIX,
+        extra="ignore",
+        populate_by_name=True,
+    )
+
+    host: str
+    token: str = Field(repr=False)
+
+    @property
+    def token_payload(self) -> dict[str, str]:
+        return {
+            "client_secret": self.token,
+        }
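Since `CoalesceCredentials` extends pydantic's `BaseSettings` with the `CASTOR_COALESCE_` prefix, both fields can also be resolved from the environment, as in this sketch (values are placeholders):

```python
import os

# pydantic-settings maps each field to "<env_prefix><FIELD_NAME>",
# so host and token resolve from these variables:
os.environ["CASTOR_COALESCE_HOST"] = "app.coalescesoftware.io"  # placeholder
os.environ["CASTOR_COALESCE_TOKEN"] = "<api-token>"  # placeholder

credentials = CoalesceCredentials()  # no explicit arguments needed
assert credentials.token_payload == {"client_secret": "<api-token>"}
```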
castor_extractor/transformation/coalesce/client/endpoint.py
ADDED
@@ -0,0 +1,42 @@
+from typing import Optional
+
+
+class CoalesceEndpointFactory:
+    """Provide endpoints to hit Coalesce API"""
+
+    @classmethod
+    def environments(cls, environment_id: Optional[int] = None) -> str:
+        """
+        When specified, concatenate environment_id at the end to fetch details.
+        Otherwise, list existing environments.
+        """
+        base = "api/v1/environments"
+        if environment_id:
+            return base + f"/{environment_id}"
+        return base
+
+    @classmethod
+    def nodes(cls, environment_id: int, node_id: Optional[str] = None) -> str:
+        """
+        When specified, concatenate node_id at the end to fetch details.
+        Otherwise, list existing nodes in the given environment.
+        """
+        base = f"api/v1/environments/{environment_id}/nodes"
+        if node_id:
+            return base + f"/{node_id}"
+        return base
+
+    @classmethod
+    def runs(cls) -> str:
+        """
+        Get runs (additional filtering can be done in the body)
+        """
+        base = "api/v1/runs"
+        return base
+
+    @classmethod
+    def run_results(cls, run_id: str) -> str:
+        """
+        get run results (including success/fail for tests), given a run id
+        """
+        return f"api/v1/runs/{run_id}/results"
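The factory returns relative paths only (the client's `host` supplies the base URL); for illustration, with made-up identifiers:

```python
CoalesceEndpointFactory.environments()           # "api/v1/environments"
CoalesceEndpointFactory.environments(7)          # "api/v1/environments/7"
CoalesceEndpointFactory.nodes(7)                 # "api/v1/environments/7/nodes"
CoalesceEndpointFactory.nodes(7, node_id="abc")  # "api/v1/environments/7/nodes/abc"
CoalesceEndpointFactory.runs()                   # "api/v1/runs"
CoalesceEndpointFactory.run_results("run-42")    # "api/v1/runs/run-42/results"
```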
castor_extractor/transformation/coalesce/client/type.py
ADDED
@@ -0,0 +1 @@
+NodeIDToNamesMapping = dict[str, set[str]]
castor_extractor/transformation/coalesce/client/utils.py
ADDED
@@ -0,0 +1,52 @@
+from ....utils import SerializedAsset
+from .type import NodeIDToNamesMapping
+
+_NULL_SUFFIX = ": Null"
+_UNIQUE_SUFFIX = ": Unique"
+
+
+def is_test(
+    query_result: dict,
+    node_id: str,
+    test_names: NodeIDToNamesMapping,
+    column_names: NodeIDToNamesMapping,
+) -> bool:
+    """
+    checks whether a query result is a test result or not.
+
+    all this implementation can soon be replaced by checking whether
+    query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
+    """
+    # test scoped on the node (table)
+    result_name = query_result["name"]
+    if result_name in test_names.get(node_id, {}):
+        return True
+
+    # test scoped on the column
+    if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
+        _UNIQUE_SUFFIX
+    ):
+        column_name = result_name.split(":")[0]
+        if column_name in column_names.get(node_id, {}):
+            return True
+    return False
+
+
+def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
+    """mapping nodeID: set(testName)"""
+    mapping: dict[str, set[str]] = {}
+    for node in nodes:
+        node_id = node["id"]
+        tests = node.get("metadata", {}).get("appliedNodeTests", [])
+        mapping[node_id] = {test["name"] for test in tests}
+    return mapping
+
+
+def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
+    """mapping nodeID: set(columnNames)"""
+    mapping: dict[str, set[str]] = {}
+    for node in nodes:
+        node_id = node["id"]
+        columns = node.get("metadata", {}).get("columns", [])
+        mapping[node_id] = {column["name"] for column in columns}
+    return mapping
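To see how the two mapping helpers and `is_test` fit together, here is a worked example with a single hypothetical node payload:

```python
nodes = [
    {
        "id": "node-1",  # hypothetical node payload
        "metadata": {
            "appliedNodeTests": [{"name": "row-count"}],
            "columns": [{"name": "customer_id"}, {"name": "email"}],
        },
    }
]

test_names = test_names_per_node(nodes)      # {"node-1": {"row-count"}}
column_names = column_names_per_node(nodes)  # {"node-1": {"customer_id", "email"}}

# Node-scoped test: matched by name
assert is_test({"name": "row-count"}, "node-1", test_names, column_names)
# Column-scoped test: matched by the ": Null" / ": Unique" suffix
assert is_test({"name": "email: Null"}, "node-1", test_names, column_names)
```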
castor_extractor/transformation/coalesce/client/utils_test.py
ADDED
@@ -0,0 +1,54 @@
+from .utils import is_test
+
+
+def test_is_test():
+    test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
+    column_names = {"some-uuid": {"carthago", "delenda", "est"}}
+
+    happy_node_test = is_test(
+        query_result={"name": "check-mirrors"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_node_test is True
+
+    unknown_node_test = is_test(
+        query_result={"name": "check-engine"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_node_test is False
+
+    happy_column_test_unique = is_test(
+        query_result={"name": "carthago: Unique"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_column_test_unique is True
+
+    happy_column_test_null = is_test(
+        query_result={"name": "carthago: Null"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_column_test_null is True
+
+    unknown_column_test = is_test(
+        query_result={"name": "rome: Unique"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_column_test is False
+
+    unknown_node_id_test = is_test(
+        query_result={"name": "whatever: Unique"},
+        node_id="unknown-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_node_id_test is False
castor_extractor/visualization/tableau/client/client_metadata_api.py
CHANGED
@@ -92,6 +92,34 @@ def gql_query_scroll(
             break
 
 
+def _deduplicate(result_pages: Iterator[SerializedAsset]) -> SerializedAsset:
+    """
+    Sometimes assets are duplicated, which triggers UniqueViolation errors
+    during store_all down the line.
+
+    We suspect the offset pagination to be the root cause, because we had no
+    problem until recently, when we switched from cursor pagination to offset
+    pagination (for performance reasons)
+    https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_examples.html#pagination
+
+    This is a straightforward solution to remove these duplicates directly at
+    extraction.
+    We don't show warnings because duplicates are expected, and we keep only
+    the first occurrence since those duplicates are probably identical.
+    """
+    deduplicated: SerializedAsset = []
+    seen_ids: set[str] = set()
+    for page in result_pages:
+        for asset in page:
+            asset_id = asset["id"]
+            if asset_id in seen_ids:
+                # skip duplicate
+                continue
+            deduplicated.append(asset)
+            seen_ids.add(asset_id)
+    return deduplicated
+
+
 class TableauClientMetadataApi:
     """
     Calls the MetadataAPI, using graphQL
@@ -118,7 +146,7 @@ class TableauClientMetadataApi:
             fields=fields,
             page_size=page_size,
         )
-        return
+        return _deduplicate(result_pages)
 
     def _page_size(self, asset: TableauAsset) -> int:
         return (
castor_extractor/visualization/tableau/client/client_metadata_api_test.py
ADDED
@@ -0,0 +1,31 @@
+from .client_metadata_api import _deduplicate
+
+
+def test__deduplicate():
+    result_pages = iter(
+        [
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 2, "name": "workbook_2"},
+            ],
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 3, "name": "workbook_3"},
+                {"id": 4, "name": "workbook_4"},
+            ],
+            [
+                {"id": 4, "name": "workbook_4"},
+                {"id": 5, "name": "workbook_5"},
+                {"id": 5, "name": "workbook_5"},
+                {"id": 5, "name": "workbook_5"},
+            ],
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 3, "name": "workbook_3"},
+            ],
+        ]
+    )
+    deduplicated = _deduplicate(result_pages)
+    assert len(deduplicated) == 5
+    deduplicated_keys = {item["id"] for item in deduplicated}
+    assert deduplicated_keys == {1, 2, 3, 4, 5}
{castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.7
+Version: 0.24.9
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -210,6 +210,14 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
 
 # Changelog
 
+## 0.24.9 - 2025-04-16
+
+* Introduce API client for **Coalesce**
+
+## 0.24.8 - 2025-04-16
+
+* Tableau - remove duplicates introduced by `offset` pagination
+
 ## 0.24.7 - 2025-04-07
 
 * Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
{castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=UKD2ldg9s00KOoVfWjnyB_m50R0fnpPLbpmkZHKoOQM,16821
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -68,6 +68,16 @@ castor_extractor/quality/soda/client/client.py,sha256=Gd3GaachWx5ZEH_nqgTxiBIbUq
 castor_extractor/quality/soda/client/credentials.py,sha256=R1g7nHpJlQ5hBjtUFN06QjjWAouQtb_V-je7cAXXIA4,514
 castor_extractor/quality/soda/client/endpoints.py,sha256=x3B-XlnDF8NJMuk-81N72_6HA-YZEzA895khLyj0j54,228
 castor_extractor/quality/soda/client/pagination.py,sha256=_7caQUNDNPGRufnZNrfYBN3oVXsk99_2wYr67I0ehAs,530
+castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
+castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
+castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
+castor_extractor/transformation/coalesce/client/client.py,sha256=yrPzIk-6VN4MDHwti3Yxy3PCfHmxE6znjuehl_-dYTg,6151
+castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
+castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
+castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
+castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
+castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
 castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
 castor_extractor/uploader/__init__.py,sha256=A4bq_SrEtKAsl0r_D_duSTvL5WIQjVfsMy7tDx9IKg0,87
 castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
@@ -266,7 +276,8 @@ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwy
 castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
 castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
 castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
-castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=
+castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=fARj7xroHfMd4nlo5CJK5jPok5UsHznOQpIpNaECVHw,5274
+castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=lbsq5mLtqeNc5EsmCw9Mvl8qcvMsTcJTepHwy1ToyvA,969
 castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
 castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
 castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
@@ -317,7 +328,7 @@ castor_extractor/warehouse/databricks/credentials.py,sha256=ExtVcl2NpMXTx1Lg8vHQ
 castor_extractor/warehouse/databricks/endpoints.py,sha256=qPoL9CtPFJdwVuW9rJ37nmeMd-nChOBouEVYb4SlaUE,670
 castor_extractor/warehouse/databricks/enums.py,sha256=3T6BbVvbWvfWkD23krsYT1x0kKh1qRzNPl6WpcXe300,274
 castor_extractor/warehouse/databricks/extract.py,sha256=Z4VTEIf0QMiua0QGAlJdQ86kxmGAXekQ304aCKme6IY,7358
-castor_extractor/warehouse/databricks/format.py,sha256=
+castor_extractor/warehouse/databricks/format.py,sha256=S3BOcwJubc1pyKr-li26uftUUfsjfrm5Qf4LqmElXVk,6736
 castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
 castor_extractor/warehouse/databricks/lineage.py,sha256=jwiRXrgqBAtzQt5EgErYrN8YRyviEEHmyrSbw8TSPq4,2105
 castor_extractor/warehouse/databricks/lineage_test.py,sha256=PyBn1eAoxLm4Bz5M0F4zmaxFX2mXRTM_uug5OKbQPQs,2684
@@ -405,8 +416,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
+castor_extractor-0.24.9.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.9.dist-info/METADATA,sha256=JDqbNB2dwsOO7_5PKUWP0r4FL217fi7OIEbVaOPljDQ,23985
+castor_extractor-0.24.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.9.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
+castor_extractor-0.24.9.dist-info/RECORD,,
{castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/LICENCE
File without changes

{castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/WHEEL
File without changes

{castor_extractor-0.24.7.dist-info → castor_extractor-0.24.9.dist-info}/entry_points.txt
File without changes