castor-extractor 0.24.4__py3-none-any.whl → 0.24.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of castor-extractor might be problematic.
- CHANGELOG.md +20 -0
- castor_extractor/transformation/__init__.py +0 -0
- castor_extractor/transformation/coalesce/__init__.py +2 -0
- castor_extractor/transformation/coalesce/assets.py +18 -0
- castor_extractor/transformation/coalesce/client/__init__.py +2 -0
- castor_extractor/transformation/coalesce/client/client.py +180 -0
- castor_extractor/transformation/coalesce/client/credentials.py +23 -0
- castor_extractor/transformation/coalesce/client/endpoint.py +42 -0
- castor_extractor/transformation/coalesce/client/type.py +1 -0
- castor_extractor/transformation/coalesce/client/utils.py +52 -0
- castor_extractor/transformation/coalesce/client/utils_test.py +54 -0
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/batch.py +16 -0
- castor_extractor/utils/batch_test.py +27 -0
- castor_extractor/visualization/domo/client/client.py +10 -4
- castor_extractor/visualization/tableau/client/client_metadata_api.py +52 -19
- castor_extractor/visualization/tableau/client/client_metadata_api_test.py +31 -0
- castor_extractor/visualization/tableau/client/gql_queries.py +1 -1
- castor_extractor/warehouse/databricks/format.py +1 -1
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/METADATA +23 -3
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/RECORD +24 -11
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
@@ -1,5 +1,25 @@
 # Changelog

+## 0.24.9 - 2025-04-16
+
+* Introduce API client for **Coalesce**
+
+## 0.24.8 - 2025-04-16
+
+* Tableau - remove duplicates introduced by `offset` pagination
+
+## 0.24.7 - 2025-04-07
+
+* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
+
+## 0.24.6 - 2025-04-03
+
+* Domo - extract cards metadata by batch to prevent from hitting URL max length
+
+## 0.24.5 - 2025-04-02
+
+* bump dependencies: google-cloud-storage
+
 ## 0.24.4 - 2025-03-19

 * Snowflake:
castor_extractor/transformation/__init__.py
File without changes
castor_extractor/transformation/coalesce/assets.py
@@ -0,0 +1,18 @@
+from ...types import ExternalAsset
+
+
+class CoalesceAsset(ExternalAsset):
+    """Coalesce assets"""
+
+    NODES = "nodes"
+
+
+class CoalesceQualityAsset(ExternalAsset):
+    """
+    Coalesce Quality Assets
+    Remark: having a dedicated Enum for Quality simplifies the process of
+    searching pushed files
+    """
+
+    NODES = "nodes"
+    RUN_RESULTS = "run_results"
castor_extractor/transformation/coalesce/client/client.py
@@ -0,0 +1,180 @@
+from http import HTTPStatus
+from typing import Iterator, Optional
+
+from ....utils import APIClient, BearerAuth, RequestSafeMode, SerializedAsset
+from ..assets import CoalesceAsset, CoalesceQualityAsset
+from .credentials import CoalesceCredentials
+from .endpoint import (
+    CoalesceEndpointFactory,
+)
+from .type import NodeIDToNamesMapping
+from .utils import column_names_per_node, is_test, test_names_per_node
+
+_LIMIT_MAX = 1_000
+_MAX_ERRORS = 50
+
+
+def _run_result_payload(result: dict, query_result: dict) -> dict:
+    return {
+        "node_id": result["nodeID"],
+        "node_name": result["name"],
+        "test_name": query_result["name"],
+        "start_time": query_result["startTime"],
+        "end_time": query_result["endTime"],
+        "status": query_result["status"],
+        "success": query_result["success"],
+        "isRunning": query_result["isRunning"],
+    }
+
+
+COALESCE_SAFE_MODE = RequestSafeMode(
+    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
+    max_errors=_MAX_ERRORS,
+)
+COALESCE_TIMEOUT_SECONDS = 90
+
+
+class CoalesceBearerAuth(BearerAuth):
+    """Bearer Authentication for Coalesce"""
+
+    def fetch_token(self) -> Optional[str]:
+        pass
+
+    def __init__(self, token: str):
+        self._token = token
+
+
+class CoalesceClient(APIClient):
+    """REST API client to extract data from Coalesce"""
+
+    def __init__(
+        self,
+        credentials: CoalesceCredentials,
+    ):
+        auth = CoalesceBearerAuth(token=credentials.token)
+        super().__init__(
+            host=credentials.host,
+            auth=auth,
+            safe_mode=COALESCE_SAFE_MODE,
+            timeout=COALESCE_TIMEOUT_SECONDS,
+        )
+
+    def _fetch_environments(self) -> Iterator[dict]:
+        endpoint = CoalesceEndpointFactory.environments()
+        result = self._get(endpoint=endpoint)
+        return result["data"]
+
+    def _node_details(self, environment_id: int, node_id: str) -> dict:
+        endpoint = CoalesceEndpointFactory.nodes(
+            environment_id=environment_id, node_id=node_id
+        )
+        return self._get(endpoint=endpoint)
+
+    def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
+        result = self._get(endpoint=endpoint)
+        nodes: list[dict] = []
+        for node in result["data"]:
+            details = self._node_details(environment_id, node["id"])
+            nodes.append({**node, **details})
+        return nodes
+
+    def _fetch_all_nodes(self) -> SerializedAsset:
+        nodes: list[dict] = []
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            nodes.extend(self._fetch_env_nodes(environment_id))
+        return nodes
+
+    def _fetch_runs(self, starting_from: str) -> SerializedAsset:
+        """
+        fetch runs, per environment;
+        we break per environment to lower the chance of exceeding the 1k limit
+        """
+        runs: list[dict] = []
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            runs.extend(
+                self._fetch_recent_runs_per_env(environment_id, starting_from)
+            )
+        return runs
+
+    def _fetch_recent_runs_per_env(
+        self, environment_id: int, starting_from: str
+    ) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.runs()
+        params = {
+            "environmentID": environment_id,
+            "limit": _LIMIT_MAX,
+            "orderBy": "runEndTime",
+            "orderByDirection": "asc",
+            "startingFrom": starting_from,
+        }
+        result = self._get(endpoint=endpoint, params=params)
+        return result["data"]
+
+    def _fetch_run_results(self, run_id: str) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.run_results(run_id)
+        result = self._get(endpoint=endpoint)
+        return result["data"]
+
+    def _run_results_by_run(
+        self,
+        run_id: str,
+        test_names: NodeIDToNamesMapping,
+        column_names: NodeIDToNamesMapping,
+    ) -> SerializedAsset:
+        run_results: list[dict] = []
+        for result in self._fetch_run_results(run_id):
+            node_id = result["nodeID"]
+            for query_result in result["queryResults"]:
+                _is_test = is_test(
+                    query_result,
+                    node_id,
+                    test_names,
+                    column_names,
+                )
+                if not _is_test:
+                    continue
+                run_result = _run_result_payload(result, query_result)
+                run_results.append(run_result)
+        return run_results
+
+    def _run_results_by_env(
+        self, environment_id: int, starting_from: str
+    ) -> SerializedAsset:
+        run_results: list[dict] = []
+        nodes = self._fetch_env_nodes(environment_id)
+        test_names = test_names_per_node(nodes)
+        column_names = column_names_per_node(nodes)
+        runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
+
+        for run in runs:
+            run_id = run["id"]
+            _results = self._run_results_by_run(
+                run_id, test_names, column_names
+            )
+            run_results.extend(_results)
+        return run_results
+
+    def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
+        run_results: list[dict] = []
+
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            _results = self._run_results_by_env(environment_id, starting_from)
+            run_results.extend(_results)
+
+        return run_results
+
+    def fetch(
+        self, asset: CoalesceAsset, starting_from=None
+    ) -> SerializedAsset:
+        """Extract the given Coalesce Asset"""
+        if asset in (CoalesceAsset.NODES, CoalesceQualityAsset.NODES):
+            return self._fetch_all_nodes()
+        elif asset == CoalesceQualityAsset.RUN_RESULTS:
+            return self._fetch_all_run_results(starting_from=starting_from)
+        raise AssertionError(
+            f"Asset {asset} is not supported by CoalesceClient"
+        )
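Taken together, the files above suggest a small extraction flow. The sketch below is illustrative only: the class and asset names come from this diff, while the import path, host value, and timestamp format are assumptions.

```python
# Hypothetical usage of the Coalesce client introduced in 0.24.9; import path assumed.
from castor_extractor.transformation.coalesce import (
    CoalesceClient,
    CoalesceCredentials,
    CoalesceQualityAsset,
)

credentials = CoalesceCredentials(
    host="https://app.coalescesoftware.io",  # illustrative host
    token="<api-token>",
)
client = CoalesceClient(credentials=credentials)

# Nodes across every environment, each merged with its detail payload
nodes = client.fetch(CoalesceQualityAsset.NODES)

# Test results for runs ending after the given timestamp (format assumed)
run_results = client.fetch(
    CoalesceQualityAsset.RUN_RESULTS,
    starting_from="2025-04-01T00:00:00Z",
)
```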
castor_extractor/transformation/coalesce/client/credentials.py
@@ -0,0 +1,23 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+CASTOR_ENV_PREFIX = "CASTOR_COALESCE_"
+
+
+class CoalesceCredentials(BaseSettings):
+    """Class to handle Coalesce rest API permissions"""
+
+    model_config = SettingsConfigDict(
+        env_prefix=CASTOR_ENV_PREFIX,
+        extra="ignore",
+        populate_by_name=True,
+    )
+
+    host: str
+    token: str = Field(repr=False)
+
+    @property
+    def token_payload(self) -> dict[str, str]:
+        return {
+            "client_secret": self.token,
+        }
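Because `CoalesceCredentials` is a pydantic `BaseSettings` with the `CASTOR_COALESCE_` prefix, the credentials can presumably also be picked up from the environment instead of being passed explicitly. A minimal sketch, with variable names derived from the prefix and field names above:

```python
import os

# Assumed variable names: env_prefix + field name (pydantic-settings convention)
os.environ["CASTOR_COALESCE_HOST"] = "https://app.coalescesoftware.io"  # illustrative
os.environ["CASTOR_COALESCE_TOKEN"] = "<api-token>"

from castor_extractor.transformation.coalesce.client import (  # import path assumed
    CoalesceCredentials,
)

credentials = CoalesceCredentials()  # host and token resolved from the environment
```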
castor_extractor/transformation/coalesce/client/endpoint.py
@@ -0,0 +1,42 @@
+from typing import Optional
+
+
+class CoalesceEndpointFactory:
+    """Provide endpoints to hit Coalesce API"""
+
+    @classmethod
+    def environments(cls, environment_id: Optional[int] = None) -> str:
+        """
+        When specified, concatenate environment_id at the end to fetch details.
+        Otherwise, list existing environments.
+        """
+        base = "api/v1/environments"
+        if environment_id:
+            return base + f"/{environment_id}"
+        return base
+
+    @classmethod
+    def nodes(cls, environment_id: int, node_id: Optional[str] = None) -> str:
+        """
+        When specified, concatenate node_id at the end to fetch details.
+        Otherwise, list existing nodes in the given environment.
+        """
+        base = f"api/v1/environments/{environment_id}/nodes"
+        if node_id:
+            return base + f"/{node_id}"
+        return base
+
+    @classmethod
+    def runs(cls) -> str:
+        """
+        Get runs (additional filtering can be done in the body)
+        """
+        base = "api/v1/runs"
+        return base
+
+    @classmethod
+    def run_results(cls, run_id: str) -> str:
+        """
+        get run results (including success/fail for tests), given a run id
+        """
+        return f"api/v1/runs/{run_id}/results"
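For reference, the relative paths the factory produces under these rules (IDs illustrative; the import path is assumed):

```python
from castor_extractor.transformation.coalesce.client.endpoint import (  # path assumed
    CoalesceEndpointFactory,
)

assert CoalesceEndpointFactory.environments() == "api/v1/environments"
assert CoalesceEndpointFactory.environments(7) == "api/v1/environments/7"
assert CoalesceEndpointFactory.nodes(7) == "api/v1/environments/7/nodes"
assert CoalesceEndpointFactory.nodes(7, "node-uuid") == "api/v1/environments/7/nodes/node-uuid"
assert CoalesceEndpointFactory.runs() == "api/v1/runs"
assert CoalesceEndpointFactory.run_results("run-123") == "api/v1/runs/run-123/results"
```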
castor_extractor/transformation/coalesce/client/type.py
@@ -0,0 +1 @@
+NodeIDToNamesMapping = dict[str, set[str]]
castor_extractor/transformation/coalesce/client/utils.py
@@ -0,0 +1,52 @@
+from ....utils import SerializedAsset
+from .type import NodeIDToNamesMapping
+
+_NULL_SUFFIX = ": Null"
+_UNIQUE_SUFFIX = ": Unique"
+
+
+def is_test(
+    query_result: dict,
+    node_id: str,
+    test_names: NodeIDToNamesMapping,
+    column_names: NodeIDToNamesMapping,
+) -> bool:
+    """
+    checks whether a query result is a test result or not.
+
+    all this implementation can soon be replaced by checking whether
+    query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
+    """
+    # test scoped on the node (table)
+    result_name = query_result["name"]
+    if result_name in test_names.get(node_id, {}):
+        return True
+
+    # test scoped on the column
+    if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
+        _UNIQUE_SUFFIX
+    ):
+        column_name = result_name.split(":")[0]
+        if column_name in column_names.get(node_id, {}):
+            return True
+    return False
+
+
+def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
+    """mapping nodeID: set(testName)"""
+    mapping: dict[str, set[str]] = {}
+    for node in nodes:
+        node_id = node["id"]
+        tests = node.get("metadata", {}).get("appliedNodeTests", [])
+        mapping[node_id] = {test["name"] for test in tests}
+    return mapping
+
+
+def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
+    """mapping nodeID: set(columnNames)"""
+    mapping: dict[str, set[str]] = {}
+    for node in nodes:
+        node_id = node["id"]
+        columns = node.get("metadata", {}).get("columns", [])
+        mapping[node_id] = {column["name"] for column in columns}
+    return mapping
castor_extractor/transformation/coalesce/client/utils_test.py
@@ -0,0 +1,54 @@
+from .utils import is_test
+
+
+def test_is_test():
+    test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
+    column_names = {"some-uuid": {"carthago", "delenda", "est"}}
+
+    happy_node_test = is_test(
+        query_result={"name": "check-mirrors"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_node_test is True
+
+    unknown_node_test = is_test(
+        query_result={"name": "check-engine"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_node_test is False
+
+    happy_column_test_unique = is_test(
+        query_result={"name": "carthago: Unique"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_column_test_unique is True
+
+    happy_column_test_null = is_test(
+        query_result={"name": "carthago: Null"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_column_test_null is True
+
+    unknown_column_test = is_test(
+        query_result={"name": "rome: Unique"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_column_test is False
+
+    unknown_node_id_test = is_test(
+        query_result={"name": "whatever: Unique"},
+        node_id="unknown-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_node_id_test is False
castor_extractor/utils/batch.py
@@ -0,0 +1,16 @@
+from typing import Iterator, List, TypeVar
+
+T = TypeVar("T")
+
+
+def batch_of_length(
+    elements: List[T],
+    batch_size: int,
+) -> Iterator[List[T]]:
+    """
+    Split the given elements into smaller chunks
+    """
+    assert batch_size > 1, "batch size must be greater or equal to 1"
+    element_count = len(elements)
+    for index in range(0, element_count, batch_size):
+        yield elements[index : min((index + batch_size), element_count)]
castor_extractor/utils/batch_test.py
@@ -0,0 +1,27 @@
+import pytest
+
+from .batch import batch_of_length
+
+
+def test_batch_of_length():
+    elements = ["a", "b", "c", "d", "e", "f", "g", "h"]
+    result = list(batch_of_length(elements, 3))
+    assert result == [
+        ["a", "b", "c"],
+        ["d", "e", "f"],
+        ["g", "h"],
+    ]
+
+    result = list(batch_of_length(elements, 1000))
+    assert result == [
+        elements,
+    ]
+
+    result = list(batch_of_length(elements, 7))
+    assert result == [
+        ["a", "b", "c", "d", "e", "f", "g"],
+        ["h"],
+    ]
+
+    with pytest.raises(AssertionError):
+        list(batch_of_length(elements, -12))
castor_extractor/visualization/domo/client/client.py
CHANGED

@@ -9,6 +9,7 @@ import requests
 from ....utils import (
     RequestSafeMode,
     at_midnight,
+    batch_of_length,
     current_date,
     empty_iterator,
     handle_response,
@@ -48,6 +49,8 @@ _RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes

 _PARENT_FOLDER = "/Dashboards"

+_CARDS_BATCH_SIZE = 100
+
 logger = logging.getLogger(__name__)


@@ -156,16 +159,19 @@ class DomoClient:

         return all_results

+    def _cards_metadata(self, card_ids: list[int]) -> Iterator[dict]:
+        # batch to avoid hitting the URL max length
+        for batch_card_ids in batch_of_length(card_ids, _CARDS_BATCH_SIZE):
+            endpoint = self._endpoint_factory.cards_metadata(batch_card_ids)
+            yield from self._get_element(endpoint)
+
     def _datasources(self, card_ids: list[int]) -> RawData:
         """Yields all distinct datasources associated to the given cards"""
         if not card_ids:
             return empty_iterator()

-        endpoint = self._endpoint_factory.cards_metadata(card_ids)
-        cards_metadata = self._get_element(endpoint)
-
         processed: set[str] = set()
-        for card in cards_metadata:
+        for card in self._cards_metadata(card_ids):
             for datasource in card["datasources"]:
                 id_ = datasource["dataSourceId"]
                 if id_ in processed:
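The effect of the change above: instead of one `cards_metadata` call whose URL embeds every card id, the client now issues one call per batch of at most 100 ids. A rough illustration using the new helper (numbers illustrative):

```python
from castor_extractor.utils import batch_of_length  # import path as used by the Domo client above

card_ids = list(range(250))  # illustrative: 250 cards on an instance
batches = list(batch_of_length(card_ids, 100))

# Three requests of 100, 100 and 50 ids instead of a single oversized URL
assert [len(batch) for batch in batches] == [100, 100, 50]
```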
castor_extractor/visualization/tableau/client/client_metadata_api.py
CHANGED

@@ -1,3 +1,4 @@
+import logging
 from collections.abc import Iterator
 from typing import Optional

@@ -9,15 +10,14 @@ from ..constants import DEFAULT_PAGE_SIZE
 from .errors import TableauApiError, TableauApiTimeout
 from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE

+logger = logging.getLogger(__name__)
+
 # increase the value when extraction is too slow
 # decrease the value when timeouts arise
 _CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
-    # for some clients, extraction of columns tend to hit the node limit
-    # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
-    # the workaround is to reduce pagination
-    TableauAsset.COLUMN: 50,
     # fields are light but volumes are bigger
     TableauAsset.FIELD: 1000,
+    # tables are sometimes heavy
     TableauAsset.TABLE: 50,
 }

@@ -51,8 +51,9 @@ def _check_errors(answer: dict) -> None:

 def gql_query_scroll(
     server,
-    query: str,
     resource: str,
+    fields: str,
+    page_size: int,
 ) -> Iterator[SerializedAsset]:
     """
     Iterate over GQL query results, handling pagination and cursor
@@ -67,26 +68,58 @@ def gql_query_scroll(
         max_retries=_RETRY_COUNT,
         base_ms=_RETRY_BASE_MS,
     )
-    def _call(
-
-
-
-
+    def _call(first: int, offset: int) -> dict:
+        query = QUERY_TEMPLATE.format(
+            resource=resource,
+            fields=fields,
+            first=first,
+            offset=offset,
+        )
+        answer = server.metadata.query(query)
         _check_errors(answer)
         return answer["data"][f"{resource}Connection"]

-
+    current_offset = 0
     while True:
-        payload = _call(
+        payload = _call(first=page_size, offset=current_offset)
         yield payload["nodes"]

-
-
-
-
+        current_offset += len(payload["nodes"])
+        total = payload["totalCount"]
+        logger.info(f"Extracted {current_offset}/{total} {resource}")
+
+        if not payload["pageInfo"]["hasNextPage"]:
             break


+def _deduplicate(result_pages: Iterator[SerializedAsset]) -> SerializedAsset:
+    """
+    Sometimes assets are duplicated, which triggers UniqueViolation errors
+    during store_all down the line.
+
+    We suspect the offset pagination to be the root cause, because we had no
+    problem until recently, when we switched from cursor pagination to offset
+    pagination (for performance reasons)
+    https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_examples.html#pagination
+
+    This is a straightforward solution to remove these duplicates directly at
+    extraction.
+    We don't show warnings because duplicates are expected, and we keep only
+    the first occurrence since those duplicates are probably identical.
+    """
+    deduplicated: SerializedAsset = []
+    seen_ids: set[str] = set()
+    for page in result_pages:
+        for asset in page:
+            asset_id = asset["id"]
+            if asset_id in seen_ids:
+                # skip duplicate
+                continue
+            deduplicated.append(asset)
+            seen_ids.add(asset_id)
+    return deduplicated
+
+
 class TableauClientMetadataApi:
     """
     Calls the MetadataAPI, using graphQL
@@ -107,13 +140,13 @@ class TableauClientMetadataApi:
         fields: str,
         page_size: int = DEFAULT_PAGE_SIZE,
     ) -> SerializedAsset:
-
+        result_pages = gql_query_scroll(
+            self._server,
             resource=resource,
             fields=fields,
             page_size=page_size,
         )
-
-        return [asset for page in result_pages for asset in page]
+        return _deduplicate(result_pages)

     def _page_size(self, asset: TableauAsset) -> int:
         return (
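The diff does not show `QUERY_TEMPLATE` itself (gql_queries.py changed by a single line), but the Tableau pagination documentation linked in the docstring describes offset-based queries of roughly this shape. The sketch below is illustrative, not the template used by the package:

```python
# Illustrative offset-paginated Metadata API query (resource: workbooks)
query = """
{
  workbooksConnection(first: 100, offset: 0) {
    nodes { id name }
    pageInfo { hasNextPage }
    totalCount
  }
}
"""
# gql_query_scroll formats resource, fields, first and offset into the real
# template, advances the offset by the number of nodes returned, and stops when
# pageInfo.hasNextPage is false; _deduplicate then drops any repeated ids that
# overlapping offset pages may produce.
```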
castor_extractor/visualization/tableau/client/client_metadata_api_test.py
@@ -0,0 +1,31 @@
+from .client_metadata_api import _deduplicate
+
+
+def test__deduplicate():
+    result_pages = iter(
+        [
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 2, "name": "workbook_2"},
+            ],
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 3, "name": "workbook_3"},
+                {"id": 4, "name": "workbook_4"},
+            ],
+            [
+                {"id": 4, "name": "workbook_4"},
+                {"id": 5, "name": "workbook_5"},
+                {"id": 5, "name": "workbook_5"},
+                {"id": 5, "name": "workbook_5"},
+            ],
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 3, "name": "workbook_3"},
+            ],
+        ]
+    )
+    deduplicated = _deduplicate(result_pages)
+    assert len(deduplicated) == 5
+    deduplicated_keys = {item["id"] for item in deduplicated}
+    assert deduplicated_keys == {1, 2, 3, 4, 5}
{castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.4
+Version: 0.24.9
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -35,7 +35,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
 Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
 Requires-Dist: google-auth (>=2,<3)
 Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
-Requires-Dist: google-cloud-storage (>=
+Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
 Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
 Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
 Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
@@ -51,7 +51,7 @@ Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all
 Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
 Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
-Requires-Dist: setuptools (>=
+Requires-Dist: setuptools (>=78.1)
 Requires-Dist: snowflake-connector-python (>=3.4.0,<4.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: snowflake-sqlalchemy (!=1.2.5,<2.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: sqlalchemy (>=1.4,<1.5)
@@ -210,6 +210,26 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su

 # Changelog

+## 0.24.9 - 2025-04-16
+
+* Introduce API client for **Coalesce**
+
+## 0.24.8 - 2025-04-16
+
+* Tableau - remove duplicates introduced by `offset` pagination
+
+## 0.24.7 - 2025-04-07
+
+* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
+
+## 0.24.6 - 2025-04-03
+
+* Domo - extract cards metadata by batch to prevent from hitting URL max length
+
+## 0.24.5 - 2025-04-02
+
+* bump dependencies: google-cloud-storage
+
 ## 0.24.4 - 2025-03-19

 * Snowflake:
{castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=UKD2ldg9s00KOoVfWjnyB_m50R0fnpPLbpmkZHKoOQM,16821
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -68,6 +68,16 @@ castor_extractor/quality/soda/client/client.py,sha256=Gd3GaachWx5ZEH_nqgTxiBIbUq
 castor_extractor/quality/soda/client/credentials.py,sha256=R1g7nHpJlQ5hBjtUFN06QjjWAouQtb_V-je7cAXXIA4,514
 castor_extractor/quality/soda/client/endpoints.py,sha256=x3B-XlnDF8NJMuk-81N72_6HA-YZEzA895khLyj0j54,228
 castor_extractor/quality/soda/client/pagination.py,sha256=_7caQUNDNPGRufnZNrfYBN3oVXsk99_2wYr67I0ehAs,530
+castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
+castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
+castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
+castor_extractor/transformation/coalesce/client/client.py,sha256=yrPzIk-6VN4MDHwti3Yxy3PCfHmxE6znjuehl_-dYTg,6151
+castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
+castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
+castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
+castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
+castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
 castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
 castor_extractor/uploader/__init__.py,sha256=A4bq_SrEtKAsl0r_D_duSTvL5WIQjVfsMy7tDx9IKg0,87
 castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
@@ -77,9 +87,11 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
 castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
 castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
-castor_extractor/utils/__init__.py,sha256=
+castor_extractor/utils/__init__.py,sha256=KQkr_CmxWG0Vpu7CaqjbJkffUeEWcyeA9Cbm394Hygk,1585
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
 castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
+castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
+castor_extractor/utils/batch_test.py,sha256=84JYXOxiTkZFAceVh0mzN6VtKxcqoFPbxkZfIDyLGlg,606
 castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
 castor_extractor/utils/client/abstract.py,sha256=CWF7_afNpEZ3jor-22wXbKIvM20ukHkaDy_uknKz8B0,2075
 castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
@@ -146,7 +158,7 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
 castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
 castor_extractor/visualization/domo/client/__init__.py,sha256=Do0fU4B8Hhlhahcv734gnJl_ryCztfTBDea7XNCKfB8,72
-castor_extractor/visualization/domo/client/client.py,sha256=
+castor_extractor/visualization/domo/client/client.py,sha256=bgzXWUm-UnTIwgyJKaJkoHzQpDYwWCGCe97MsMFw6ng,9930
 castor_extractor/visualization/domo/client/credentials.py,sha256=4gnsk4Tpt3ggdUYbvyNPJEXeCyTy12s-X24P5hFdULg,873
 castor_extractor/visualization/domo/client/endpoints.py,sha256=eIE9oeZ_cmJSWWDuyxh6JaAOs3y5bTJQQ265HYgpulE,2775
 castor_extractor/visualization/domo/client/pagination.py,sha256=ukVkHVzoH4mfZ29H9YcnC2YrdVolP10wv25J6Q3ehRw,821
@@ -264,12 +276,13 @@ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwy
 castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
 castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
 castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
-castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=
+castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=fARj7xroHfMd4nlo5CJK5jPok5UsHznOQpIpNaECVHw,5274
+castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=lbsq5mLtqeNc5EsmCw9Mvl8qcvMsTcJTepHwy1ToyvA,969
 castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
 castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
 castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
 castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
-castor_extractor/visualization/tableau/client/gql_queries.py,sha256=
+castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
 castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
 castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
 castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6ZXxVsK8R3YnVk1UVYbaZk,1423
@@ -315,7 +328,7 @@ castor_extractor/warehouse/databricks/credentials.py,sha256=ExtVcl2NpMXTx1Lg8vHQ
 castor_extractor/warehouse/databricks/endpoints.py,sha256=qPoL9CtPFJdwVuW9rJ37nmeMd-nChOBouEVYb4SlaUE,670
 castor_extractor/warehouse/databricks/enums.py,sha256=3T6BbVvbWvfWkD23krsYT1x0kKh1qRzNPl6WpcXe300,274
 castor_extractor/warehouse/databricks/extract.py,sha256=Z4VTEIf0QMiua0QGAlJdQ86kxmGAXekQ304aCKme6IY,7358
-castor_extractor/warehouse/databricks/format.py,sha256=
+castor_extractor/warehouse/databricks/format.py,sha256=S3BOcwJubc1pyKr-li26uftUUfsjfrm5Qf4LqmElXVk,6736
 castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
 castor_extractor/warehouse/databricks/lineage.py,sha256=jwiRXrgqBAtzQt5EgErYrN8YRyviEEHmyrSbw8TSPq4,2105
 castor_extractor/warehouse/databricks/lineage_test.py,sha256=PyBn1eAoxLm4Bz5M0F4zmaxFX2mXRTM_uug5OKbQPQs,2684
@@ -403,8 +416,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
+castor_extractor-0.24.9.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.9.dist-info/METADATA,sha256=JDqbNB2dwsOO7_5PKUWP0r4FL217fi7OIEbVaOPljDQ,23985
+castor_extractor-0.24.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.9.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
+castor_extractor-0.24.9.dist-info/RECORD,,
{castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/LICENCE
File without changes

{castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/WHEEL
File without changes

{castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/entry_points.txt
File without changes