castor-extractor 0.24.33__py3-none-any.whl → 0.24.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of castor-extractor might be problematic.
- CHANGELOG.md +9 -1
- castor_extractor/commands/extract_sqlserver.py +12 -0
- castor_extractor/transformation/coalesce/client/client.py +92 -88
- castor_extractor/transformation/coalesce/client/pagination.py +26 -0
- castor_extractor/utils/__init__.py +7 -1
- castor_extractor/utils/client/api/pagination.py +5 -2
- castor_extractor/utils/collection.py +26 -0
- castor_extractor/utils/collection_test.py +31 -1
- castor_extractor/visualization/looker_studio/client/queries/query.sql +1 -0
- castor_extractor/warehouse/sqlserver/client.py +15 -0
- castor_extractor/warehouse/sqlserver/extract.py +8 -2
- castor_extractor/warehouse/sqlserver/queries/column.sql +10 -10
- castor_extractor/warehouse/sqlserver/queries/database.sql +1 -1
- castor_extractor/warehouse/sqlserver/queries/schema.sql +5 -6
- castor_extractor/warehouse/sqlserver/queries/table.sql +12 -14
- castor_extractor/warehouse/sqlserver/query.py +30 -1
- {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/METADATA +10 -2
- {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/RECORD +21 -23
- castor_extractor/transformation/coalesce/client/type.py +0 -1
- castor_extractor/transformation/coalesce/client/utils.py +0 -52
- castor_extractor/transformation/coalesce/client/utils_test.py +0 -54
- {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
+## 0.24.34 - 2025-07-02
+
+* SQLServer: multiple databases
+
 ## 0.24.33 - 2025-07-10
 
 * Tableau - Add an option to skip fields ingestion
@@ -10,7 +18,7 @@
 
 ## 0.24.31 - 2025-07-02
 
-* Looker Studio: add option to list users via a provided JSON file
+* Looker Studio: add an option to list users via a provided JSON file
 
 ## 0.24.30 - 2025-06-26
 
castor_extractor/commands/extract_sqlserver.py
CHANGED
@@ -23,6 +23,16 @@ def main():
         action="store_true",
         help="Skips files already extracted instead of replacing them",
     )
+    parser.add_argument(
+        "--db-allowed",
+        nargs="*",
+        help="List of databases that should be extracted",
+    )
+    parser.add_argument(
+        "--db-blocked",
+        nargs="*",
+        help="List of databases that should not be extracted",
+    )
     parser.set_defaults(skip_existing=False)
 
     args = parser.parse_args()
@@ -35,4 +45,6 @@ def main():
         password=args.password,
         output_directory=args.output,
         skip_existing=args.skip_existing,
+        db_allowed=args.db_allowed,
+        db_blocked=args.db_blocked,
    )
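Both new flags use argparse's `nargs="*"`, so each accepts zero or more database names and defaults to `None` when omitted; the lists are later passed to `filter_items` (see castor_extractor/utils/collection.py below). A minimal standalone sketch of what the parser produces, using plain argparse and no castor-extractor imports:

```python
import argparse

# Trimmed-down parser mirroring only the two new flags.
parser = argparse.ArgumentParser()
parser.add_argument("--db-allowed", nargs="*",
                    help="List of databases that should be extracted")
parser.add_argument("--db-blocked", nargs="*",
                    help="List of databases that should not be extracted")

args = parser.parse_args(["--db-blocked", "master", "tempdb"])
assert args.db_blocked == ["master", "tempdb"]
assert args.db_allowed is None  # an omitted flag stays None, not an empty list
```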
castor_extractor/transformation/coalesce/client/client.py
CHANGED
@@ -1,31 +1,47 @@
 import logging
+from functools import partial
 from http import HTTPStatus
-from typing import
+from typing import Callable, Optional
 
-from
+from pydantic import ValidationError
 
 from ....utils import (
     APIClient,
     BearerAuth,
     RequestSafeMode,
     SerializedAsset,
+    fetch_all_pages,
 )
 from ..assets import CoalesceAsset, CoalesceQualityAsset
 from .credentials import CoalesceCredentials
 from .endpoint import (
     CoalesceEndpointFactory,
 )
-from .
-
+from .pagination import CoalescePagination
+
+logger = logging.getLogger(__name__)
+
+
+COALESCE_PAGE_SIZE = 300
+COALESCE_PAGE_SIZE_RUN_RESULTS = 1_000
+
+COALESCE_TIMEOUT_SECONDS = 90
 
-_LIMIT_MAX = 1_000
 _MAX_ERRORS = 200
 
-
+COALESCE_SAFE_MODE = RequestSafeMode(
+    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
+    max_errors=_MAX_ERRORS,
+)
 
 
-def _run_result_payload(result: dict, query_result: dict) -> dict:
+def _run_result_payload(
+    environment_id: str,
+    result: dict,
+    query_result: dict,
+) -> dict:
     return {
+        "environment_id": environment_id,
         "node_id": result["nodeID"],
         "node_name": result["name"],
         "test_name": query_result["name"],
@@ -37,13 +53,6 @@ def _run_result_payload(result: dict, query_result: dict) -> dict:
     }
 
 
-COALESCE_SAFE_MODE = RequestSafeMode(
-    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
-    max_errors=_MAX_ERRORS,
-)
-COALESCE_TIMEOUT_SECONDS = 90
-
-
 class CoalesceBearerAuth(BearerAuth):
     """Bearer Authentication for Coalesce"""
 
@@ -69,65 +78,74 @@ class CoalesceClient(APIClient):
         timeout=COALESCE_TIMEOUT_SECONDS,
     )
 
-    def
-
-
-
-
-
-
-
+    def _get_paginated(
+        self,
+        endpoint: str,
+        limit: int = COALESCE_PAGE_SIZE,
+        params: Optional[dict] = None,
+    ) -> Callable:
+        return partial(
+            self._get,
+            retry_on_timeout=False,  # explained in the docstring
+            endpoint=endpoint,
+            params={
                "limit": limit,
                **(params or dict()),
            },
         )
-
+
+    def _fetch_environments(self) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.environments()
+        request = self._get_paginated(endpoint=endpoint)
+        result = fetch_all_pages(request, CoalescePagination)
+        return list(result)
 
     def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
-
-
-
-
-
-
-
-
-
-
-
-
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params={"detail": "true"},
+        )
+        result = fetch_all_pages(request, CoalescePagination)
+        return [
+            {
+                **node,
+                "environment_id": environment_id,
+            }
+            for node in result
+        ]
 
     def _fetch_all_nodes(self) -> SerializedAsset:
+        environments = self._fetch_environments()
+        total = len(environments)
         nodes: list[dict] = []
-
-
-
+
+        for index, env in enumerate(environments):
+            env_id = env["id"]
+            logger.info(f"Fetching nodes for env #{env_id} - {index}/{total}")
+            try:
+                nodes.extend(self._fetch_env_nodes(env_id))
+            except ValidationError as e:
+                # 500 Server Error: Internal Server Error on Coalesce API
+                logger.warning(
+                    f"Skipping nodes for {env_id} due to the following Error: {e}"
+                )
+        logger.info(f"{len(nodes)} nodes extracted so far")
         return nodes
 
     def _fetch_runs(self, starting_from: str) -> SerializedAsset:
-        """
-        fetch runs, per environment;
-        we break per environment to lower the chance of exceeding the 1k limit
-        """
-        runs: list[dict] = []
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            runs.extend(
-                self._fetch_recent_runs_per_env(environment_id, starting_from)
-            )
-        return runs
-
-    def _fetch_recent_runs_per_env(
-        self, environment_id: int, starting_from: str
-    ) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.runs()
         params = {
-            "environmentID": environment_id,
-            "limit": _LIMIT_MAX,
             "orderBy": "runEndTime",
             "orderByDirection": "asc",
             "startingFrom": starting_from,
         }
-
-
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params=params,
+            limit=COALESCE_PAGE_SIZE_RUN_RESULTS,
+        )
+        return list(fetch_all_pages(request, CoalescePagination))
 
     def _fetch_run_results(self, run_id: str) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.run_results(run_id)
@@ -136,51 +154,37 @@ class CoalesceClient(APIClient):
 
     def _run_results_by_run(
         self,
+        environment_id: str,
         run_id: str,
-        test_names: NodeIDToNamesMapping,
-        column_names: NodeIDToNamesMapping,
     ) -> SerializedAsset:
         run_results: list[dict] = []
         for result in self._fetch_run_results(run_id):
-            node_id = result["nodeID"]
             for query_result in result["queryResults"]:
-
+                if query_result["type"] != "sqlTest":
+                    continue
+                run_result = _run_result_payload(
+                    environment_id,
+                    result,
                    query_result,
-                    node_id,
-                    test_names,
-                    column_names,
                )
-                if not _is_test:
-                    continue
-                run_result = _run_result_payload(result, query_result)
                 run_results.append(run_result)
         return run_results
 
-    def
-        self,
+    def _fetch_all_run_results(
+        self,
+        starting_from: str,
     ) -> SerializedAsset:
         run_results: list[dict] = []
-        nodes = self._fetch_env_nodes(environment_id)
-        test_names = test_names_per_node(nodes)
-        column_names = column_names_per_node(nodes)
-        runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
 
-
-
-            _results = self._run_results_by_run(
-                run_id, test_names, column_names
-            )
-            run_results.extend(_results)
-        return run_results
-
-    def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
-        run_results: list[dict] = []
-
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            _results = self._run_results_by_env(environment_id, starting_from)
-            run_results.extend(_results)
+        runs = self._fetch_runs(starting_from)
+        total = len(runs)
 
+        for index, run in enumerate(runs):
+            logger.info(f"Extracting run results ({index}/{total})")
+            run_id = run["id"]
+            environment_id = run["environmentID"]
+            current_results = self._run_results_by_run(environment_id, run_id)
+            run_results.extend(current_results)
         return run_results
 
     def fetch(
castor_extractor/transformation/coalesce/client/pagination.py
ADDED
@@ -0,0 +1,26 @@
+from typing import Optional, Union
+
+from ....utils import PaginationModel
+
+
+class CoalescePagination(PaginationModel):
+    """
+    Class to handle paginated results for Coalesce
+    See their documentation here
+    https://docs.coalesce.io/docs/api
+    """
+
+    data: list
+    next: Union[Optional[str], Optional[int]] = None
+
+    def is_last(self) -> bool:
+        """Stopping condition for the pagination"""
+        return self.next is None
+
+    def next_page_payload(self):
+        """Payload enabling to generate the request for the next page"""
+        return {"startingFrom": self.next}
+
+    def page_results(self) -> list:
+        """List of results of the current page"""
+        return self.data
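To illustrate the contract this class fulfils, here is a self-contained sketch of a token-based pagination loop in the same style. The `Page` and `fetch_pages` names are simplified stand-ins, not the package's actual `PaginationModel`/`fetch_all_pages`; the point is that the Coalesce API's `next` token feeds the `startingFrom` parameter of the following request until it comes back as `None`.

```python
from functools import partial
from typing import Iterator, Optional

from pydantic import BaseModel


class Page(BaseModel):
    # Same shape as CoalescePagination: a page of results plus a `next` token.
    data: list
    next: Optional[str] = None

    def is_last(self) -> bool:
        return self.next is None


def fetch_pages(request) -> Iterator:
    """Simplified pagination loop: re-issue the request with `startingFrom`
    set to the previous page's `next` token until no token remains."""
    page = Page(**request())
    while True:
        yield from page.data
        if page.is_last():
            return
        page = Page(**partial(request, startingFrom=page.next)())


# Fake two-page API used only for this sketch.
_PAGES = {
    None: {"data": [1, 2], "next": "t1"},
    "t1": {"data": [3], "next": None},
}


def request(startingFrom: Optional[str] = None) -> dict:
    return _PAGES[startingFrom]


assert list(fetch_pages(request)) == [1, 2, 3]
```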
castor_extractor/utils/__init__.py
CHANGED
@@ -19,7 +19,13 @@ from .client import (
     handle_response,
     uri_encode,
 )
-from .collection import
+from .collection import (
+    deduplicate,
+    empty_iterator,
+    filter_items,
+    group_by,
+    mapping_from_rows,
+)
 from .constants import OUTPUT_DIR
 from .deprecate import deprecate_python
 from .env import from_env
castor_extractor/utils/client/api/pagination.py
CHANGED
@@ -66,16 +66,19 @@ def fetch_all_pages(
     """
     page_number = 1
     response_payload = request()
+
     paginated_response = pagination_model(**response_payload)
+
     while not paginated_response.is_last():
         logger.debug(f"Fetching page number {page_number}")
         yield from paginated_response.page_results()
         next_page_parameters = paginated_response.next_page_parameters()
-
+        request_with_pagination = partial(request, **next_page_parameters)
         if rate_limit:
             sleep(rate_limit)
         paginated_response = pagination_model(
-            current_page_payload=next_page_parameters,
+            current_page_payload=next_page_parameters,
+            **request_with_pagination(),
         )
         page_number += 1
 
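The fix hinges on `functools.partial`: the next-page parameters are now bound onto the request before it is re-issued, so each iteration actually advances (the old call site is truncated in this diff, but the changelog's "Coalesce - Fix pagination issue" suggests the follow-up request previously did not carry those parameters). A toy illustration of the binding, with a hypothetical `request` stand-in for the real HTTP call:

```python
from functools import partial
from typing import Optional


def request(endpoint: str, startingFrom: Optional[str] = None) -> dict:
    # Stand-in for the real HTTP call; simply echoes its own parameters.
    return {"endpoint": endpoint, "startingFrom": startingFrom}


next_page_parameters = {"startingFrom": "token-1"}
request_with_pagination = partial(request, endpoint="/runs", **next_page_parameters)

# Every subsequent call now includes the pagination token.
assert request_with_pagination() == {"endpoint": "/runs", "startingFrom": "token-1"}
```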
castor_extractor/utils/collection.py
CHANGED
@@ -2,6 +2,8 @@ from collections import defaultdict
 from collections.abc import Iterable, Sequence
 from typing import (
     Any,
+    List,
+    Optional,
     TypeVar,
 )
 
@@ -80,3 +82,27 @@ def deduplicate(
         deduplicated.append(element)
 
     return deduplicated
+
+
+def filter_items(
+    items: Iterable[T],
+    allowed: Optional[Iterable[T]] = None,
+    blocked: Optional[Iterable[T]] = None,
+) -> List[T]:
+    """
+    Filters `items` by excluding any in `blocked` or including only those in `allowed`.
+    If both `allowed` and `blocked` are None, returns all items.
+    If both are provided, raise an error.
+    """
+    items = list(items)
+
+    if allowed and blocked:
+        raise AttributeError(
+            "Only one of `allowed` and `blocked` can be provided"
+        )
+    if blocked:
+        return [item for item in items if item not in blocked]
+    if allowed:
+        return [item for item in items if item in allowed]
+
+    return items
castor_extractor/utils/collection_test.py
CHANGED
@@ -1,4 +1,6 @@
-
+import pytest
+
+from .collection import deduplicate, filter_items, mapping_from_rows
 
 
 def test__mapping_from_rows__basic_mapping():
@@ -72,3 +74,31 @@ def test_deduplicate():
         {"id": "2", "name": "duplicate"},
     ]
     assert deduplicate("id", elements) == [e1, e2, e3]
+
+
+def test_sqlserver_databases():
+    databases = [
+        "prod",
+        "staging",
+        "test",
+    ]
+
+    # 1. No allowed or blocked: should return all
+    result1 = filter_items(databases)
+    assert result1 == [
+        "prod",
+        "staging",
+        "test",
+    ]
+
+    # 2. Block "prod": only staging and test should remain
+    result2 = filter_items(databases, blocked=["prod"])
+    assert result2 == ["staging", "test"]
+
+    # 3. Only allow "staging" and "test"
+    result3 = filter_items(databases, allowed=["staging", "test"])
+    assert result3 == ["staging", "test"]
+
+    # 4. allowed and blocked, should raise
+    with pytest.raises(AttributeError):
+        filter_items(databases, blocked=["prod"], allowed=["staging", "test"])
castor_extractor/warehouse/sqlserver/client.py
CHANGED
@@ -1,15 +1,20 @@
+import logging
 from collections.abc import Iterator
 
 from sqlalchemy import text
 
 from ...utils import ExtractionQuery, SqlalchemyClient, uri_encode
 
+logger = logging.getLogger(__name__)
+
 SERVER_URI = "{user}:{password}@{host}:{port}/{database}"
 MSSQL_URI = f"mssql+pymssql://{SERVER_URI}"
 DEFAULT_PORT = 1433
 
 _KEYS = ("user", "password", "host", "port", "database")
 
+_SYSTEM_DATABASES = ("master", "model", "msdb", "tempdb", "DBAdmin")
+
 
 def _check_key(credentials: dict) -> None:
     for key in _KEYS:
@@ -51,3 +56,13 @@ class MSSQLClient(SqlalchemyClient):
             yield from results
         finally:
             self.close()
+
+    def get_databases(self) -> list[str]:
+        result = self.execute(
+            ExtractionQuery("SELECT name FROM sys.databases", {})
+        )
+        return [
+            row["name"]
+            for row in result
+            if row["name"] not in _SYSTEM_DATABASES
+        ]
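A quick sketch of what `get_databases` produces: rows from `SELECT name FROM sys.databases` are treated as mappings with a `name` key (an assumption inferred from the list comprehension above), and anything in `_SYSTEM_DATABASES` is dropped. The constant is copied from the diff.

```python
_SYSTEM_DATABASES = ("master", "model", "msdb", "tempdb", "DBAdmin")

# Simulated result of `SELECT name FROM sys.databases`.
rows = [{"name": n} for n in ("master", "prod", "tempdb", "analytics")]

user_databases = [
    row["name"] for row in rows if row["name"] not in _SYSTEM_DATABASES
]
assert user_databases == ["prod", "analytics"]
```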
castor_extractor/warehouse/sqlserver/extract.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 
-from ...utils import LocalStorage, from_env, write_summary
+from ...utils import LocalStorage, filter_items, from_env, write_summary
 from ..abstract import (
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
@@ -51,7 +51,13 @@ def extract_all(**kwargs) -> None:
 
     client = MSSQLClient(credentials=_credentials(kwargs))
 
-
+    databases = filter_items(
+        client.get_databases(), kwargs.get("allowed"), kwargs.get("blocked")
+    )
+
+    query_builder = MSSQLQueryBuilder(
+        databases=databases,
+    )
 
     storage = LocalStorage(directory=output_directory)
 
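Putting the pieces together, the multi-database flow in `extract_all` reduces to: list the databases, filter them, hand the survivors to the query builder. A hedged sketch of that sequence (the `filter_items` body is copied from the collection.py diff above; the database list stands in for `client.get_databases()`, since the real client needs credentials and a live warehouse):

```python
from typing import Iterable, List, Optional


def filter_items(
    items: Iterable[str],
    allowed: Optional[Iterable[str]] = None,
    blocked: Optional[Iterable[str]] = None,
) -> List[str]:
    # Same semantics as castor_extractor.utils.filter_items.
    items = list(items)
    if allowed and blocked:
        raise AttributeError("Only one of `allowed` and `blocked` can be provided")
    if blocked:
        return [item for item in items if item not in blocked]
    if allowed:
        return [item for item in items if item in allowed]
    return items


all_databases = ["prod", "staging", "test"]  # stand-in for client.get_databases()
databases = filter_items(all_databases, blocked=["test"])
assert databases == ["prod", "staging"]
# `databases` is then passed to MSSQLQueryBuilder(databases=databases).
```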
castor_extractor/warehouse/sqlserver/queries/column.sql
CHANGED
@@ -11,7 +11,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.tables
+        {database}.sys.tables
 
     UNION
 
@@ -21,7 +21,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.views
+        {database}.sys.views
 
     UNION
 
@@ -31,7 +31,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.external_tables
+        {database}.sys.external_tables
 ),
 /*
 `sys.columns` contains, among others:
@@ -54,11 +54,11 @@ column_ids AS (
         schema_name = ss.name,
         schema_id = ss.schema_id,
         comment = CONVERT(varchar(1024), ep.value)
-    FROM sys.columns AS sc
+    FROM {database}.sys.columns AS sc
     LEFT JOIN extended_tables AS et ON sc.object_id = et.table_id
-    LEFT JOIN sys.schemas AS ss ON et.schema_id = ss.schema_id
-    LEFT JOIN sys.databases AS sd ON sd.name =
-    LEFT JOIN sys.extended_properties AS ep
+    LEFT JOIN {database}.sys.schemas AS ss ON et.schema_id = ss.schema_id
+    LEFT JOIN {database}.sys.databases AS sd ON sd.name = '{database}'
+    LEFT JOIN {database}.sys.extended_properties AS ep
         ON
             sc.object_id = ep.major_id
             AND sc.column_id = ep.minor_id
@@ -70,9 +70,9 @@ columns AS (
         i.database_name,
         i.database_id,
         schema_name = c.table_schema,
-        i.schema_id,
+        schema_id = CAST(i.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)),
         table_name = c.table_name,
-        i.table_id,
+        table_id = CAST(i.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)) + '_' + CAST(i.table_id AS VARCHAR(10)),
         c.column_name,
         c.data_type,
         c.ordinal_position,
@@ -87,7 +87,7 @@ columns AS (
         i.comment,
         column_id = CONCAT(i.table_id, '.', c.column_name)
     FROM
-        information_schema.columns AS c
+        {database}.information_schema.columns AS c
     LEFT JOIN column_ids AS i
         ON
             (
castor_extractor/warehouse/sqlserver/queries/schema.sql
CHANGED
@@ -3,21 +3,20 @@ WITH ids AS (
     SELECT DISTINCT
         table_catalog,
         table_schema
-    FROM information_schema.tables
-    WHERE table_catalog = DB_NAME()
+    FROM {database}.information_schema.tables
 )
 
 SELECT
     d.database_id,
     database_name = i.table_catalog,
     schema_name = s.name,
-    s.schema_id,
+    schema_id = CAST(d.database_id AS VARCHAR(10)) + '_' + CAST(s.schema_id AS VARCHAR(10)),
     schema_owner = u.name,
     schema_owner_id = u.uid
-FROM sys.schemas AS s
+FROM {database}.sys.schemas AS s
 INNER JOIN ids AS i
     ON s.name = i.table_schema
-LEFT JOIN sys.sysusers AS u
+LEFT JOIN {database}.sys.sysusers AS u
     ON s.principal_id = u.uid
-LEFT JOIN sys.databases AS d
+LEFT JOIN {database}.sys.databases AS d
     ON i.table_catalog = d.name
castor_extractor/warehouse/sqlserver/queries/table.sql
CHANGED
@@ -11,7 +11,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.tables
+        {database}.sys.tables
 
     UNION
 
@@ -21,7 +21,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.views
+        {database}.sys.views
 
     UNION
 
@@ -31,14 +31,14 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.external_tables
+        {database}.sys.external_tables
 ),
 -- Get the row count per table
 partitions AS (
     SELECT
         object_id,
         row_count = SUM(rows)
-    FROM sys.partitions
+    FROM {database}.sys.partitions
     GROUP BY object_id
 ),
 -- Append row count to table properties
@@ -69,13 +69,12 @@ table_ids AS (
         table_owner = u.name,
         row_count,
         comment = CONVERT(varchar(1024), ep.value)
-    FROM
-
-    LEFT JOIN sys.schemas AS ss
+    FROM extended_tables_with_row_count AS et
+    LEFT JOIN {database}.sys.schemas AS ss
         ON et.schema_id = ss.schema_id
-    LEFT JOIN sys.sysusers AS u
+    LEFT JOIN {database}.sys.sysusers AS u
        ON et.table_owner_id = u.uid
-    LEFT JOIN sys.extended_properties AS ep
+    LEFT JOIN {database}.sys.extended_properties AS ep
        ON (
            et.table_id = ep.major_id
            AND ep.minor_id = 0
@@ -91,19 +90,18 @@ meta AS (
         t.table_name,
         t.table_type
     FROM
-        information_schema.tables AS t
-    LEFT JOIN sys.databases AS db
+        {database}.information_schema.tables AS t
+    LEFT JOIN {database}.sys.databases AS db
         ON t.table_catalog = db.name
-    WHERE t.table_catalog = db_name()
 )
 
 SELECT
     m.database_name,
     m.database_id,
     m.schema_name,
-    i.schema_id,
+    schema_id = CAST(m.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)),
     m.table_name,
-    i.table_id,
+    table_id = CAST(m.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)) + '_' + CAST(i.table_id AS VARCHAR(10)),
     m.table_type,
     i.table_owner,
     i.table_owner_id,
castor_extractor/warehouse/sqlserver/query.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import Optional
 
 from ..abstract import (
@@ -7,6 +8,15 @@ from ..abstract import (
     WarehouseAsset,
 )
 
+logger = logging.getLogger(__name__)
+
+
+_DATABASE_REQUIRED = (
+    WarehouseAsset.SCHEMA,
+    WarehouseAsset.TABLE,
+    WarehouseAsset.COLUMN,
+)
+
 
 class MSSQLQueryBuilder(AbstractQueryBuilder):
     """
@@ -15,10 +25,29 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
 
     def __init__(
         self,
+        databases: list[str],
         time_filter: Optional[TimeFilter] = None,
     ):
         super().__init__(time_filter=time_filter)
+        self._databases = databases
+
+    @staticmethod
+    def _format(query: ExtractionQuery, values: dict) -> ExtractionQuery:
+        return ExtractionQuery(
+            statement=query.statement.format(**values),
+            params=query.params,
+        )
 
     def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
-
+
+        if asset not in _DATABASE_REQUIRED:
+            return [query]
+
+        logger.info(
+            f"\tWill run queries with following database params: {self._databases}",
+        )
+        return [
+            self._format(query, {"database": database})
+            for database in self._databases
+        ]
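For schema, table, and column assets the builder now returns one query per database, expanding the `{database}` placeholder in the SQL files above via `str.format`. A small illustration of the mechanism, with a hypothetical one-line template standing in for the real statements:

```python
# Hypothetical template; the real statements are the .sql files shown above.
TEMPLATE = "SELECT name FROM {database}.sys.tables"

databases = ["prod", "staging"]
statements = [TEMPLATE.format(database=db) for db in databases]

assert statements == [
    "SELECT name FROM prod.sys.tables",
    "SELECT name FROM staging.sys.tables",
]
```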
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.33
+Version: 0.24.35
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -215,6 +215,14 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
 
 # Changelog
 
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
+## 0.24.34 - 2025-07-02
+
+* SQLServer: multiple databases
+
 ## 0.24.33 - 2025-07-10
 
 * Tableau - Add an option to skip fields ingestion
@@ -225,7 +233,7 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
 
 ## 0.24.31 - 2025-07-02
 
-* Looker Studio: add option to list users via a provided JSON file
+* Looker Studio: add an option to list users via a provided JSON file
 
 ## 0.24.30 - 2025-06-26
 
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=1S9O_c1LH8T4P78akRxlFS8Tv0i9Jgswy7V9zvd_UQw,18900
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -24,7 +24,7 @@ castor_extractor/commands/extract_salesforce.py,sha256=3j3YTmMkPAwocR-B1ozJQai0U
 castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80XMYxWReHjdycLsIa61pyeCD-sUDk,962
 castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
 castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
-castor_extractor/commands/extract_sqlserver.py,sha256
+castor_extractor/commands/extract_sqlserver.py,sha256=-20AlQbJ4W3oQytHLKdN8GX__UkrrQukOgSzy2l1WZY,1483
 castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
 castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
 castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
@@ -76,12 +76,10 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
 castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
 castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
-castor_extractor/transformation/coalesce/client/client.py,sha256=
+castor_extractor/transformation/coalesce/client/client.py,sha256=3YB82ibaumeSRd510mlrPXKsWefV3lHQQVis9oEK-LQ,6133
 castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
 castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
-castor_extractor/transformation/coalesce/client/
-castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
-castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
+castor_extractor/transformation/coalesce/client/pagination.py,sha256=zynyWCMEzUQ7HA1Q5AP4BAOmxRQI6NA5jCPEo0lHn44,705
 castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
 castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
 castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZA2DrgjP7Gi5epAPE,5684
@@ -96,7 +94,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
 castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
 castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
-castor_extractor/utils/__init__.py,sha256=
+castor_extractor/utils/__init__.py,sha256=z_BdKTUyuug3I5AzCuSGrAVskfLax4_olfORIjhZw_M,1691
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
 castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
 castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
@@ -108,7 +106,7 @@ castor_extractor/utils/client/api/auth.py,sha256=lq0K3UEl1vwIIa_vKTdlpIQPdE5K1-5
 castor_extractor/utils/client/api/auth_test.py,sha256=LlyXytnatg6ZzR4Zkvzk0BH99FYhHX7qn_nyr2MSnDI,1305
 castor_extractor/utils/client/api/client.py,sha256=qmj7KoNqt6F-cmpdaMiz_aVxzwMCgbDNcgzXSbCdu1Y,5183
 castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
-castor_extractor/utils/client/api/pagination.py,sha256=
+castor_extractor/utils/client/api/pagination.py,sha256=tNL89bvgnMJd0ajJA07wTTReH3PJOQm3xsa93SKHFss,2499
 castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
 castor_extractor/utils/client/api/safe_request.py,sha256=5pvI2WPRDtitX9F1aYcXTIMPNmDikRK9dKTD3ctoeoQ,1774
 castor_extractor/utils/client/api/safe_request_test.py,sha256=LqS5FBxs6lLLcTkcgxIoLb6OinxShHXR5y4CWZpwmwg,2005
@@ -118,8 +116,8 @@ castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9
 castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
 castor_extractor/utils/client/uri.py,sha256=jmP9hY-6PRqdc3-vAOdtll_U6q9VCqSqmBAN6QRs3ZI,150
 castor_extractor/utils/client/uri_test.py,sha256=1XKF6qSseCeD4G4ckaNO07JXfGbt7XUVinOZdpEYrDQ,259
-castor_extractor/utils/collection.py,sha256=
-castor_extractor/utils/collection_test.py,sha256=
+castor_extractor/utils/collection.py,sha256=g2HmB0ievvYHWaZ8iEzkcPPkrBFsh6R6b_liBqcsMjc,3044
+castor_extractor/utils/collection_test.py,sha256=mlw33u4VidazQwWxJMvaFeYX3VB5CAj6rqRG-cRsLrw,2884
 castor_extractor/utils/constants.py,sha256=qBQprS9U66mS-RIBXiLujdTSV3WvGv40Bc0khP4Abdk,39
 castor_extractor/utils/deprecate.py,sha256=aBIN2QqZUx5CBNZMFfOUhi8QqtPqRcJtmrN6xqfm-y8,805
 castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
@@ -197,7 +195,7 @@ castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqN
 castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
 castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Phq378VEaFLD-nyP2_A1wge6HUP45jSthhlNjD7aqSg,4085
 castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
-castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=
+castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=p7fiXu5--BlY1FKnoW2CAQF7kPKjcN1tYf_SwvCZus4,1474
 castor_extractor/visualization/looker_studio/extract.py,sha256=NU48xQ83UtRW3jXKJcvofzqgEM2lHGjtTzjbKOSB50A,4059
 castor_extractor/visualization/looker_studio/extract_test.py,sha256=ZckAxUMuoEjJ9RWkfRvt9M8SxblkQvsq-Grb8GSs-y0,492
 castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
@@ -420,18 +418,18 @@ castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsK
 castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
 castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
 castor_extractor/warehouse/sqlserver/__init__.py,sha256=PdOuYznmvKAbfWAm8UdN47MfEsd9jqPi_dDi3WEo1KY,116
-castor_extractor/warehouse/sqlserver/client.py,sha256=
-castor_extractor/warehouse/sqlserver/extract.py,sha256
+castor_extractor/warehouse/sqlserver/client.py,sha256=Bjfpw96IKAQfWPiU5SZYEDfetwfkqZrnKbQYoStcnZc,2007
+castor_extractor/warehouse/sqlserver/extract.py,sha256=-LoHY5wAGJk4vutrO3N0_PaRqts7rkEn7pADRHzoxiI,2249
 castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8eeWwOWkxTXvHyVKSVwhURktd8,48
-castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=
-castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=
-castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=
-castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=
+castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=_K5OS63N7fM7kGPudnnjJEnIyaxR1xE2hoZgnJ_A3p8,2763
+castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
+castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=UR3eTiYw7Iq5-GukelnNg_uq6haZ_dwg_SedZfOWUoA,619
+castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=4RgeSkHDWTWRyU2iLxaBR0KuSwIBvb3GbQGdkJYXbn0,2787
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
-castor_extractor/warehouse/sqlserver/query.py,sha256=
+castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
-castor_extractor-0.24.
+castor_extractor-0.24.35.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.35.dist-info/METADATA,sha256=-vrfKzS5B3r2qL7tjFjFBR-AizzuVIexEVJHCci7Z5s,26353
+castor_extractor-0.24.35.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.35.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
+castor_extractor-0.24.35.dist-info/RECORD,,
castor_extractor/transformation/coalesce/client/type.py
REMOVED
@@ -1 +0,0 @@
-NodeIDToNamesMapping = dict[str, set[str]]
castor_extractor/transformation/coalesce/client/utils.py
REMOVED
@@ -1,52 +0,0 @@
-from ....utils import SerializedAsset
-from .type import NodeIDToNamesMapping
-
-_NULL_SUFFIX = ": Null"
-_UNIQUE_SUFFIX = ": Unique"
-
-
-def is_test(
-    query_result: dict,
-    node_id: str,
-    test_names: NodeIDToNamesMapping,
-    column_names: NodeIDToNamesMapping,
-) -> bool:
-    """
-    checks whether a query result is a test result or not.
-
-    all this implementation can soon be replaced by checking whether
-    query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
-    """
-    # test scoped on the node (table)
-    result_name = query_result["name"]
-    if result_name in test_names.get(node_id, {}):
-        return True
-
-    # test scoped on the column
-    if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
-        _UNIQUE_SUFFIX
-    ):
-        column_name = result_name.split(":")[0]
-        if column_name in column_names.get(node_id, {}):
-            return True
-    return False
-
-
-def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(testName)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        tests = node.get("metadata", {}).get("appliedNodeTests", [])
-        mapping[node_id] = {test["name"] for test in tests}
-    return mapping
-
-
-def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(columnNames)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        columns = node.get("metadata", {}).get("columns", [])
-        mapping[node_id] = {column["name"] for column in columns}
-    return mapping
castor_extractor/transformation/coalesce/client/utils_test.py
REMOVED
@@ -1,54 +0,0 @@
-from .utils import is_test
-
-
-def test_is_test():
-    test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
-    column_names = {"some-uuid": {"carthago", "delenda", "est"}}
-
-    happy_node_test = is_test(
-        query_result={"name": "check-mirrors"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_node_test is True
-
-    unknown_node_test = is_test(
-        query_result={"name": "check-engine"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_test is False
-
-    happy_column_test_unique = is_test(
-        query_result={"name": "carthago: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_unique is True
-
-    happy_column_test_null = is_test(
-        query_result={"name": "carthago: Null"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_null is True
-
-    unknown_column_test = is_test(
-        query_result={"name": "rome: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_column_test is False
-
-    unknown_node_id_test = is_test(
-        query_result={"name": "whatever: Unique"},
-        node_id="unknown-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_id_test is False
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/LICENCE
File without changes
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/WHEEL
File without changes
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/entry_points.txt
File without changes