castor-extractor 0.24.49__py3-none-any.whl → 0.24.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +12 -0
- castor_extractor/visualization/powerbi/client/client.py +7 -1
- castor_extractor/visualization/powerbi/client/client_test.py +36 -4
- castor_extractor/visualization/sigma/client/authentication.py +68 -0
- castor_extractor/visualization/sigma/client/client.py +1 -21
- castor_extractor/warehouse/sqlserver/client.py +26 -26
- castor_extractor/warehouse/sqlserver/queries/query.sql +4 -4
- castor_extractor/warehouse/sqlserver/queries/schema.sql +2 -2
- castor_extractor/warehouse/sqlserver/queries/table.sql +6 -6
- castor_extractor/warehouse/sqlserver/queries/user.sql +2 -2
- castor_extractor/warehouse/sqlserver/queries/view_ddl.sql +3 -3
- castor_extractor/warehouse/sqlserver/query.py +10 -6
- {castor_extractor-0.24.49.dist-info → castor_extractor-0.24.52.dist-info}/METADATA +13 -1
- {castor_extractor-0.24.49.dist-info → castor_extractor-0.24.52.dist-info}/RECORD +17 -16
- {castor_extractor-0.24.49.dist-info → castor_extractor-0.24.52.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.49.dist-info → castor_extractor-0.24.52.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.49.dist-info → castor_extractor-0.24.52.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.52 - 2025-09-18
|
|
4
|
+
|
|
5
|
+
* SqlServer : improve extraction of users and technical owners
|
|
6
|
+
|
|
7
|
+
## 0.24.51 - 2025-09-17
|
|
8
|
+
|
|
9
|
+
* PowerBI : don't store sensitive data in activity events
|
|
10
|
+
|
|
11
|
+
## 0.24.50 - 2025-09-15
|
|
12
|
+
|
|
13
|
+
* SqlServer: support multiple databases in queries
|
|
14
|
+
|
|
3
15
|
## 0.24.49 - 2025-09-12
|
|
4
16
|
|
|
5
17
|
* Tableau: add option to bypass ssl certificate verification
|
|
@@ -33,6 +33,8 @@ POWERBI_SCAN_TIMEOUT_S = 60
|
|
|
33
33
|
MAX_RETRY_PAGES = 1
|
|
34
34
|
RETRY_PAGES_TIMEOUT_MS = 35 * 1000 # 35 seconds
|
|
35
35
|
|
|
36
|
+
KEYS_TO_HIDE = ("ClientIP", "UserAgent")
|
|
37
|
+
|
|
36
38
|
logger = logging.getLogger(__name__)
|
|
37
39
|
|
|
38
40
|
|
|
@@ -61,7 +63,11 @@ class PowerbiClient(APIClient):
|
|
|
61
63
|
self._get,
|
|
62
64
|
endpoint=self.endpoint_factory.activity_events(day),
|
|
63
65
|
)
|
|
64
|
-
|
|
66
|
+
for event in fetch_all_pages(request, PowerBiPagination):
|
|
67
|
+
for key in KEYS_TO_HIDE:
|
|
68
|
+
if key in event:
|
|
69
|
+
del event[key]
|
|
70
|
+
yield event
|
|
65
71
|
|
|
66
72
|
def _datasets(self) -> Iterator[dict]:
|
|
67
73
|
"""
|
|
@@ -142,17 +142,44 @@ def test__activity_events(power_bi_client):
|
|
|
142
142
|
day = date.today()
|
|
143
143
|
mocked_get_results = [
|
|
144
144
|
{
|
|
145
|
-
Keys.ACTIVITY_EVENT_ENTITIES: [
|
|
145
|
+
Keys.ACTIVITY_EVENT_ENTITIES: [
|
|
146
|
+
{
|
|
147
|
+
"id": "foo",
|
|
148
|
+
"name": "Foo",
|
|
149
|
+
"ClientIP": "1.1.1.1",
|
|
150
|
+
"UserAgent": "Mozilla/5.0",
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"id": "bar",
|
|
154
|
+
"name": "Bar",
|
|
155
|
+
"ClientIP": "1.1.1.2",
|
|
156
|
+
"UserAgent": "Mozilla/5.0",
|
|
157
|
+
},
|
|
158
|
+
],
|
|
146
159
|
Keys.LAST_RESULT_SET: False,
|
|
147
160
|
Keys.CONTINUATION_URI: "https://next-call-1",
|
|
148
161
|
},
|
|
149
162
|
{
|
|
150
|
-
Keys.ACTIVITY_EVENT_ENTITIES: [
|
|
163
|
+
Keys.ACTIVITY_EVENT_ENTITIES: [
|
|
164
|
+
{
|
|
165
|
+
"id": "baz",
|
|
166
|
+
"name": "Baz",
|
|
167
|
+
"ClientIP": "1.1.1.3",
|
|
168
|
+
"UserAgent": "Mozilla/5.0",
|
|
169
|
+
}
|
|
170
|
+
],
|
|
151
171
|
Keys.LAST_RESULT_SET: False,
|
|
152
172
|
Keys.CONTINUATION_URI: "https://next-call-2",
|
|
153
173
|
},
|
|
154
174
|
{
|
|
155
|
-
Keys.ACTIVITY_EVENT_ENTITIES: [
|
|
175
|
+
Keys.ACTIVITY_EVENT_ENTITIES: [
|
|
176
|
+
{
|
|
177
|
+
"id": "biz",
|
|
178
|
+
"name": "Biz",
|
|
179
|
+
"ClientIP": "1.1.1.4",
|
|
180
|
+
"UserAgent": "Mozilla/5.0",
|
|
181
|
+
}
|
|
182
|
+
],
|
|
156
183
|
Keys.LAST_RESULT_SET: True,
|
|
157
184
|
Keys.CONTINUATION_URI: None,
|
|
158
185
|
},
|
|
@@ -162,7 +189,12 @@ def test__activity_events(power_bi_client):
|
|
|
162
189
|
mocked_get.side_effect = mocked_get_results
|
|
163
190
|
|
|
164
191
|
result = list(power_bi_client._activity_events(day=day))
|
|
165
|
-
assert result == [
|
|
192
|
+
assert result == [
|
|
193
|
+
{"id": "foo", "name": "Foo"},
|
|
194
|
+
{"id": "bar", "name": "Bar"},
|
|
195
|
+
{"id": "baz", "name": "Baz"},
|
|
196
|
+
{"id": "biz", "name": "Biz"},
|
|
197
|
+
]
|
|
166
198
|
|
|
167
199
|
expected_calls = [
|
|
168
200
|
call(endpoint=ENDPOINT_FACTORY.activity_events(day=day)),
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from ....utils import (
|
|
9
|
+
BearerAuth,
|
|
10
|
+
build_url,
|
|
11
|
+
handle_response,
|
|
12
|
+
)
|
|
13
|
+
from .endpoints import (
|
|
14
|
+
SigmaEndpointFactory,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_AUTH_TIMEOUT_S = 60
|
|
21
|
+
_REFRESH_BUFFER_S = 300
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SigmaBearerAuth(BearerAuth):
|
|
25
|
+
def __init__(self, host: str, token_payload: dict[str, str]):
|
|
26
|
+
auth_endpoint = SigmaEndpointFactory.authentication()
|
|
27
|
+
self.authentication_url = build_url(host, auth_endpoint)
|
|
28
|
+
self.token_payload = token_payload
|
|
29
|
+
self._token_expires_at: Optional[float] = None
|
|
30
|
+
self._token_lock = threading.Lock()
|
|
31
|
+
|
|
32
|
+
def fetch_token(self) -> str:
|
|
33
|
+
"""Returns the token and sets its expiration time."""
|
|
34
|
+
token_api_path = self.authentication_url
|
|
35
|
+
token_response = requests.post(
|
|
36
|
+
token_api_path, data=self.token_payload, timeout=_AUTH_TIMEOUT_S
|
|
37
|
+
)
|
|
38
|
+
response_data = handle_response(token_response)
|
|
39
|
+
expires_in_seconds = int(response_data["expires_in"])
|
|
40
|
+
self._token_expires_at = time.time() + expires_in_seconds
|
|
41
|
+
return response_data["access_token"]
|
|
42
|
+
|
|
43
|
+
def _is_token_expired_or_expiring_soon(self) -> bool:
|
|
44
|
+
"""
|
|
45
|
+
Returns True if the token is expired or will expire soon (within buffer time)
|
|
46
|
+
"""
|
|
47
|
+
if self._token_expires_at is None:
|
|
48
|
+
return False
|
|
49
|
+
|
|
50
|
+
return time.time() >= (self._token_expires_at - _REFRESH_BUFFER_S)
|
|
51
|
+
|
|
52
|
+
def _needs_refresh(self, force_refresh: bool = False) -> bool:
|
|
53
|
+
"""Returns True if the token needs to be refreshed."""
|
|
54
|
+
is_expired = self._is_token_expired_or_expiring_soon()
|
|
55
|
+
return not self._token or force_refresh or is_expired
|
|
56
|
+
|
|
57
|
+
def _fetch_token(self, force_refresh: bool = False) -> Optional[str]:
|
|
58
|
+
"""Returns the API token, refreshing it if needed (thread-safe)."""
|
|
59
|
+
if not self._needs_refresh(force_refresh):
|
|
60
|
+
return f"Bearer {self._token}"
|
|
61
|
+
|
|
62
|
+
with self._token_lock:
|
|
63
|
+
if not self._needs_refresh(force_refresh):
|
|
64
|
+
return f"Bearer {self._token}"
|
|
65
|
+
|
|
66
|
+
logger.info("Refreshing authentication token...")
|
|
67
|
+
self._token = self.fetch_token()
|
|
68
|
+
return f"Bearer {self._token}"
|
|
@@ -5,19 +5,16 @@ from functools import partial
|
|
|
5
5
|
from http import HTTPStatus
|
|
6
6
|
from typing import Callable, Iterable, Optional
|
|
7
7
|
|
|
8
|
-
import requests
|
|
9
8
|
from pydantic import BaseModel
|
|
10
9
|
|
|
11
10
|
from ....utils import (
|
|
12
11
|
APIClient,
|
|
13
|
-
BearerAuth,
|
|
14
12
|
RequestSafeMode,
|
|
15
|
-
build_url,
|
|
16
13
|
fetch_all_pages,
|
|
17
|
-
handle_response,
|
|
18
14
|
retry,
|
|
19
15
|
)
|
|
20
16
|
from ..assets import SigmaAsset
|
|
17
|
+
from .authentication import SigmaBearerAuth
|
|
21
18
|
from .credentials import SigmaCredentials
|
|
22
19
|
from .endpoints import SigmaEndpointFactory
|
|
23
20
|
from .pagination import (
|
|
@@ -39,7 +36,6 @@ _DATA_ELEMENTS: tuple[str, ...] = (
|
|
|
39
36
|
"viz",
|
|
40
37
|
)
|
|
41
38
|
|
|
42
|
-
_AUTH_TIMEOUT_S = 60
|
|
43
39
|
_SIGMA_TIMEOUT_S = 300
|
|
44
40
|
|
|
45
41
|
_SIGMA_HEADERS = {
|
|
@@ -85,20 +81,6 @@ class Lineage(BaseModel):
|
|
|
85
81
|
context: LineageContext
|
|
86
82
|
|
|
87
83
|
|
|
88
|
-
class SigmaBearerAuth(BearerAuth):
|
|
89
|
-
def __init__(self, host: str, token_payload: dict[str, str]):
|
|
90
|
-
auth_endpoint = SigmaEndpointFactory.authentication()
|
|
91
|
-
self.authentication_url = build_url(host, auth_endpoint)
|
|
92
|
-
self.token_payload = token_payload
|
|
93
|
-
|
|
94
|
-
def fetch_token(self):
|
|
95
|
-
token_api_path = self.authentication_url
|
|
96
|
-
token_response = requests.post(
|
|
97
|
-
token_api_path, data=self.token_payload, timeout=_AUTH_TIMEOUT_S
|
|
98
|
-
)
|
|
99
|
-
return handle_response(token_response)["access_token"]
|
|
100
|
-
|
|
101
|
-
|
|
102
84
|
class SigmaClient(APIClient):
|
|
103
85
|
def __init__(
|
|
104
86
|
self,
|
|
@@ -240,8 +222,6 @@ class SigmaClient(APIClient):
|
|
|
240
222
|
|
|
241
223
|
lineage_context = self._lineage_context(elements)
|
|
242
224
|
|
|
243
|
-
self._auth.refresh_token()
|
|
244
|
-
|
|
245
225
|
with ThreadPoolExecutor(max_workers=_THREADS_LINEAGE) as executor:
|
|
246
226
|
results = executor.map(self._get_lineage, lineage_context)
|
|
247
227
|
|
|
@@ -7,13 +7,20 @@ from ...utils import ExtractionQuery, SqlalchemyClient, uri_encode
|
|
|
7
7
|
|
|
8
8
|
logger = logging.getLogger(__name__)
|
|
9
9
|
|
|
10
|
-
SERVER_URI = "{user}:{password}@{host}:{port}/{database}"
|
|
10
|
+
SERVER_URI = "{user}:{password}@{host}:{port}"
|
|
11
11
|
MSSQL_URI = f"mssql+pymssql://{SERVER_URI}"
|
|
12
12
|
DEFAULT_PORT = 1433
|
|
13
13
|
|
|
14
|
-
_KEYS = ("user", "password", "host", "port", "database")
|
|
14
|
+
_KEYS = ("user", "password", "host", "port")
|
|
15
15
|
|
|
16
|
-
_SYSTEM_DATABASES = (
|
|
16
|
+
_SYSTEM_DATABASES = (
|
|
17
|
+
"DBAdmin",
|
|
18
|
+
"dbaTools",
|
|
19
|
+
"master",
|
|
20
|
+
"model",
|
|
21
|
+
"msdb",
|
|
22
|
+
"tempdb",
|
|
23
|
+
)
|
|
17
24
|
|
|
18
25
|
|
|
19
26
|
def _check_key(credentials: dict) -> None:
|
|
@@ -39,7 +46,6 @@ class MSSQLClient(SqlalchemyClient):
|
|
|
39
46
|
password=uri_encode(credentials["password"]),
|
|
40
47
|
host=credentials["host"],
|
|
41
48
|
port=credentials.get("port") or DEFAULT_PORT,
|
|
42
|
-
database=credentials["database"],
|
|
43
49
|
)
|
|
44
50
|
return uri
|
|
45
51
|
|
|
@@ -67,12 +73,6 @@ class MSSQLClient(SqlalchemyClient):
|
|
|
67
73
|
if row["name"] not in _SYSTEM_DATABASES
|
|
68
74
|
]
|
|
69
75
|
|
|
70
|
-
def _current_database(self) -> str:
|
|
71
|
-
result = self.execute(
|
|
72
|
-
ExtractionQuery("SELECT DB_NAME() AS database_name", {})
|
|
73
|
-
)
|
|
74
|
-
return next(result)["database_name"]
|
|
75
|
-
|
|
76
76
|
def _has_access(self, name: str, object_type: str, permission: str) -> bool:
|
|
77
77
|
query_text = f"""
|
|
78
78
|
SELECT
|
|
@@ -83,45 +83,45 @@ class MSSQLClient(SqlalchemyClient):
|
|
|
83
83
|
result = next(self.execute(query))
|
|
84
84
|
return result["has_permission"] == 1
|
|
85
85
|
|
|
86
|
-
def _has_table_read_access(self, table_name: str) -> bool:
|
|
86
|
+
def _has_table_read_access(self, database: str, table_name: str) -> bool:
|
|
87
87
|
"""
|
|
88
88
|
Check whether we have READ access to the given table
|
|
89
89
|
"""
|
|
90
90
|
return self._has_access(
|
|
91
|
-
name=table_name,
|
|
91
|
+
name=f"[{database}].{table_name}",
|
|
92
92
|
object_type="OBJECT",
|
|
93
93
|
permission="SELECT",
|
|
94
94
|
)
|
|
95
95
|
|
|
96
|
-
def _has_view_database_state(self) -> bool:
|
|
96
|
+
def _has_view_database_state(self, database: str) -> bool:
|
|
97
97
|
"""
|
|
98
98
|
Check whether we have VIEW DATABASE STATE permissions, which
|
|
99
99
|
is necessary to fetch data from the Query Store
|
|
100
100
|
"""
|
|
101
101
|
return self._has_access(
|
|
102
|
-
name=self._current_database(),
|
|
102
|
+
name=database,
|
|
103
103
|
object_type="DATABASE",
|
|
104
104
|
permission="VIEW DATABASE STATE",
|
|
105
105
|
)
|
|
106
106
|
|
|
107
|
-
def _has_query_store(self) -> bool:
|
|
107
|
+
def _has_query_store(self, database: str) -> bool:
|
|
108
108
|
"""
|
|
109
|
-
Checks whether the Query Store is activated on this
|
|
109
|
+
Checks whether the Query Store is activated on this database.
|
|
110
110
|
This is required to extract the SQL queries history.
|
|
111
111
|
https://learn.microsoft.com/en-us/sql/relational-databases/performance/monitoring-performance-by-using-the-query-store?view=sql-server-ver17"""
|
|
112
|
-
sql = """
|
|
112
|
+
sql = f"""
|
|
113
113
|
SELECT
|
|
114
114
|
desired_state
|
|
115
115
|
FROM
|
|
116
|
-
sys.database_query_store_options
|
|
116
|
+
[{database}].sys.database_query_store_options
|
|
117
117
|
"""
|
|
118
118
|
query = ExtractionQuery(sql, {})
|
|
119
119
|
# 2 = READ_WRITE, which means the Query Store is activated
|
|
120
120
|
return next(self.execute(query))["desired_state"] == 2
|
|
121
121
|
|
|
122
|
-
def has_queries_permissions(self) -> bool:
|
|
122
|
+
def has_queries_permissions(self, database: str) -> bool:
|
|
123
123
|
"""
|
|
124
|
-
Verify that we
|
|
124
|
+
Verify that we have the required permissions to extract
|
|
125
125
|
query history and view object definitions (DDL).
|
|
126
126
|
|
|
127
127
|
This check ensures:
|
|
@@ -144,16 +144,16 @@ class MSSQLClient(SqlalchemyClient):
|
|
|
144
144
|
|
|
145
145
|
has_permissions = True
|
|
146
146
|
for table in tables:
|
|
147
|
-
if not self._has_table_read_access(table):
|
|
148
|
-
logger.info(f"Missing READ
|
|
147
|
+
if not self._has_table_read_access(database, table):
|
|
148
|
+
logger.info(f"Missing READ permission on {database}.{table}")
|
|
149
149
|
has_permissions = False
|
|
150
150
|
|
|
151
|
-
if not self._has_view_database_state():
|
|
152
|
-
logger.info("Missing
|
|
151
|
+
if not self._has_view_database_state(database):
|
|
152
|
+
logger.info(f"Missing VIEW DATABASE STATE in database {database}")
|
|
153
153
|
has_permissions = False
|
|
154
154
|
|
|
155
|
-
if not self._has_query_store():
|
|
156
|
-
logger.info("
|
|
155
|
+
if not self._has_query_store(database):
|
|
156
|
+
logger.info(f"Query Store is not activated in database {database}")
|
|
157
157
|
has_permissions = False
|
|
158
158
|
|
|
159
159
|
return has_permissions
|
|
@@ -10,15 +10,15 @@ SELECT
|
|
|
10
10
|
DATEADD(MICROSECOND, last_duration % 1000000, rs.last_execution_time)
|
|
11
11
|
) AS end_time
|
|
12
12
|
FROM
|
|
13
|
-
sys.query_store_runtime_stats AS rs
|
|
13
|
+
[{database}].sys.query_store_runtime_stats AS rs
|
|
14
14
|
INNER JOIN
|
|
15
|
-
sys.query_store_plan p
|
|
15
|
+
[{database}].sys.query_store_plan p
|
|
16
16
|
ON rs.plan_id = p.plan_id
|
|
17
17
|
INNER JOIN
|
|
18
|
-
sys.query_store_query q
|
|
18
|
+
[{database}].sys.query_store_query q
|
|
19
19
|
ON p.query_id = q.query_id
|
|
20
20
|
INNER JOIN
|
|
21
|
-
sys.query_store_query_text qt
|
|
21
|
+
[{database}].sys.query_store_query_text qt
|
|
22
22
|
ON q.query_text_id = qt.query_text_id
|
|
23
23
|
WHERE
|
|
24
24
|
CAST(rs.last_execution_time AS DATE) = :day
|
|
@@ -21,7 +21,7 @@ SELECT
|
|
|
21
21
|
FROM [{database}].sys.schemas AS s
|
|
22
22
|
INNER JOIN ids AS i
|
|
23
23
|
ON s.name = i.table_schema
|
|
24
|
-
LEFT JOIN [{database}].sys.
|
|
25
|
-
ON s.principal_id = u.
|
|
24
|
+
LEFT JOIN [{database}].sys.database_principals AS u
|
|
25
|
+
ON s.principal_id = u.principal_id
|
|
26
26
|
LEFT JOIN [{database}].sys.databases AS d
|
|
27
27
|
ON i.table_catalog COLLATE DATABASE_DEFAULT = d.name COLLATE DATABASE_DEFAULT
|
|
@@ -8,7 +8,7 @@ WITH extended_tables AS (
|
|
|
8
8
|
SELECT
|
|
9
9
|
table_id = object_id,
|
|
10
10
|
table_name = name,
|
|
11
|
-
|
|
11
|
+
principal_id,
|
|
12
12
|
schema_id
|
|
13
13
|
FROM
|
|
14
14
|
[{database}].sys.tables
|
|
@@ -18,7 +18,7 @@ WITH extended_tables AS (
|
|
|
18
18
|
SELECT
|
|
19
19
|
table_id = object_id,
|
|
20
20
|
table_name = name,
|
|
21
|
-
|
|
21
|
+
principal_id,
|
|
22
22
|
schema_id
|
|
23
23
|
FROM
|
|
24
24
|
[{database}].sys.views
|
|
@@ -28,7 +28,7 @@ WITH extended_tables AS (
|
|
|
28
28
|
SELECT
|
|
29
29
|
table_id = object_id,
|
|
30
30
|
table_name = name,
|
|
31
|
-
|
|
31
|
+
principal_id,
|
|
32
32
|
schema_id
|
|
33
33
|
FROM
|
|
34
34
|
[{database}].sys.external_tables
|
|
@@ -65,15 +65,15 @@ table_ids AS (
|
|
|
65
65
|
table_name,
|
|
66
66
|
schema_name = ss.name,
|
|
67
67
|
schema_id = ss.schema_id,
|
|
68
|
-
table_owner_id,
|
|
68
|
+
table_owner_id = u.name,
|
|
69
69
|
table_owner = u.name,
|
|
70
70
|
row_count,
|
|
71
71
|
comment = CONVERT(varchar(1024), ep.value)
|
|
72
72
|
FROM extended_tables_with_row_count AS et
|
|
73
73
|
LEFT JOIN [{database}].sys.schemas AS ss
|
|
74
74
|
ON et.schema_id = ss.schema_id
|
|
75
|
-
LEFT JOIN [{database}].sys.
|
|
76
|
-
ON et.
|
|
75
|
+
LEFT JOIN [{database}].sys.database_principals AS u
|
|
76
|
+
ON et.principal_id = u.principal_id
|
|
77
77
|
LEFT JOIN [{database}].sys.extended_properties AS ep
|
|
78
78
|
ON (
|
|
79
79
|
et.table_id = ep.major_id
|
|
@@ -4,10 +4,10 @@ SELECT
|
|
|
4
4
|
s.name AS schema_name,
|
|
5
5
|
DB_NAME() AS database_name
|
|
6
6
|
FROM
|
|
7
|
-
sys.views v
|
|
7
|
+
[{database}].sys.views v
|
|
8
8
|
INNER JOIN
|
|
9
|
-
sys.schemas s
|
|
9
|
+
[{database}].sys.schemas s
|
|
10
10
|
ON v.schema_id = s.schema_id
|
|
11
11
|
INNER JOIN
|
|
12
|
-
sys.sql_modules m
|
|
12
|
+
[{database}].sys.sql_modules m
|
|
13
13
|
ON v.object_id = m.object_id
|
|
@@ -16,10 +16,8 @@ _NO_DATABASE_ERROR_MSG = (
|
|
|
16
16
|
"If you are using the db_allow/db_block options, please make sure to use the correct case."
|
|
17
17
|
)
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
WarehouseAsset.
|
|
21
|
-
WarehouseAsset.TABLE,
|
|
22
|
-
WarehouseAsset.COLUMN,
|
|
19
|
+
_SQLSERVER_DUPLICATED_ASSETS: tuple[WarehouseAsset, ...] = (
|
|
20
|
+
WarehouseAsset.USER,
|
|
23
21
|
)
|
|
24
22
|
|
|
25
23
|
|
|
@@ -33,7 +31,10 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
|
|
|
33
31
|
databases: list[str],
|
|
34
32
|
time_filter: Optional[TimeFilter] = None,
|
|
35
33
|
):
|
|
36
|
-
super().__init__(
|
|
34
|
+
super().__init__(
|
|
35
|
+
time_filter=time_filter,
|
|
36
|
+
duplicated=_SQLSERVER_DUPLICATED_ASSETS,
|
|
37
|
+
)
|
|
37
38
|
if not databases:
|
|
38
39
|
raise ValueError(_NO_DATABASE_ERROR_MSG)
|
|
39
40
|
self._databases = databases
|
|
@@ -48,7 +49,10 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
|
|
|
48
49
|
def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
|
|
49
50
|
query = self.build_default(asset)
|
|
50
51
|
|
|
51
|
-
if asset
|
|
52
|
+
if asset == WarehouseAsset.DATABASE:
|
|
53
|
+
# database.sql does not include a {database} placeholder.
|
|
54
|
+
# Indeed, databases are extracted at the server level
|
|
55
|
+
# (not scoped to a specific database).
|
|
52
56
|
return [query]
|
|
53
57
|
|
|
54
58
|
logger.info(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.49
|
|
3
|
+
Version: 0.24.52
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -215,6 +215,18 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.52 - 2025-09-18
|
|
219
|
+
|
|
220
|
+
* SqlServer : improve extraction of users and technical owners
|
|
221
|
+
|
|
222
|
+
## 0.24.51 - 2025-09-17
|
|
223
|
+
|
|
224
|
+
* PowerBI : don't store sensitive data in activity events
|
|
225
|
+
|
|
226
|
+
## 0.24.50 - 2025-09-15
|
|
227
|
+
|
|
228
|
+
* SqlServer: support multiple databases in queries
|
|
229
|
+
|
|
218
230
|
## 0.24.49 - 2025-09-12
|
|
219
231
|
|
|
220
232
|
* Tableau: add option to bypass ssl certificate verification
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=ruS47cNmG5EMJYGYtkGGyhh7A5NgNz4TxzS8h0lP_Co,20477
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -238,8 +238,8 @@ castor_extractor/visualization/powerbi/__init__.py,sha256=hoZ73ngLhMc9edqxO9PUIE
|
|
|
238
238
|
castor_extractor/visualization/powerbi/assets.py,sha256=IB_XKwgdN1pZYGZ4RfeHrLjflianTzWf_6tg-4CIwu0,742
|
|
239
239
|
castor_extractor/visualization/powerbi/client/__init__.py,sha256=UPIhMaCCdNxhiLdkItC0IPFE_AMi-SgqI_ahwjB9utI,151
|
|
240
240
|
castor_extractor/visualization/powerbi/client/authentication.py,sha256=cTohunKr1nUDfvxB0sejJSyfE2BdCtwT1WMPecWlbyU,1045
|
|
241
|
-
castor_extractor/visualization/powerbi/client/client.py,sha256=
|
|
242
|
-
castor_extractor/visualization/powerbi/client/client_test.py,sha256=
|
|
241
|
+
castor_extractor/visualization/powerbi/client/client.py,sha256=Q_WHYGFpHT4wJ6nZvJa96nBVcpUGv7E2WnyZHBftsJM,8340
|
|
242
|
+
castor_extractor/visualization/powerbi/client/client_test.py,sha256=zWgfc8fOHSRn3hxiX8ujJysmNHeypIoKin9h8_h178k,6668
|
|
243
243
|
castor_extractor/visualization/powerbi/client/constants.py,sha256=88R_aGachNNUZh6OSH2fkDwZtY4KTStzKm_g7HNCqqo,387
|
|
244
244
|
castor_extractor/visualization/powerbi/client/credentials.py,sha256=OVWdhZSNODzTdLysY-sbpBZ3uUkLokeayQZnbJAqt2I,1386
|
|
245
245
|
castor_extractor/visualization/powerbi/client/credentials_test.py,sha256=TzFqxsWVQ3sXR_n0bJsexK9Uz7ceXCEPVqDGWTJzW60,993
|
|
@@ -272,7 +272,8 @@ castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLG
|
|
|
272
272
|
castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
|
|
273
273
|
castor_extractor/visualization/sigma/assets.py,sha256=uKGKDaeY1ejc7XGh4eFaNp2ygG7hgca132xsX4eCwKQ,380
|
|
274
274
|
castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
|
|
275
|
-
castor_extractor/visualization/sigma/client/
|
|
275
|
+
castor_extractor/visualization/sigma/client/authentication.py,sha256=gHukrpfboIjZc_O9CcuDtrl6U-StH0J73VY2J74Bm9o,2279
|
|
276
|
+
castor_extractor/visualization/sigma/client/client.py,sha256=De0xWJfUssfrwzyMNh8D2IIouUQzcS0qLUQrUYtjVkY,10827
|
|
276
277
|
castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44jnrJ-0_A5Y6ZGyDkMf9Ml3eEP8dNkY,581
|
|
277
278
|
castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
|
|
278
279
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=i7KTKnl2Os6752CdtJl0vPSC_Z6JxmacodV_saOnce0,1662
|
|
@@ -421,20 +422,20 @@ castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsK
|
|
|
421
422
|
castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
|
|
422
423
|
castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
|
|
423
424
|
castor_extractor/warehouse/sqlserver/__init__.py,sha256=PdOuYznmvKAbfWAm8UdN47MfEsd9jqPi_dDi3WEo1KY,116
|
|
424
|
-
castor_extractor/warehouse/sqlserver/client.py,sha256=
|
|
425
|
+
castor_extractor/warehouse/sqlserver/client.py,sha256=_rSJOCkp2OkNXQr-4jNLC2_lGJSxw1-qu2L3eaQrpN8,5048
|
|
425
426
|
castor_extractor/warehouse/sqlserver/extract.py,sha256=HEJFDM1a4OeQH7OWhYhCOjhkHfGW6qf_qvjFAeMGPYg,2623
|
|
426
427
|
castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8eeWwOWkxTXvHyVKSVwhURktd8,48
|
|
427
428
|
castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=ojiUQQnHXdWMbgaYOcxKBiwfi7rtu_tyamK6r4t4IBM,2929
|
|
428
429
|
castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
|
|
429
|
-
castor_extractor/warehouse/sqlserver/queries/query.sql,sha256=
|
|
430
|
-
castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=
|
|
431
|
-
castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=
|
|
432
|
-
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=
|
|
433
|
-
castor_extractor/warehouse/sqlserver/queries/view_ddl.sql,sha256=
|
|
434
|
-
castor_extractor/warehouse/sqlserver/query.py,sha256=
|
|
430
|
+
castor_extractor/warehouse/sqlserver/queries/query.sql,sha256=bkENw7QovlG4MyYe5q3XNPs3ajUr_3bNzpbm0Y2upYo,821
|
|
431
|
+
castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=jUnZ10kIZk44nKQ5KiyjZ0YFdypYQj__OlPDRq71EAw,909
|
|
432
|
+
castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=ggzatJOlOfGkMG1NS-hD-n1-3WLbV9Yh8IsQrEFO5X4,2831
|
|
433
|
+
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=MAlnTis43E3Amu1e1Oz_qhaX8Bz-iN0Lrbf9RiohX7Y,99
|
|
434
|
+
castor_extractor/warehouse/sqlserver/queries/view_ddl.sql,sha256=9rynvx6MWg3iZzrWPB7haZfVKEPkxulzryE2g19x804,315
|
|
435
|
+
castor_extractor/warehouse/sqlserver/query.py,sha256=c8f7_SEMR17DhbtzuYphWqWDQ0sCRy-nR442RRBZVYw,1773
|
|
435
436
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
436
|
-
castor_extractor-0.24.
|
|
437
|
-
castor_extractor-0.24.
|
|
438
|
-
castor_extractor-0.24.
|
|
439
|
-
castor_extractor-0.24.
|
|
440
|
-
castor_extractor-0.24.
|
|
437
|
+
castor_extractor-0.24.52.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
438
|
+
castor_extractor-0.24.52.dist-info/METADATA,sha256=0wd_HdsZnM75f8hXza9FNIvjipmHGUDHOe5yjIYX1Ig,27930
|
|
439
|
+
castor_extractor-0.24.52.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
440
|
+
castor_extractor-0.24.52.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
441
|
+
castor_extractor-0.24.52.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|