castor-extractor 0.24.29__py3-none-any.whl → 0.24.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +12 -0
- castor_extractor/commands/extract_looker_studio.py +8 -0
- castor_extractor/visualization/looker_studio/client/client.py +23 -6
- castor_extractor/visualization/looker_studio/extract.py +32 -0
- castor_extractor/visualization/looker_studio/extract_test.py +19 -0
- castor_extractor/visualization/salesforce_reporting/assets.py +1 -0
- castor_extractor/visualization/salesforce_reporting/client/rest.py +52 -0
- castor_extractor/visualization/sigma/client/client.py +47 -7
- castor_extractor/visualization/sigma/client/client_test.py +19 -0
- castor_extractor/visualization/sigma/client/pagination.py +1 -0
- castor_extractor/warehouse/databricks/sql_client.py +14 -12
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.32.dist-info}/METADATA +13 -1
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.32.dist-info}/RECORD +16 -14
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.32.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.32.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.29.dist-info → castor_extractor-0.24.32.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.32 - 2025-07-02
|
|
4
|
+
|
|
5
|
+
* Salesforce reporting - extract report's metadata
|
|
6
|
+
*
|
|
7
|
+
## 0.24.31 - 2025-07-02
|
|
8
|
+
|
|
9
|
+
* Looker Studio: add option to list users via a provided JSON file
|
|
10
|
+
|
|
11
|
+
## 0.24.30 - 2025-06-26
|
|
12
|
+
|
|
13
|
+
* Sigma: remove retry on timeout, decrease pagination for queries
|
|
14
|
+
|
|
3
15
|
## 0.24.29 - 2025-06-24
|
|
4
16
|
|
|
5
17
|
* Strategy: skip descriptions on ValueErrors
|
|
@@ -30,6 +30,14 @@ def main():
|
|
|
30
30
|
default=False,
|
|
31
31
|
help="Skips the extraction of activity logs",
|
|
32
32
|
)
|
|
33
|
+
parser.add_argument(
|
|
34
|
+
"--users-file-path",
|
|
35
|
+
help=(
|
|
36
|
+
"Optional path to a JSON file with user email addresses "
|
|
37
|
+
'as a list of strings (e.g. ["foo@bar.com", "fee@bar.com"]). '
|
|
38
|
+
"If provided, only extracts assets owned by the specified users."
|
|
39
|
+
),
|
|
40
|
+
)
|
|
33
41
|
|
|
34
42
|
parser.add_argument("-o", "--output", help="Directory to write to")
|
|
35
43
|
|
|
@@ -36,23 +36,40 @@ class LookerStudioClient:
|
|
|
36
36
|
self,
|
|
37
37
|
credentials: LookerStudioCredentials,
|
|
38
38
|
bigquery_credentials: Optional[dict] = None,
|
|
39
|
+
user_emails: Optional[list[str]] = None,
|
|
39
40
|
):
|
|
40
41
|
self.admin_sdk_client = AdminSDKClient(credentials)
|
|
41
42
|
self.looker_studio_client = LookerStudioAPIClient(credentials)
|
|
43
|
+
self.user_emails = user_emails
|
|
42
44
|
|
|
43
45
|
self.bigquery_client: Optional[BigQueryClient] = None
|
|
44
46
|
if bigquery_credentials:
|
|
45
47
|
self.bigquery_client = BigQueryClient(bigquery_credentials)
|
|
46
48
|
|
|
47
|
-
def
|
|
49
|
+
def _list_user_emails(self) -> Iterator[str]:
|
|
48
50
|
"""
|
|
49
|
-
|
|
51
|
+
Lists user emails either from a provided JSON file or via the Admin SDK API.
|
|
52
|
+
|
|
53
|
+
Using all Google Workspace users can be inefficient for large clients -
|
|
54
|
+
the client might spend hours checking thousands of users for Looker Studio
|
|
55
|
+
assets when only a handful actually own any. A JSON file allows
|
|
56
|
+
targeting known owners instead.
|
|
50
57
|
"""
|
|
51
|
-
|
|
58
|
+
if self.user_emails is not None:
|
|
59
|
+
yield from self.user_emails
|
|
60
|
+
return
|
|
61
|
+
|
|
62
|
+
for user in self.admin_sdk_client.list_users():
|
|
63
|
+
yield user[USER_EMAIL_FIELD]
|
|
52
64
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
65
|
+
def _get_assets(self) -> Iterator[dict]:
|
|
66
|
+
"""
|
|
67
|
+
Extracts reports and data sources user by user. The loop is necessary
|
|
68
|
+
because the Looker Studio API can only retrieve the assets owned by a
|
|
69
|
+
single user.
|
|
70
|
+
"""
|
|
71
|
+
for user_email in self._list_user_emails():
|
|
72
|
+
yield from self.looker_studio_client.fetch_user_assets(user_email)
|
|
56
73
|
|
|
57
74
|
def _get_source_queries(self) -> Iterator[dict]:
|
|
58
75
|
"""
|
|
@@ -70,21 +70,53 @@ def _bigquery_credentials_or_none(params: dict) -> Optional[dict]:
|
|
|
70
70
|
return cast(dict, json.load(file))
|
|
71
71
|
|
|
72
72
|
|
|
73
|
+
def _validate_user_emails(user_emails: list[str]):
|
|
74
|
+
"""
|
|
75
|
+
Raises an error if the user emails are not in the expected format (list of strings),
|
|
76
|
+
or if the list is empty.
|
|
77
|
+
"""
|
|
78
|
+
if not isinstance(user_emails, list):
|
|
79
|
+
raise TypeError("The users file must be a list")
|
|
80
|
+
|
|
81
|
+
if len(user_emails) == 0:
|
|
82
|
+
raise ValueError("The users file must contain at least one user email")
|
|
83
|
+
|
|
84
|
+
if not all(isinstance(email, str) for email in user_emails):
|
|
85
|
+
raise TypeError("All items in users list must be strings")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _read_optional_user_emails(
|
|
89
|
+
users_file_path: Optional[str],
|
|
90
|
+
) -> Optional[list[str]]:
|
|
91
|
+
"""Loads the user emails from a file, if it was provided."""
|
|
92
|
+
if not users_file_path:
|
|
93
|
+
return None
|
|
94
|
+
|
|
95
|
+
with open(users_file_path, "r") as file:
|
|
96
|
+
user_emails = json.load(file)
|
|
97
|
+
|
|
98
|
+
_validate_user_emails(user_emails)
|
|
99
|
+
return user_emails
|
|
100
|
+
|
|
101
|
+
|
|
73
102
|
def extract_all(**kwargs) -> None:
|
|
74
103
|
"""
|
|
75
104
|
Extracts data from Looker Studio and stores the output files locally under
|
|
76
105
|
the given output_directory.
|
|
77
106
|
"""
|
|
107
|
+
users_file_path = kwargs.get("users_file_path")
|
|
78
108
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
79
109
|
|
|
80
110
|
credentials = _credentials(kwargs)
|
|
81
111
|
has_view_activity_logs = bool(credentials.has_view_activity_logs)
|
|
112
|
+
user_emails = _read_optional_user_emails(users_file_path)
|
|
82
113
|
|
|
83
114
|
bigquery_credentials = _bigquery_credentials_or_none(kwargs)
|
|
84
115
|
|
|
85
116
|
client = LookerStudioClient(
|
|
86
117
|
credentials=credentials,
|
|
87
118
|
bigquery_credentials=bigquery_credentials,
|
|
119
|
+
user_emails=user_emails,
|
|
88
120
|
)
|
|
89
121
|
ts = current_timestamp()
|
|
90
122
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from .extract import _validate_user_emails
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test__validate_user_emails():
|
|
7
|
+
with pytest.raises(TypeError):
|
|
8
|
+
_validate_user_emails("toto@tata.com")
|
|
9
|
+
|
|
10
|
+
with pytest.raises(TypeError):
|
|
11
|
+
_validate_user_emails({"not": "the", "right": "format"})
|
|
12
|
+
|
|
13
|
+
with pytest.raises(ValueError):
|
|
14
|
+
_validate_user_emails([])
|
|
15
|
+
|
|
16
|
+
with pytest.raises(TypeError):
|
|
17
|
+
_validate_user_emails([1, 2, 3, 4])
|
|
18
|
+
|
|
19
|
+
_validate_user_emails(["admin@toto.com", "tata@toto.com"])
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections.abc import Iterator
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
6
|
+
import requests
|
|
7
|
+
|
|
5
8
|
from ....utils import build_url
|
|
6
9
|
from ....utils.salesforce import SalesforceBaseClient
|
|
7
10
|
from ..assets import SalesforceReportingAsset
|
|
@@ -15,6 +18,8 @@ REQUIRING_URL_ASSETS = (
|
|
|
15
18
|
SalesforceReportingAsset.FOLDERS,
|
|
16
19
|
)
|
|
17
20
|
|
|
21
|
+
_CONCURRENT_THREADS = 50
|
|
22
|
+
|
|
18
23
|
|
|
19
24
|
class SalesforceReportingClient(SalesforceBaseClient):
|
|
20
25
|
"""
|
|
@@ -50,6 +55,50 @@ class SalesforceReportingClient(SalesforceBaseClient):
|
|
|
50
55
|
url = self._get_asset_url(asset_type, asset)
|
|
51
56
|
yield {**asset, "Url": url}
|
|
52
57
|
|
|
58
|
+
def _metadata(self, report_id: str) -> Optional[dict]:
|
|
59
|
+
url = f"services/data/v60.0/analytics/reports/{report_id}/describe"
|
|
60
|
+
try:
|
|
61
|
+
metadata = self._get(url, retry_on_timeout=False)
|
|
62
|
+
# pick only what we need to build the lineage
|
|
63
|
+
columns = metadata["reportExtendedMetadata"]["detailColumnInfo"]
|
|
64
|
+
return {
|
|
65
|
+
"reportId": report_id,
|
|
66
|
+
"detailColumnInfo": columns or dict(),
|
|
67
|
+
}
|
|
68
|
+
except (requests.HTTPError, requests.RequestException) as ex:
|
|
69
|
+
# Extracting column metadata is used only for lineage purposes
|
|
70
|
+
# and is non-critical. API errors are common during this step,
|
|
71
|
+
# so we choose to skip them rather than fail the process. The same
|
|
72
|
+
# rows consistently fail, and retries have proven ineffective.
|
|
73
|
+
logger.info(ex)
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
def _fetch_reports_metadata(self) -> Iterator[dict]:
|
|
77
|
+
"""
|
|
78
|
+
Use the "describe" endpoint to extract report metadata.
|
|
79
|
+
Keep only the detailColumnInfo, which is required for building the lineage.
|
|
80
|
+
|
|
81
|
+
More info here:
|
|
82
|
+
https://developer.salesforce.com/docs/atlas.en-us.api_analytics.meta/api_analytics/sforce_analytics_rest_api_getbasic_reportmetadata.htm
|
|
83
|
+
https://www.notion.so/castordoc/Salesforce-Lineage-216a1c3d458580859888cf4ca2d7fa51?source=copy_link
|
|
84
|
+
"""
|
|
85
|
+
# The "describe" endpoint requires report_ids. To avoid introducing
|
|
86
|
+
# task dependencies, we opted to re-extract the reports.
|
|
87
|
+
# It is fast anyway, since it's running a SQL query
|
|
88
|
+
reports = self.fetch(SalesforceReportingAsset.REPORTS)
|
|
89
|
+
report_ids = [report["Id"] for report in reports]
|
|
90
|
+
|
|
91
|
+
# Calling "describe" on each report individually can be slow,
|
|
92
|
+
# especially for accounts with thousands of reports. That's why
|
|
93
|
+
# we use multithreading here — it significantly improves performance.
|
|
94
|
+
with ThreadPoolExecutor(max_workers=_CONCURRENT_THREADS) as executor:
|
|
95
|
+
fetch_results = executor.map(self._metadata, report_ids)
|
|
96
|
+
|
|
97
|
+
for metadata in fetch_results:
|
|
98
|
+
if not metadata:
|
|
99
|
+
continue
|
|
100
|
+
yield metadata
|
|
101
|
+
|
|
53
102
|
def fetch(self, asset: SalesforceReportingAsset) -> list[dict]:
|
|
54
103
|
"""
|
|
55
104
|
Fetch Salesforce Reporting assets
|
|
@@ -59,4 +108,7 @@ class SalesforceReportingClient(SalesforceBaseClient):
|
|
|
59
108
|
if asset in REQUIRING_URL_ASSETS:
|
|
60
109
|
return list(self._fetch_and_add_url(asset))
|
|
61
110
|
|
|
111
|
+
if asset == SalesforceReportingAsset.REPORTS_METADATA:
|
|
112
|
+
return list(self._fetch_reports_metadata())
|
|
113
|
+
|
|
62
114
|
return list(self._query_all(queries[asset]))
|
|
@@ -2,7 +2,7 @@ from collections.abc import Iterator
|
|
|
2
2
|
from concurrent.futures import ThreadPoolExecutor
|
|
3
3
|
from functools import partial
|
|
4
4
|
from http import HTTPStatus
|
|
5
|
-
from typing import Callable, Optional
|
|
5
|
+
from typing import Callable, Iterable, Optional
|
|
6
6
|
|
|
7
7
|
import requests
|
|
8
8
|
from pydantic import BaseModel
|
|
@@ -19,7 +19,11 @@ from ....utils import (
|
|
|
19
19
|
from ..assets import SigmaAsset
|
|
20
20
|
from .credentials import SigmaCredentials
|
|
21
21
|
from .endpoints import SigmaEndpointFactory
|
|
22
|
-
from .pagination import
|
|
22
|
+
from .pagination import (
|
|
23
|
+
SIGMA_API_LIMIT,
|
|
24
|
+
SIGMA_QUERIES_PAGINATION_LIMIT,
|
|
25
|
+
SigmaPagination,
|
|
26
|
+
)
|
|
23
27
|
|
|
24
28
|
_CONTENT_TYPE = "application/x-www-form-urlencoded"
|
|
25
29
|
|
|
@@ -101,9 +105,27 @@ class SigmaClient(APIClient):
|
|
|
101
105
|
safe_mode=safe_mode or SIGMA_SAFE_MODE,
|
|
102
106
|
)
|
|
103
107
|
|
|
104
|
-
def _get_paginated(
|
|
108
|
+
def _get_paginated(
|
|
109
|
+
self,
|
|
110
|
+
endpoint: str,
|
|
111
|
+
limit: int = SIGMA_API_LIMIT,
|
|
112
|
+
) -> Callable:
|
|
113
|
+
"""
|
|
114
|
+
Sigma’s API does not experience random timeouts, unlike some other APIs.
|
|
115
|
+
However, extracting queries from certain workbooks can take a
|
|
116
|
+
significant amount of time.
|
|
117
|
+
Previously, when a timeout occurred, the system would retry multiple
|
|
118
|
+
times — even though we knew it would eventually fail due to the inherent
|
|
119
|
+
slowness of the operation.
|
|
120
|
+
These retries only delayed the inevitable failure without adding value.
|
|
121
|
+
To address this, we've disabled retries on timeout and instead adjusted
|
|
122
|
+
the page size when extracting queries.
|
|
123
|
+
"""
|
|
105
124
|
return partial(
|
|
106
|
-
self._get,
|
|
125
|
+
self._get,
|
|
126
|
+
retry_on_timeout=False, # explained in the docstring
|
|
127
|
+
endpoint=endpoint,
|
|
128
|
+
params={"limit": limit},
|
|
107
129
|
)
|
|
108
130
|
|
|
109
131
|
def _get_all_datasets(self) -> Iterator[dict]:
|
|
@@ -200,16 +222,34 @@ class SigmaClient(APIClient):
|
|
|
200
222
|
"element_id": lineage.context.element_id,
|
|
201
223
|
}
|
|
202
224
|
|
|
225
|
+
@staticmethod
|
|
226
|
+
def _yield_deduplicated_queries(
|
|
227
|
+
queries: Iterable[dict], workbook_id: str
|
|
228
|
+
) -> Iterator[dict]:
|
|
229
|
+
"""
|
|
230
|
+
Returns unique queries for a workbook. This is necessary because the API
|
|
231
|
+
unfortunately returns duplicate entries for some workbook elements.
|
|
232
|
+
"""
|
|
233
|
+
seen_elements = set()
|
|
234
|
+
|
|
235
|
+
for query in queries:
|
|
236
|
+
element_id = query["elementId"]
|
|
237
|
+
if element_id in seen_elements:
|
|
238
|
+
continue
|
|
239
|
+
|
|
240
|
+
seen_elements.add(element_id)
|
|
241
|
+
yield {**query, "workbook_id": workbook_id}
|
|
242
|
+
|
|
203
243
|
def _get_all_queries(self, workbooks: list[dict]) -> Iterator[dict]:
|
|
204
244
|
for workbook in workbooks:
|
|
205
245
|
workbook_id = workbook["workbookId"]
|
|
206
246
|
request = self._get_paginated(
|
|
207
|
-
SigmaEndpointFactory.queries(workbook_id)
|
|
247
|
+
SigmaEndpointFactory.queries(workbook_id),
|
|
248
|
+
limit=SIGMA_QUERIES_PAGINATION_LIMIT,
|
|
208
249
|
)
|
|
209
250
|
queries = fetch_all_pages(request, SigmaPagination)
|
|
210
251
|
|
|
211
|
-
|
|
212
|
-
yield {**query, "workbook_id": workbook_id}
|
|
252
|
+
yield from self._yield_deduplicated_queries(queries, workbook_id)
|
|
213
253
|
|
|
214
254
|
def fetch(
|
|
215
255
|
self,
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .client import SigmaClient
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_SigmaClient__yield_deduplicated_queries():
|
|
5
|
+
workbook_id = "workbook1"
|
|
6
|
+
mock_queries = [
|
|
7
|
+
{"elementId": "element1", "name": "Query 1"},
|
|
8
|
+
{"elementId": "element2", "name": "Query 2"},
|
|
9
|
+
{"elementId": "element1", "name": "Query 1"}, # Duplicate
|
|
10
|
+
{"elementId": "element3", "name": "Query 3"},
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
queries = list(
|
|
14
|
+
SigmaClient._yield_deduplicated_queries(mock_queries, workbook_id)
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
assert len(queries) == 3
|
|
18
|
+
for query in queries:
|
|
19
|
+
assert query["workbook_id"] == workbook_id
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from datetime import date
|
|
4
|
-
from typing import Optional
|
|
5
4
|
|
|
6
5
|
from databricks import sql # type: ignore
|
|
7
6
|
|
|
@@ -17,7 +16,7 @@ _INFORMATION_SCHEMA_SQL = "SELECT * FROM system.information_schema"
|
|
|
17
16
|
|
|
18
17
|
_LINEAGE_SQL_TPL = """
|
|
19
18
|
SELECT * FROM system.access.{table_name}
|
|
20
|
-
WHERE event_date =
|
|
19
|
+
WHERE event_date = DATE('{day}')
|
|
21
20
|
"""
|
|
22
21
|
|
|
23
22
|
|
|
@@ -34,11 +33,7 @@ class DatabricksSQLClient:
|
|
|
34
33
|
self._host = credentials.host
|
|
35
34
|
self._token = credentials.token
|
|
36
35
|
|
|
37
|
-
def execute_sql(
|
|
38
|
-
self,
|
|
39
|
-
query: str,
|
|
40
|
-
params: Optional[dict] = None,
|
|
41
|
-
):
|
|
36
|
+
def execute_sql(self, query: str):
|
|
42
37
|
"""
|
|
43
38
|
Execute a SQL query on Databricks system tables and return the results.
|
|
44
39
|
https://docs.databricks.com/en/dev-tools/python-sql-connector.html
|
|
@@ -52,7 +47,7 @@ class DatabricksSQLClient:
|
|
|
52
47
|
access_token=self._token,
|
|
53
48
|
) as connection:
|
|
54
49
|
with connection.cursor() as cursor:
|
|
55
|
-
cursor.execute(query, params)
|
|
50
|
+
cursor.execute(query)
|
|
56
51
|
return cursor.fetchall()
|
|
57
52
|
|
|
58
53
|
def _needs_extraction(self, entity: TagEntity) -> bool:
|
|
@@ -89,16 +84,23 @@ class DatabricksSQLClient:
|
|
|
89
84
|
return mapping
|
|
90
85
|
|
|
91
86
|
def get_lineage(
|
|
92
|
-
self,
|
|
87
|
+
self,
|
|
88
|
+
lineage_entity: LineageEntity,
|
|
89
|
+
day: date,
|
|
93
90
|
) -> list[dict]:
|
|
94
91
|
"""
|
|
95
92
|
Fetch {TABLE|COLUMN} lineage of the given day, via system tables
|
|
96
93
|
https://docs.databricks.com/en/admin/system-tables/lineage.html
|
|
94
|
+
|
|
95
|
+
Unfortunately, passing parameters is not always supported. We have to
|
|
96
|
+
format the query beforehand and pass it as plain text for execution.
|
|
97
97
|
"""
|
|
98
98
|
table_name = f"{lineage_entity.value.lower()}_lineage"
|
|
99
|
-
query = _LINEAGE_SQL_TPL.format(
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
query = _LINEAGE_SQL_TPL.format(
|
|
100
|
+
table_name=table_name,
|
|
101
|
+
day=day,
|
|
102
|
+
)
|
|
103
|
+
result = self.execute_sql(query)
|
|
102
104
|
data = []
|
|
103
105
|
for row in result:
|
|
104
106
|
data.append(row.asDict())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.29
|
|
3
|
+
Version: 0.24.32
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -215,6 +215,18 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.32 - 2025-07-02
|
|
219
|
+
|
|
220
|
+
* Salesforce reporting - extract report's metadata
|
|
221
|
+
*
|
|
222
|
+
## 0.24.31 - 2025-07-02
|
|
223
|
+
|
|
224
|
+
* Looker Studio: add option to list users via a provided JSON file
|
|
225
|
+
|
|
226
|
+
## 0.24.30 - 2025-06-26
|
|
227
|
+
|
|
228
|
+
* Sigma: remove retry on timeout, decrease pagination for queries
|
|
229
|
+
|
|
218
230
|
## 0.24.29 - 2025-06-24
|
|
219
231
|
|
|
220
232
|
* Strategy: skip descriptions on ValueErrors
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=lQxSt8IlqUEvw7ldjh6EV3ifzbvsiqfztK5dHGx_Y8g,18703
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -10,7 +10,7 @@ castor_extractor/commands/extract_confluence.py,sha256=blYcnDqywXNKRQ1aZAD9FclhL
|
|
|
10
10
|
castor_extractor/commands/extract_databricks.py,sha256=SVKyoa-BBUQAM6HRHf1Wdg9-tpICic2yyvXQwHcNBhA,1264
|
|
11
11
|
castor_extractor/commands/extract_domo.py,sha256=jvAawUsUTHrwCn_koK6StmQr4n_b5GyvJi6uu6WS0SM,1061
|
|
12
12
|
castor_extractor/commands/extract_looker.py,sha256=cySLiolLCgrREJ9d0kMrJ7P8K3efHTBTzShalWVfI3A,1214
|
|
13
|
-
castor_extractor/commands/extract_looker_studio.py,sha256=
|
|
13
|
+
castor_extractor/commands/extract_looker_studio.py,sha256=YFQWabmBEaWZFCquMNZw_iq6TF95KBVGdemcYiJMhk8,1399
|
|
14
14
|
castor_extractor/commands/extract_metabase_api.py,sha256=NXctea4GT_1iRDitY92nV3TKSqhjEUwYSxwPJMRS3iw,786
|
|
15
15
|
castor_extractor/commands/extract_metabase_db.py,sha256=tYIhTPPgj1mN-07LyWcL6e-YoGp7HCWda58-5Ukyg_I,1255
|
|
16
16
|
castor_extractor/commands/extract_mode.py,sha256=Q4iO-VAKMg4zFPejhAO-foZibL5Ht3jsnhWKwJ0oqUU,823
|
|
@@ -191,14 +191,15 @@ castor_extractor/visualization/looker_studio/__init__.py,sha256=GccG-GJXoNhjXFPk
|
|
|
191
191
|
castor_extractor/visualization/looker_studio/assets.py,sha256=rI73rbVrfwkkepqZr0zPouP2lPUfJxSi21RKtOTHtAA,308
|
|
192
192
|
castor_extractor/visualization/looker_studio/client/__init__.py,sha256=YkQaVDJa-7KSwdOLjtgKJMRiafbGNKC_46YVx0hYZ1Q,129
|
|
193
193
|
castor_extractor/visualization/looker_studio/client/admin_sdk_client.py,sha256=HIeyT9JTW1TPwVzD2Q-VfJ99jMP80Z-4CznKAnTnp2w,3493
|
|
194
|
-
castor_extractor/visualization/looker_studio/client/client.py,sha256=
|
|
194
|
+
castor_extractor/visualization/looker_studio/client/client.py,sha256=N4AtFMAPhLoe2i2rVhCbqEVIoxzcBKFh29V9LjRcdH0,3969
|
|
195
195
|
castor_extractor/visualization/looker_studio/client/credentials.py,sha256=F4ISI8Ua_HJsMuGhYql28o3hKYR4sL_uzkrUkRiekRo,1347
|
|
196
196
|
castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqNDdlDBOOpiF7LpjyHMrzeClJktidCr1pTDUs,669
|
|
197
197
|
castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
|
|
198
198
|
castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Phq378VEaFLD-nyP2_A1wge6HUP45jSthhlNjD7aqSg,4085
|
|
199
199
|
castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
|
|
200
200
|
castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=Ub4rdrJ5WTPWKI-eVmXrNMv0Ktmti4b-93zZBr0xEB0,1426
|
|
201
|
-
castor_extractor/visualization/looker_studio/extract.py,sha256=
|
|
201
|
+
castor_extractor/visualization/looker_studio/extract.py,sha256=NU48xQ83UtRW3jXKJcvofzqgEM2lHGjtTzjbKOSB50A,4059
|
|
202
|
+
castor_extractor/visualization/looker_studio/extract_test.py,sha256=ZckAxUMuoEjJ9RWkfRvt9M8SxblkQvsq-Grb8GSs-y0,492
|
|
202
203
|
castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
|
|
203
204
|
castor_extractor/visualization/metabase/assets.py,sha256=nu3FwQBU_hdS2DBvgXAwQlEEi76QiNK2tMKEtMyctaY,2874
|
|
204
205
|
castor_extractor/visualization/metabase/client/__init__.py,sha256=KBvaPMofBRV3m_sZAnKNCrJGr-Z88EbpdzEzWPQ_uBk,99
|
|
@@ -264,18 +265,19 @@ castor_extractor/visualization/qlik/client/rest.py,sha256=x_Vx0xjRvj4D5FPm2CzBH_
|
|
|
264
265
|
castor_extractor/visualization/qlik/client/rest_test.py,sha256=yfiUht6BcpBYS2uGdaKCH-tYe0fQ-joM4MbitKwOf24,1799
|
|
265
266
|
castor_extractor/visualization/qlik/extract.py,sha256=CkJ2UELZmADUxdB84VGH5-qd1tz9Dh_ywoLULTkbrII,2186
|
|
266
267
|
castor_extractor/visualization/salesforce_reporting/__init__.py,sha256=MvArD0GKNIpCDvLIYcpKrjMjFLhMyDETK6i3k0Fb6Tk,124
|
|
267
|
-
castor_extractor/visualization/salesforce_reporting/assets.py,sha256=
|
|
268
|
+
castor_extractor/visualization/salesforce_reporting/assets.py,sha256=m9UnyaocyzSYZh-NkiyjAyk9D1OMBwyP9gqAJrSIwMg,313
|
|
268
269
|
castor_extractor/visualization/salesforce_reporting/client/__init__.py,sha256=DIA6f_vNJZqT89qVYxg98Le7QeDn2y0Qew03V3J9t9o,44
|
|
269
|
-
castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256
|
|
270
|
+
castor_extractor/visualization/salesforce_reporting/client/rest.py,sha256=AqL1DTOpRy9KToJTlZohqUaRR5q-Xta79exi8odh2uE,4333
|
|
270
271
|
castor_extractor/visualization/salesforce_reporting/client/soql.py,sha256=ytZnX6zE-NoS_Kz12KghMcCM4ukPwhMj6U0rQZ_8Isk,1621
|
|
271
272
|
castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLGf4HDTFqhVTQAvv_OrKxc8waycfBKdsVAc,1359
|
|
272
273
|
castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
|
|
273
274
|
castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
|
|
274
275
|
castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
|
|
275
|
-
castor_extractor/visualization/sigma/client/client.py,sha256=
|
|
276
|
+
castor_extractor/visualization/sigma/client/client.py,sha256=ZE44k5klBVnc5lld3tpjuKGeSdFmlJ0wr5DOB4pEfco,9446
|
|
277
|
+
castor_extractor/visualization/sigma/client/client_test.py,sha256=ae0ZOvKutCm44jnrJ-0_A5Y6ZGyDkMf9Ml3eEP8dNkY,581
|
|
276
278
|
castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
|
|
277
279
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
|
|
278
|
-
castor_extractor/visualization/sigma/client/pagination.py,sha256=
|
|
280
|
+
castor_extractor/visualization/sigma/client/pagination.py,sha256=2bFA7GiBUUasFtHJKA90516d283p7Pg50-4zw6Fwt8I,726
|
|
279
281
|
castor_extractor/visualization/sigma/extract.py,sha256=XIT1qsj6g6dgBWP8HPfj_medZexu48EaY9tUwi14gzM,2298
|
|
280
282
|
castor_extractor/visualization/strategy/__init__.py,sha256=HOMv4JxqF5ZmViWi-pDE-PSXJRLTdXal_jtpHG_rlR8,123
|
|
281
283
|
castor_extractor/visualization/strategy/assets.py,sha256=yFXF_dX01patC0HQ1eU7Jo_4DZ4m6IJEg0uCB71tMoI,480
|
|
@@ -345,7 +347,7 @@ castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0
|
|
|
345
347
|
castor_extractor/warehouse/databricks/lineage.py,sha256=jwiRXrgqBAtzQt5EgErYrN8YRyviEEHmyrSbw8TSPq4,2105
|
|
346
348
|
castor_extractor/warehouse/databricks/lineage_test.py,sha256=PyBn1eAoxLm4Bz5M0F4zmaxFX2mXRTM_uug5OKbQPQs,2684
|
|
347
349
|
castor_extractor/warehouse/databricks/pagination.py,sha256=sM1G0sN1pf1TPpI0Y3Oew378UGEKVkMRc2Mlu9tDjLo,545
|
|
348
|
-
castor_extractor/warehouse/databricks/sql_client.py,sha256=
|
|
350
|
+
castor_extractor/warehouse/databricks/sql_client.py,sha256=BchHMNqHPtZsJWhj2XYq3QVVTj3XfKhzhhPTJng8vXo,3656
|
|
349
351
|
castor_extractor/warehouse/databricks/types.py,sha256=-TFX4jS6_c3wQLOpJTKpLeGS21YIPjKDjISnzeUPdCc,46
|
|
350
352
|
castor_extractor/warehouse/databricks/utils.py,sha256=5CKn6Me1Tus97H_qDEz_5tkhd4ARmwk2qiC3GndjyCc,1969
|
|
351
353
|
castor_extractor/warehouse/databricks/utils_test.py,sha256=_guTuzRWRTZdDY7ils0X1K8jhI9T877MEtw3x_YDg9I,2415
|
|
@@ -428,8 +430,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
428
430
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
429
431
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
430
432
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
431
|
-
castor_extractor-0.24.
|
|
432
|
-
castor_extractor-0.24.
|
|
433
|
-
castor_extractor-0.24.
|
|
434
|
-
castor_extractor-0.24.
|
|
435
|
-
castor_extractor-0.24.
|
|
433
|
+
castor_extractor-0.24.32.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
434
|
+
castor_extractor-0.24.32.dist-info/METADATA,sha256=vsfvzg3F_c34Ek6G9oQ5LRVpGafwrxIJdXnNcJO4_n8,26156
|
|
435
|
+
castor_extractor-0.24.32.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
436
|
+
castor_extractor-0.24.32.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
437
|
+
castor_extractor-0.24.32.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|