castor-extractor 0.24.4__py3-none-any.whl → 0.24.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +12 -0
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/batch.py +16 -0
- castor_extractor/utils/batch_test.py +27 -0
- castor_extractor/visualization/domo/client/client.py +10 -4
- castor_extractor/visualization/tableau/client/client_metadata_api.py +23 -18
- castor_extractor/visualization/tableau/client/gql_queries.py +1 -1
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.7.dist-info}/METADATA +15 -3
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.7.dist-info}/RECORD +12 -10
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.7.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.7.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.7.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.7 - 2025-04-07
|
|
4
|
+
|
|
5
|
+
* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
|
|
6
|
+
|
|
7
|
+
## 0.24.6 - 2025-04-03
|
|
8
|
+
|
|
9
|
+
* Domo - extract cards metadata by batch to avoid hitting the URL max length
|
|
10
|
+
|
|
11
|
+
## 0.24.5 - 2025-04-02
|
|
12
|
+
|
|
13
|
+
* bump dependencies: google-cloud-storage
|
|
14
|
+
|
|
3
15
|
## 0.24.4 - 2025-03-19
|
|
4
16
|
|
|
5
17
|
* Snowflake:
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from typing import Iterator, List, TypeVar
|
|
2
|
+
|
|
3
|
+
T = TypeVar("T")
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def batch_of_length(
    elements: List[T],
    batch_size: int,
) -> Iterator[List[T]]:
    """
    Split the given elements into consecutive chunks of at most `batch_size`

    Chunks preserve the input order. The last chunk holds the remainder and
    may be shorter than `batch_size`; an empty input yields no chunk at all.

    Raises AssertionError when `batch_size` is lower than 1. As this function
    is a generator, the check only runs once iteration starts.
    """
    # raised explicitly rather than with `assert`, so that the check survives
    # `python -O` while keeping the exception type callers already rely on
    if batch_size < 1:
        raise AssertionError("batch size must be greater or equal to 1")
    # slicing clamps at the end of the list: no need to bound the upper index
    for index in range(0, len(elements), batch_size):
        yield elements[index : index + batch_size]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from .batch import batch_of_length
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_batch_of_length():
    """Check chunking for several batch sizes, and rejection of a negative one"""
    elements = ["a", "b", "c", "d", "e", "f", "g", "h"]

    # batch size -> expected chunks
    expected_by_size = {
        # uneven split: the last batch holds the remainder
        3: [
            ["a", "b", "c"],
            ["d", "e", "f"],
            ["g", "h"],
        ],
        # batch larger than the input: everything fits in a single batch
        1000: [
            elements,
        ],
        # remainder of exactly one element
        7: [
            ["a", "b", "c", "d", "e", "f", "g"],
            ["h"],
        ],
    }
    for batch_size, expected in expected_by_size.items():
        batches = list(batch_of_length(elements, batch_size))
        assert batches == expected

    # the generator must be consumed for the size check to trigger
    with pytest.raises(AssertionError):
        list(batch_of_length(elements, -12))
|
|
@@ -9,6 +9,7 @@ import requests
|
|
|
9
9
|
from ....utils import (
|
|
10
10
|
RequestSafeMode,
|
|
11
11
|
at_midnight,
|
|
12
|
+
batch_of_length,
|
|
12
13
|
current_date,
|
|
13
14
|
empty_iterator,
|
|
14
15
|
handle_response,
|
|
@@ -48,6 +49,8 @@ _RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
|
|
|
48
49
|
|
|
49
50
|
_PARENT_FOLDER = "/Dashboards"
|
|
50
51
|
|
|
52
|
+
_CARDS_BATCH_SIZE = 100
|
|
53
|
+
|
|
51
54
|
logger = logging.getLogger(__name__)
|
|
52
55
|
|
|
53
56
|
|
|
@@ -156,16 +159,19 @@ class DomoClient:
|
|
|
156
159
|
|
|
157
160
|
return all_results
|
|
158
161
|
|
|
162
|
+
def _cards_metadata(self, card_ids: list[int]) -> Iterator[dict]:
    """Yields the metadata of the given cards, fetched one batch at a time"""
    # a single request carrying every card id could exceed the URL max length
    batches = batch_of_length(card_ids, _CARDS_BATCH_SIZE)
    for ids in batches:
        yield from self._get_element(
            self._endpoint_factory.cards_metadata(ids),
        )
|
|
167
|
+
|
|
159
168
|
def _datasources(self, card_ids: list[int]) -> RawData:
|
|
160
169
|
"""Yields all distinct datasources associated to the given cards"""
|
|
161
170
|
if not card_ids:
|
|
162
171
|
return empty_iterator()
|
|
163
172
|
|
|
164
|
-
endpoint = self._endpoint_factory.cards_metadata(card_ids)
|
|
165
|
-
cards_metadata = self._get_element(endpoint)
|
|
166
|
-
|
|
167
173
|
processed: set[str] = set()
|
|
168
|
-
for card in
|
|
174
|
+
for card in self._cards_metadata(card_ids):
|
|
169
175
|
for datasource in card["datasources"]:
|
|
170
176
|
id_ = datasource["dataSourceId"]
|
|
171
177
|
if id_ in processed:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from collections.abc import Iterator
|
|
2
3
|
from typing import Optional
|
|
3
4
|
|
|
@@ -9,15 +10,14 @@ from ..constants import DEFAULT_PAGE_SIZE
|
|
|
9
10
|
from .errors import TableauApiError, TableauApiTimeout
|
|
10
11
|
from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
|
|
11
12
|
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
12
15
|
# increase the value when extraction is too slow
|
|
13
16
|
# decrease the value when timeouts arise
|
|
14
17
|
_CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
|
|
15
|
-
# for some clients, extraction of columns tend to hit the node limit
|
|
16
|
-
# https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
|
|
17
|
-
# the workaround is to reduce pagination
|
|
18
|
-
TableauAsset.COLUMN: 50,
|
|
19
18
|
# fields are light but volumes are bigger
|
|
20
19
|
TableauAsset.FIELD: 1000,
|
|
20
|
+
# tables are sometimes heavy
|
|
21
21
|
TableauAsset.TABLE: 50,
|
|
22
22
|
}
|
|
23
23
|
|
|
@@ -51,8 +51,9 @@ def _check_errors(answer: dict) -> None:
|
|
|
51
51
|
|
|
52
52
|
def gql_query_scroll(
|
|
53
53
|
server,
|
|
54
|
-
query: str,
|
|
55
54
|
resource: str,
|
|
55
|
+
fields: str,
|
|
56
|
+
page_size: int,
|
|
56
57
|
) -> Iterator[SerializedAsset]:
|
|
57
58
|
"""
|
|
58
59
|
Iterate over GQL query results, handling pagination and cursor
|
|
@@ -67,23 +68,27 @@ def gql_query_scroll(
|
|
|
67
68
|
max_retries=_RETRY_COUNT,
|
|
68
69
|
base_ms=_RETRY_BASE_MS,
|
|
69
70
|
)
|
|
70
|
-
def _call(
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
71
|
+
def _call(first: int, offset: int) -> dict:
|
|
72
|
+
query = QUERY_TEMPLATE.format(
|
|
73
|
+
resource=resource,
|
|
74
|
+
fields=fields,
|
|
75
|
+
first=first,
|
|
76
|
+
offset=offset,
|
|
77
|
+
)
|
|
78
|
+
answer = server.metadata.query(query)
|
|
75
79
|
_check_errors(answer)
|
|
76
80
|
return answer["data"][f"{resource}Connection"]
|
|
77
81
|
|
|
78
|
-
|
|
82
|
+
current_offset = 0
|
|
79
83
|
while True:
|
|
80
|
-
payload = _call(
|
|
84
|
+
payload = _call(first=page_size, offset=current_offset)
|
|
81
85
|
yield payload["nodes"]
|
|
82
86
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
+
current_offset += len(payload["nodes"])
|
|
88
|
+
total = payload["totalCount"]
|
|
89
|
+
logger.info(f"Extracted {current_offset}/{total} {resource}")
|
|
90
|
+
|
|
91
|
+
if not payload["pageInfo"]["hasNextPage"]:
|
|
87
92
|
break
|
|
88
93
|
|
|
89
94
|
|
|
@@ -107,12 +112,12 @@ class TableauClientMetadataApi:
|
|
|
107
112
|
fields: str,
|
|
108
113
|
page_size: int = DEFAULT_PAGE_SIZE,
|
|
109
114
|
) -> SerializedAsset:
|
|
110
|
-
|
|
115
|
+
result_pages = gql_query_scroll(
|
|
116
|
+
self._server,
|
|
111
117
|
resource=resource,
|
|
112
118
|
fields=fields,
|
|
113
119
|
page_size=page_size,
|
|
114
120
|
)
|
|
115
|
-
result_pages = gql_query_scroll(self._server, query, resource)
|
|
116
121
|
return [asset for page in result_pages for asset in page]
|
|
117
122
|
|
|
118
123
|
def _page_size(self, asset: TableauAsset) -> int:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.
|
|
3
|
+
Version: 0.24.7
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -35,7 +35,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|
|
|
35
35
|
Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
|
|
36
36
|
Requires-Dist: google-auth (>=2,<3)
|
|
37
37
|
Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
|
|
38
|
-
Requires-Dist: google-cloud-storage (>=
|
|
38
|
+
Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
|
|
39
39
|
Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
|
|
40
40
|
Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
|
|
41
41
|
Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
|
|
@@ -51,7 +51,7 @@ Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all
|
|
|
51
51
|
Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
|
|
52
52
|
Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
|
|
53
53
|
Requires-Dist: requests (>=2.0.0,<3.0.0)
|
|
54
|
-
Requires-Dist: setuptools (>=
|
|
54
|
+
Requires-Dist: setuptools (>=78.1)
|
|
55
55
|
Requires-Dist: snowflake-connector-python (>=3.4.0,<4.0.0) ; extra == "snowflake" or extra == "all"
|
|
56
56
|
Requires-Dist: snowflake-sqlalchemy (!=1.2.5,<2.0.0) ; extra == "snowflake" or extra == "all"
|
|
57
57
|
Requires-Dist: sqlalchemy (>=1.4,<1.5)
|
|
@@ -210,6 +210,18 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
|
|
|
210
210
|
|
|
211
211
|
# Changelog
|
|
212
212
|
|
|
213
|
+
## 0.24.7 - 2025-04-07
|
|
214
|
+
|
|
215
|
+
* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
|
|
216
|
+
|
|
217
|
+
## 0.24.6 - 2025-04-03
|
|
218
|
+
|
|
219
|
+
* Domo - extract cards metadata by batch to avoid hitting the URL max length
|
|
220
|
+
|
|
221
|
+
## 0.24.5 - 2025-04-02
|
|
222
|
+
|
|
223
|
+
* bump dependencies: google-cloud-storage
|
|
224
|
+
|
|
213
225
|
## 0.24.4 - 2025-03-19
|
|
214
226
|
|
|
215
227
|
* Snowflake:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=UWuENqrKnLu244f4Of6dtZ59XZ7jrLWkcQni3MqXPBg,16667
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -77,9 +77,11 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
|
|
|
77
77
|
castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
|
|
78
78
|
castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
|
|
79
79
|
castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
|
|
80
|
-
castor_extractor/utils/__init__.py,sha256=
|
|
80
|
+
castor_extractor/utils/__init__.py,sha256=KQkr_CmxWG0Vpu7CaqjbJkffUeEWcyeA9Cbm394Hygk,1585
|
|
81
81
|
castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
|
|
82
82
|
castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
|
|
83
|
+
castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
|
|
84
|
+
castor_extractor/utils/batch_test.py,sha256=84JYXOxiTkZFAceVh0mzN6VtKxcqoFPbxkZfIDyLGlg,606
|
|
83
85
|
castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
|
|
84
86
|
castor_extractor/utils/client/abstract.py,sha256=CWF7_afNpEZ3jor-22wXbKIvM20ukHkaDy_uknKz8B0,2075
|
|
85
87
|
castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
|
|
@@ -146,7 +148,7 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
146
148
|
castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
|
|
147
149
|
castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
|
|
148
150
|
castor_extractor/visualization/domo/client/__init__.py,sha256=Do0fU4B8Hhlhahcv734gnJl_ryCztfTBDea7XNCKfB8,72
|
|
149
|
-
castor_extractor/visualization/domo/client/client.py,sha256=
|
|
151
|
+
castor_extractor/visualization/domo/client/client.py,sha256=bgzXWUm-UnTIwgyJKaJkoHzQpDYwWCGCe97MsMFw6ng,9930
|
|
150
152
|
castor_extractor/visualization/domo/client/credentials.py,sha256=4gnsk4Tpt3ggdUYbvyNPJEXeCyTy12s-X24P5hFdULg,873
|
|
151
153
|
castor_extractor/visualization/domo/client/endpoints.py,sha256=eIE9oeZ_cmJSWWDuyxh6JaAOs3y5bTJQQ265HYgpulE,2775
|
|
152
154
|
castor_extractor/visualization/domo/client/pagination.py,sha256=ukVkHVzoH4mfZ29H9YcnC2YrdVolP10wv25J6Q3ehRw,821
|
|
@@ -264,12 +266,12 @@ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwy
|
|
|
264
266
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
265
267
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
266
268
|
castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
|
|
267
|
-
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=
|
|
269
|
+
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=VHNV1Q0EVKuiFKm1yKSx4tIuPGww4Mlw3yui2DgKe7I,4196
|
|
268
270
|
castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
|
|
269
271
|
castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
|
|
270
272
|
castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
|
|
271
273
|
castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
|
|
272
|
-
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=
|
|
274
|
+
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
|
|
273
275
|
castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
|
|
274
276
|
castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
|
|
275
277
|
castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6ZXxVsK8R3YnVk1UVYbaZk,1423
|
|
@@ -403,8 +405,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
403
405
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
404
406
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
405
407
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
406
|
-
castor_extractor-0.24.
|
|
407
|
-
castor_extractor-0.24.
|
|
408
|
-
castor_extractor-0.24.
|
|
409
|
-
castor_extractor-0.24.
|
|
410
|
-
castor_extractor-0.24.
|
|
408
|
+
castor_extractor-0.24.7.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
409
|
+
castor_extractor-0.24.7.dist-info/METADATA,sha256=qWp3OBv1FO123RJqz2YKTEd12WzhKoDmcxVZLhvzn6M,23831
|
|
410
|
+
castor_extractor-0.24.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
411
|
+
castor_extractor-0.24.7.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
|
|
412
|
+
castor_extractor-0.24.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|