castor-extractor 0.24.32__py3-none-any.whl → 0.24.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +5 -1
- castor_extractor/commands/extract_tableau.py +15 -0
- castor_extractor/visualization/tableau/client/client.py +9 -1
- castor_extractor/visualization/tableau/client/client_metadata_api.py +49 -11
- castor_extractor/visualization/tableau/extract.py +4 -0
- {castor_extractor-0.24.32.dist-info → castor_extractor-0.24.33.dist-info}/METADATA +6 -2
- {castor_extractor-0.24.32.dist-info → castor_extractor-0.24.33.dist-info}/RECORD +10 -10
- {castor_extractor-0.24.32.dist-info → castor_extractor-0.24.33.dist-info}/LICENCE +0 -0
- {castor_extractor-0.24.32.dist-info → castor_extractor-0.24.33.dist-info}/WHEEL +0 -0
- {castor_extractor-0.24.32.dist-info → castor_extractor-0.24.33.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.24.33 - 2025-07-10
|
|
4
|
+
|
|
5
|
+
* Tableau - Add an option to skip fields ingestion
|
|
6
|
+
|
|
3
7
|
## 0.24.32 - 2025-07-02
|
|
4
8
|
|
|
5
9
|
* Salesforce reporting - extract report's metadata
|
|
6
|
-
|
|
10
|
+
|
|
7
11
|
## 0.24.31 - 2025-07-02
|
|
8
12
|
|
|
9
13
|
* Looker Studio: add option to list users via a provided JSON file
|
|
@@ -28,6 +28,13 @@ def main():
|
|
|
28
28
|
help="Option to avoid extracting Tableau columns, default to False",
|
|
29
29
|
)
|
|
30
30
|
|
|
31
|
+
parser.add_argument(
|
|
32
|
+
"--skip-fields",
|
|
33
|
+
dest="skip_fields",
|
|
34
|
+
action="store_true",
|
|
35
|
+
help="Option to avoid extracting Tableau fields, default to False",
|
|
36
|
+
)
|
|
37
|
+
|
|
31
38
|
parser.add_argument(
|
|
32
39
|
"--with-pulse",
|
|
33
40
|
dest="with_pulse",
|
|
@@ -41,6 +48,14 @@ def main():
|
|
|
41
48
|
required=False,
|
|
42
49
|
)
|
|
43
50
|
|
|
51
|
+
parser.add_argument(
|
|
52
|
+
"-ie",
|
|
53
|
+
"--ignore-errors",
|
|
54
|
+
action="store_true",
|
|
55
|
+
dest="ignore_errors",
|
|
56
|
+
help="Allow partial extraction of Fields and Columns: skip batch in case of Timeout errors",
|
|
57
|
+
)
|
|
58
|
+
|
|
44
59
|
parser.add_argument("-o", "--output", help="Directory to write to")
|
|
45
60
|
|
|
46
61
|
tableau.extract_all(**parse_filled_arguments(parser))
|
|
@@ -122,13 +122,17 @@ class TableauClient:
|
|
|
122
122
|
credentials: TableauCredentials,
|
|
123
123
|
timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
|
|
124
124
|
with_columns: bool = True,
|
|
125
|
+
with_fields: bool = True,
|
|
125
126
|
with_pulse: bool = False,
|
|
126
127
|
override_page_size: Optional[int] = None,
|
|
128
|
+
ignore_errors: bool = False,
|
|
127
129
|
):
|
|
128
130
|
self._credentials = credentials
|
|
129
131
|
self._server = _server(credentials.server_url, timeout_sec)
|
|
130
132
|
self._with_columns = with_columns
|
|
133
|
+
self._with_fields = with_fields
|
|
131
134
|
self._with_pulse = with_pulse
|
|
135
|
+
self._ignore_errors = ignore_errors
|
|
132
136
|
|
|
133
137
|
self._client_metadata = TableauClientMetadataApi(
|
|
134
138
|
server=self._server,
|
|
@@ -221,6 +225,10 @@ class TableauClient:
|
|
|
221
225
|
logger.info(f"Skipping asset {asset} - deactivated columns")
|
|
222
226
|
return []
|
|
223
227
|
|
|
228
|
+
if asset == TableauAsset.FIELD and not self._with_fields:
|
|
229
|
+
logger.info(f"Skipping asset {asset} - deactivated fields")
|
|
230
|
+
return []
|
|
231
|
+
|
|
224
232
|
logger.info(f"Extracting {asset.name}...")
|
|
225
233
|
|
|
226
234
|
if asset == TableauAsset.DATASOURCE:
|
|
@@ -240,4 +248,4 @@ class TableauClient:
|
|
|
240
248
|
return self._client_rest.fetch(asset)
|
|
241
249
|
|
|
242
250
|
# other assets can be extracted via Metadata API
|
|
243
|
-
return self._client_metadata.fetch(asset)
|
|
251
|
+
return self._client_metadata.fetch(asset, self._ignore_errors)
|
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
from collections.abc import Iterator
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
|
+
import requests
|
|
5
6
|
import tableauserverclient as TSC # type: ignore
|
|
6
7
|
|
|
7
8
|
from ....utils import SerializedAsset, retry
|
|
@@ -12,6 +13,13 @@ from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
|
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
14
15
|
|
|
16
|
+
# These assets are known to be error-prone, so it's acceptable if a few are missed.
|
|
17
|
+
# If errors occur, skip the current batch.
|
|
18
|
+
_SAFE_MODE_ASSETS = (
|
|
19
|
+
TableauAsset.COLUMN,
|
|
20
|
+
TableauAsset.FIELD,
|
|
21
|
+
)
|
|
22
|
+
|
|
15
23
|
# increase the value when extraction is too slow
|
|
16
24
|
# decrease the value when timeouts arise
|
|
17
25
|
_CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
|
|
@@ -92,6 +100,7 @@ def gql_query_scroll(
|
|
|
92
100
|
resource: str,
|
|
93
101
|
fields: str,
|
|
94
102
|
page_size: int,
|
|
103
|
+
skip_batch: bool,
|
|
95
104
|
) -> Iterator[SerializedAsset]:
|
|
96
105
|
"""
|
|
97
106
|
Iterate over GQL query results, handling pagination and cursor
|
|
@@ -119,15 +128,22 @@ def gql_query_scroll(
|
|
|
119
128
|
|
|
120
129
|
current_offset = 0
|
|
121
130
|
while True:
|
|
122
|
-
|
|
123
|
-
|
|
131
|
+
try:
|
|
132
|
+
payload = _call(first=page_size, offset=current_offset)
|
|
133
|
+
yield payload["nodes"]
|
|
134
|
+
|
|
135
|
+
current_offset += len(payload["nodes"])
|
|
136
|
+
total = payload["totalCount"]
|
|
137
|
+
logger.info(f"Extracted {current_offset}/{total} {resource}")
|
|
124
138
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
139
|
+
if not payload["pageInfo"]["hasNextPage"]:
|
|
140
|
+
break
|
|
141
|
+
except requests.exceptions.ReadTimeout:
|
|
142
|
+
if not skip_batch:
|
|
143
|
+
raise
|
|
128
144
|
|
|
129
|
-
|
|
130
|
-
|
|
145
|
+
logger.warning("Skipping batch because of TableauServer Timeout")
|
|
146
|
+
current_offset += page_size
|
|
131
147
|
|
|
132
148
|
|
|
133
149
|
def _deduplicate(result_pages: Iterator[SerializedAsset]) -> SerializedAsset:
|
|
@@ -177,12 +193,14 @@ class TableauClientMetadataApi:
|
|
|
177
193
|
resource: str,
|
|
178
194
|
fields: str,
|
|
179
195
|
page_size: int = DEFAULT_PAGE_SIZE,
|
|
196
|
+
skip_batch: bool = False,
|
|
180
197
|
) -> SerializedAsset:
|
|
181
198
|
result_pages = gql_query_scroll(
|
|
182
199
|
self._server,
|
|
183
200
|
resource=resource,
|
|
184
201
|
fields=fields,
|
|
185
202
|
page_size=page_size,
|
|
203
|
+
skip_batch=skip_batch,
|
|
186
204
|
)
|
|
187
205
|
return _deduplicate(result_pages)
|
|
188
206
|
|
|
@@ -193,21 +211,41 @@ class TableauClientMetadataApi:
|
|
|
193
211
|
or DEFAULT_PAGE_SIZE
|
|
194
212
|
)
|
|
195
213
|
|
|
196
|
-
def _fetch_fields(self) -> SerializedAsset:
|
|
214
|
+
def _fetch_fields(self, skip_batch: bool = False) -> SerializedAsset:
|
|
197
215
|
result: SerializedAsset = []
|
|
198
216
|
page_size = self._page_size(TableauAsset.FIELD)
|
|
199
217
|
for resource, fields in FIELDS_QUERIES:
|
|
200
|
-
current = self._call(
|
|
218
|
+
current = self._call(
|
|
219
|
+
resource,
|
|
220
|
+
fields,
|
|
221
|
+
page_size,
|
|
222
|
+
skip_batch=skip_batch,
|
|
223
|
+
)
|
|
201
224
|
result.extend(current)
|
|
202
225
|
return result
|
|
203
226
|
|
|
227
|
+
@staticmethod
|
|
228
|
+
def _should_skip_batch_with_timeout(
|
|
229
|
+
asset: TableauAsset,
|
|
230
|
+
ignore_metadata_errors: bool = False,
|
|
231
|
+
) -> bool:
|
|
232
|
+
return asset in _SAFE_MODE_ASSETS and ignore_metadata_errors
|
|
233
|
+
|
|
204
234
|
def fetch(
|
|
205
235
|
self,
|
|
206
236
|
asset: TableauAsset,
|
|
237
|
+
ignore_errors: bool = False,
|
|
207
238
|
) -> SerializedAsset:
|
|
239
|
+
skip_batch = self._should_skip_batch_with_timeout(asset, ignore_errors)
|
|
240
|
+
|
|
208
241
|
if asset == TableauAsset.FIELD:
|
|
209
|
-
return self._fetch_fields()
|
|
242
|
+
return self._fetch_fields(skip_batch=skip_batch)
|
|
210
243
|
|
|
211
244
|
page_size = self._page_size(asset)
|
|
212
245
|
resource, fields = GQL_QUERIES[asset]
|
|
213
|
-
return self._call(
|
|
246
|
+
return self._call(
|
|
247
|
+
resource=resource,
|
|
248
|
+
fields=fields,
|
|
249
|
+
page_size=page_size,
|
|
250
|
+
skip_batch=skip_batch,
|
|
251
|
+
)
|
|
@@ -33,16 +33,20 @@ def extract_all(**kwargs) -> None:
|
|
|
33
33
|
"""
|
|
34
34
|
output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
|
|
35
35
|
with_columns = not kwargs.get("skip_columns")
|
|
36
|
+
with_fields = not kwargs.get("skip_fields")
|
|
36
37
|
with_pulse = kwargs.get("with_pulse") or False
|
|
37
38
|
page_size = kwargs.get("page_size")
|
|
39
|
+
ignore_errors = kwargs.get("ignore_errors") or False
|
|
38
40
|
timestamp = current_timestamp()
|
|
39
41
|
|
|
40
42
|
credentials = TableauCredentials(**kwargs)
|
|
41
43
|
client = TableauClient(
|
|
42
44
|
credentials,
|
|
43
45
|
with_columns=with_columns,
|
|
46
|
+
with_fields=with_fields,
|
|
44
47
|
with_pulse=with_pulse,
|
|
45
48
|
override_page_size=page_size,
|
|
49
|
+
ignore_errors=ignore_errors,
|
|
46
50
|
)
|
|
47
51
|
client.login()
|
|
48
52
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.24.32
|
|
3
|
+
Version: 0.24.33
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -215,10 +215,14 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
|
|
|
215
215
|
|
|
216
216
|
# Changelog
|
|
217
217
|
|
|
218
|
+
## 0.24.33 - 2025-07-10
|
|
219
|
+
|
|
220
|
+
* Tableau - Add an option to skip fields ingestion
|
|
221
|
+
|
|
218
222
|
## 0.24.32 - 2025-07-02
|
|
219
223
|
|
|
220
224
|
* Salesforce reporting - extract report's metadata
|
|
221
|
-
|
|
225
|
+
|
|
222
226
|
## 0.24.31 - 2025-07-02
|
|
223
227
|
|
|
224
228
|
* Looker Studio: add option to list users via a provided JSON file
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=jKQMJGiDeDEZG-753wDrtfOoOYa5Db5Liy0AsATdsuc,18779
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -26,7 +26,7 @@ castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_
|
|
|
26
26
|
castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
|
|
27
27
|
castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
|
|
28
28
|
castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
|
|
29
|
-
castor_extractor/commands/extract_tableau.py,sha256=
|
|
29
|
+
castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
|
|
30
30
|
castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
|
|
31
31
|
castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
|
|
32
32
|
castor_extractor/commands/upload.py,sha256=rLXp7gQ8zb1kLbho4FT87q8eJd8Gvo_TkyIynAaQ-4s,1342
|
|
@@ -289,8 +289,8 @@ castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C
|
|
|
289
289
|
castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
|
|
290
290
|
castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
|
|
291
291
|
castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
|
|
292
|
-
castor_extractor/visualization/tableau/client/client.py,sha256=
|
|
293
|
-
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=
|
|
292
|
+
castor_extractor/visualization/tableau/client/client.py,sha256=QV-GFS4nEq976JLji57pIfsw2ZZaGTvfCFqy6_HOWMg,8204
|
|
293
|
+
castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=eAq9rjrB_2ZCQy9NwREHBOTXZffWdkwtwhzswm1pEfk,7449
|
|
294
294
|
castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=rikyQKDLFYHLJhHJTF3LwWhKJ80svtTsYp5n7n9oTU8,2665
|
|
295
295
|
castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
|
|
296
296
|
castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
|
|
@@ -299,7 +299,7 @@ castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9y
|
|
|
299
299
|
castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
|
|
300
300
|
castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
|
|
301
301
|
castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
|
|
302
|
-
castor_extractor/visualization/tableau/extract.py,sha256=
|
|
302
|
+
castor_extractor/visualization/tableau/extract.py,sha256=9mSHFJ2DGlW-cDYiRZlJafAgj4_ObACxO0l9vBBfjUw,1683
|
|
303
303
|
castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
|
|
304
304
|
castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
|
|
305
305
|
castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
|
|
@@ -430,8 +430,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
430
430
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
431
431
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
432
432
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
433
|
-
castor_extractor-0.24.32.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
434
|
-
castor_extractor-0.24.
|
|
435
|
-
castor_extractor-0.24.32.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
436
|
-
castor_extractor-0.24.32.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
437
|
-
castor_extractor-0.24.32.dist-info/RECORD,,
|
|
433
|
+
castor_extractor-0.24.33.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
434
|
+
castor_extractor-0.24.33.dist-info/METADATA,sha256=vCEpwDM8sngoUEfrGtRPSjtCjTw6zxJGiJrnmj4eq_Y,26232
|
|
435
|
+
castor_extractor-0.24.33.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
436
|
+
castor_extractor-0.24.33.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
|
|
437
|
+
castor_extractor-0.24.33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|