castor-extractor 0.24.32__py3-none-any.whl → 0.24.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,9 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.24.33 - 2025-07-10
4
+
5
+ * Tableau - Add an option to skip fields ingestion
6
+
3
7
  ## 0.24.32 - 2025-07-02
4
8
 
5
9
  * Salesforce reporting - extract report's metadata
6
- *
10
+
7
11
  ## 0.24.31 - 2025-07-02
8
12
 
9
13
  * Looker Studio: add option to list users via a provided JSON file
@@ -28,6 +28,13 @@ def main():
28
28
  help="Option to avoid extracting Tableau columns, default to False",
29
29
  )
30
30
 
31
+ parser.add_argument(
32
+ "--skip-fields",
33
+ dest="skip_fields",
34
+ action="store_true",
35
+ help="Option to avoid extracting Tableau fields, default to False",
36
+ )
37
+
31
38
  parser.add_argument(
32
39
  "--with-pulse",
33
40
  dest="with_pulse",
@@ -41,6 +48,14 @@ def main():
41
48
  required=False,
42
49
  )
43
50
 
51
+ parser.add_argument(
52
+ "-ie",
53
+ "--ignore-errors",
54
+ action="store_true",
55
+ dest="ignore_errors",
56
+ help="Allow partial extraction of Fields and Columns: skip batch in case of Timeout errors",
57
+ )
58
+
44
59
  parser.add_argument("-o", "--output", help="Directory to write to")
45
60
 
46
61
  tableau.extract_all(**parse_filled_arguments(parser))
@@ -122,13 +122,17 @@ class TableauClient:
122
122
  credentials: TableauCredentials,
123
123
  timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
124
124
  with_columns: bool = True,
125
+ with_fields: bool = True,
125
126
  with_pulse: bool = False,
126
127
  override_page_size: Optional[int] = None,
128
+ ignore_errors: bool = False,
127
129
  ):
128
130
  self._credentials = credentials
129
131
  self._server = _server(credentials.server_url, timeout_sec)
130
132
  self._with_columns = with_columns
133
+ self._with_fields = with_fields
131
134
  self._with_pulse = with_pulse
135
+ self._ignore_errors = ignore_errors
132
136
 
133
137
  self._client_metadata = TableauClientMetadataApi(
134
138
  server=self._server,
@@ -221,6 +225,10 @@ class TableauClient:
221
225
  logger.info(f"Skipping asset {asset} - deactivated columns")
222
226
  return []
223
227
 
228
+ if asset == TableauAsset.FIELD and not self._with_fields:
229
+ logger.info(f"Skipping asset {asset} - deactivated fields")
230
+ return []
231
+
224
232
  logger.info(f"Extracting {asset.name}...")
225
233
 
226
234
  if asset == TableauAsset.DATASOURCE:
@@ -240,4 +248,4 @@ class TableauClient:
240
248
  return self._client_rest.fetch(asset)
241
249
 
242
250
  # other assets can be extracted via Metadata API
243
- return self._client_metadata.fetch(asset)
251
+ return self._client_metadata.fetch(asset, self._ignore_errors)
@@ -2,6 +2,7 @@ import logging
2
2
  from collections.abc import Iterator
3
3
  from typing import Optional
4
4
 
5
+ import requests
5
6
  import tableauserverclient as TSC # type: ignore
6
7
 
7
8
  from ....utils import SerializedAsset, retry
@@ -12,6 +13,13 @@ from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
12
13
 
13
14
  logger = logging.getLogger(__name__)
14
15
 
16
+ # These assets are known to be error-prone, so it's acceptable if a few are missed.
17
+ # If errors occur, skip the current batch.
18
+ _SAFE_MODE_ASSETS = (
19
+ TableauAsset.COLUMN,
20
+ TableauAsset.FIELD,
21
+ )
22
+
15
23
  # increase the value when extraction is too slow
16
24
  # decrease the value when timeouts arise
17
25
  _CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
@@ -92,6 +100,7 @@ def gql_query_scroll(
92
100
  resource: str,
93
101
  fields: str,
94
102
  page_size: int,
103
+ skip_batch: bool,
95
104
  ) -> Iterator[SerializedAsset]:
96
105
  """
97
106
  Iterate over GQL query results, handling pagination and cursor
@@ -119,15 +128,22 @@ def gql_query_scroll(
119
128
 
120
129
  current_offset = 0
121
130
  while True:
122
- payload = _call(first=page_size, offset=current_offset)
123
- yield payload["nodes"]
131
+ try:
132
+ payload = _call(first=page_size, offset=current_offset)
133
+ yield payload["nodes"]
134
+
135
+ current_offset += len(payload["nodes"])
136
+ total = payload["totalCount"]
137
+ logger.info(f"Extracted {current_offset}/{total} {resource}")
124
138
 
125
- current_offset += len(payload["nodes"])
126
- total = payload["totalCount"]
127
- logger.info(f"Extracted {current_offset}/{total} {resource}")
139
+ if not payload["pageInfo"]["hasNextPage"]:
140
+ break
141
+ except requests.exceptions.ReadTimeout:
142
+ if not skip_batch:
143
+ raise
128
144
 
129
- if not payload["pageInfo"]["hasNextPage"]:
130
- break
145
+ logger.warning("Skipping batch because of TableauServer Timeout")
146
+ current_offset += page_size
131
147
 
132
148
 
133
149
  def _deduplicate(result_pages: Iterator[SerializedAsset]) -> SerializedAsset:
@@ -177,12 +193,14 @@ class TableauClientMetadataApi:
177
193
  resource: str,
178
194
  fields: str,
179
195
  page_size: int = DEFAULT_PAGE_SIZE,
196
+ skip_batch: bool = False,
180
197
  ) -> SerializedAsset:
181
198
  result_pages = gql_query_scroll(
182
199
  self._server,
183
200
  resource=resource,
184
201
  fields=fields,
185
202
  page_size=page_size,
203
+ skip_batch=skip_batch,
186
204
  )
187
205
  return _deduplicate(result_pages)
188
206
 
@@ -193,21 +211,41 @@ class TableauClientMetadataApi:
193
211
  or DEFAULT_PAGE_SIZE
194
212
  )
195
213
 
196
- def _fetch_fields(self) -> SerializedAsset:
214
+ def _fetch_fields(self, skip_batch: bool = False) -> SerializedAsset:
197
215
  result: SerializedAsset = []
198
216
  page_size = self._page_size(TableauAsset.FIELD)
199
217
  for resource, fields in FIELDS_QUERIES:
200
- current = self._call(resource, fields, page_size)
218
+ current = self._call(
219
+ resource,
220
+ fields,
221
+ page_size,
222
+ skip_batch=skip_batch,
223
+ )
201
224
  result.extend(current)
202
225
  return result
203
226
 
227
+ @staticmethod
228
+ def _should_skip_batch_with_timeout(
229
+ asset: TableauAsset,
230
+ ignore_metadata_errors: bool = False,
231
+ ) -> bool:
232
+ return asset in _SAFE_MODE_ASSETS and ignore_metadata_errors
233
+
204
234
  def fetch(
205
235
  self,
206
236
  asset: TableauAsset,
237
+ ignore_errors: bool = False,
207
238
  ) -> SerializedAsset:
239
+ skip_batch = self._should_skip_batch_with_timeout(asset, ignore_errors)
240
+
208
241
  if asset == TableauAsset.FIELD:
209
- return self._fetch_fields()
242
+ return self._fetch_fields(skip_batch=skip_batch)
210
243
 
211
244
  page_size = self._page_size(asset)
212
245
  resource, fields = GQL_QUERIES[asset]
213
- return self._call(resource, fields, page_size)
246
+ return self._call(
247
+ resource=resource,
248
+ fields=fields,
249
+ page_size=page_size,
250
+ skip_batch=skip_batch,
251
+ )
@@ -33,16 +33,20 @@ def extract_all(**kwargs) -> None:
33
33
  """
34
34
  output_directory = kwargs.get("output") or from_env(OUTPUT_DIR)
35
35
  with_columns = not kwargs.get("skip_columns")
36
+ with_fields = not kwargs.get("skip_fields")
36
37
  with_pulse = kwargs.get("with_pulse") or False
37
38
  page_size = kwargs.get("page_size")
39
+ ignore_errors = kwargs.get("ignore_errors") or False
38
40
  timestamp = current_timestamp()
39
41
 
40
42
  credentials = TableauCredentials(**kwargs)
41
43
  client = TableauClient(
42
44
  credentials,
43
45
  with_columns=with_columns,
46
+ with_fields=with_fields,
44
47
  with_pulse=with_pulse,
45
48
  override_page_size=page_size,
49
+ ignore_errors=ignore_errors,
46
50
  )
47
51
  client.login()
48
52
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.24.32
3
+ Version: 0.24.33
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -215,10 +215,14 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
215
215
 
216
216
  # Changelog
217
217
 
218
+ ## 0.24.33 - 2025-07-10
219
+
220
+ * Tableau - Add an option to skip fields ingestion
221
+
218
222
  ## 0.24.32 - 2025-07-02
219
223
 
220
224
  * Salesforce reporting - extract report's metadata
221
- *
225
+
222
226
  ## 0.24.31 - 2025-07-02
223
227
 
224
228
  * Looker Studio: add option to list users via a provided JSON file
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=lQxSt8IlqUEvw7ldjh6EV3ifzbvsiqfztK5dHGx_Y8g,18703
1
+ CHANGELOG.md,sha256=jKQMJGiDeDEZG-753wDrtfOoOYa5Db5Liy0AsATdsuc,18779
2
2
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -26,7 +26,7 @@ castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_
26
26
  castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
27
27
  castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
28
28
  castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
29
- castor_extractor/commands/extract_tableau.py,sha256=ngujGYohWOqOK1qjIP1Hh951jr0KNKNSeOyoaOnO450,1558
29
+ castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
30
30
  castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
31
31
  castor_extractor/commands/file_check.py,sha256=TJx76Ymd0QCECmq35zRJMkPE8DJtSInB28MuSXWk8Ao,2644
32
32
  castor_extractor/commands/upload.py,sha256=rLXp7gQ8zb1kLbho4FT87q8eJd8Gvo_TkyIynAaQ-4s,1342
@@ -289,8 +289,8 @@ castor_extractor/visualization/strategy/extract.py,sha256=2fBuvS2xiOGXRpxXnZsE_C
289
289
  castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwyuwn3yJ5C_KbpBU0ySJDcQ,138
290
290
  castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
291
291
  castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
292
- castor_extractor/visualization/tableau/client/client.py,sha256=iJ3Y-vwPvmPyAUTs1PqFJEZelPGiLvsiwXpTI3b5THc,7867
293
- castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=ryRq4_qUok8vvWGhj5CNWXtwR2JlUsu1qjsov2KhQTE,6286
292
+ castor_extractor/visualization/tableau/client/client.py,sha256=QV-GFS4nEq976JLji57pIfsw2ZZaGTvfCFqy6_HOWMg,8204
293
+ castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=eAq9rjrB_2ZCQy9NwREHBOTXZffWdkwtwhzswm1pEfk,7449
294
294
  castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=rikyQKDLFYHLJhHJTF3LwWhKJ80svtTsYp5n7n9oTU8,2665
295
295
  castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
296
296
  castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
@@ -299,7 +299,7 @@ castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9y
299
299
  castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
300
300
  castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
301
301
  castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
302
- castor_extractor/visualization/tableau/extract.py,sha256=hGVr1BZVsHlIgNXOFusRN2YwUUhXvF3reOeN8g1CTEo,1508
302
+ castor_extractor/visualization/tableau/extract.py,sha256=9mSHFJ2DGlW-cDYiRZlJafAgj4_ObACxO0l9vBBfjUw,1683
303
303
  castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
304
304
  castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
305
305
  castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
@@ -430,8 +430,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
430
430
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
431
431
  castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
432
432
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
433
- castor_extractor-0.24.32.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
434
- castor_extractor-0.24.32.dist-info/METADATA,sha256=vsfvzg3F_c34Ek6G9oQ5LRVpGafwrxIJdXnNcJO4_n8,26156
435
- castor_extractor-0.24.32.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
436
- castor_extractor-0.24.32.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
437
- castor_extractor-0.24.32.dist-info/RECORD,,
433
+ castor_extractor-0.24.33.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
434
+ castor_extractor-0.24.33.dist-info/METADATA,sha256=vCEpwDM8sngoUEfrGtRPSjtCjTw6zxJGiJrnmj4eq_Y,26232
435
+ castor_extractor-0.24.33.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
436
+ castor_extractor-0.24.33.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
437
+ castor_extractor-0.24.33.dist-info/RECORD,,