castor-extractor 0.24.4__py3-none-any.whl → 0.24.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.24.7 - 2025-04-07
4
+
5
+ * Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
6
+
7
+ ## 0.24.6 - 2025-04-03
8
+
9
+ * Domo - extract cards metadata in batches to avoid hitting the URL max length
10
+
11
+ ## 0.24.5 - 2025-04-02
12
+
13
+ * bump dependencies: google-cloud-storage
14
+
3
15
  ## 0.24.4 - 2025-03-19
4
16
 
5
17
  * Snowflake:
@@ -1,4 +1,5 @@
1
1
  from .argument_parser import parse_filled_arguments
2
+ from .batch import batch_of_length
2
3
  from .client import (
3
4
  AbstractSourceClient,
4
5
  APIClient,
@@ -0,0 +1,16 @@
1
+ from typing import Iterator, List, TypeVar
2
+
3
+ T = TypeVar("T")
4
+
5
+
6
def batch_of_length(
    elements: List[T],
    batch_size: int,
) -> Iterator[List[T]]:
    """
    Split the given elements into consecutive chunks of at most `batch_size`

    Chunks preserve the input order; the last one is shorter when the number
    of elements is not a multiple of `batch_size`. An empty input yields no
    chunk at all.

    Raises AssertionError when `batch_size` is lower than 1. As this function
    is a generator, the check only runs once iteration starts.
    """
    # explicit raise instead of an `assert` statement: the check must not be
    # stripped when Python runs with -O ; the exception type is kept so that
    # existing callers (and tests) catching AssertionError still work
    if batch_size < 1:
        raise AssertionError("batch size must be greater or equal to 1")
    for index in range(0, len(elements), batch_size):
        # slicing already clamps the upper bound to the sequence length
        yield elements[index : index + batch_size]
@@ -0,0 +1,27 @@
1
+ import pytest
2
+
3
+ from .batch import batch_of_length
4
+
5
+
6
def test_batch_of_length():
    """
    Check chunking for several batch sizes and the rejection of a negative one
    """
    letters = ["a", "b", "c", "d", "e", "f", "g", "h"]

    # batch size -> chunks expected for the 8 letters above
    expected_by_size = {
        # last chunk is shorter than the batch size
        3: [["a", "b", "c"], ["d", "e", "f"], ["g", "h"]],
        # batch size larger than the input: a single chunk with everything
        1000: [letters],
        # only one element left for the last chunk
        7: [["a", "b", "c", "d", "e", "f", "g"], ["h"]],
    }
    for size, expected in expected_by_size.items():
        assert list(batch_of_length(letters, size)) == expected

    # the generator must be consumed for the size check to run
    with pytest.raises(AssertionError):
        list(batch_of_length(letters, -12))
@@ -9,6 +9,7 @@ import requests
9
9
  from ....utils import (
10
10
  RequestSafeMode,
11
11
  at_midnight,
12
+ batch_of_length,
12
13
  current_date,
13
14
  empty_iterator,
14
15
  handle_response,
@@ -48,6 +49,8 @@ _RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
48
49
 
49
50
  _PARENT_FOLDER = "/Dashboards"
50
51
 
52
+ _CARDS_BATCH_SIZE = 100
53
+
51
54
  logger = logging.getLogger(__name__)
52
55
 
53
56
 
@@ -156,16 +159,19 @@ class DomoClient:
156
159
 
157
160
  return all_results
158
161
 
162
def _cards_metadata(self, card_ids: list[int]) -> Iterator[dict]:
    """
    Yields the metadata of the given cards, fetched one batch of ids at a time
    """
    # a single call with every id could hit the URL max length,
    # hence one endpoint (and one request) per batch of ids
    id_batches = batch_of_length(card_ids, _CARDS_BATCH_SIZE)
    for ids_chunk in id_batches:
        yield from self._get_element(
            self._endpoint_factory.cards_metadata(ids_chunk)
        )
167
+
159
168
  def _datasources(self, card_ids: list[int]) -> RawData:
160
169
  """Yields all distinct datasources associated to the given cards"""
161
170
  if not card_ids:
162
171
  return empty_iterator()
163
172
 
164
- endpoint = self._endpoint_factory.cards_metadata(card_ids)
165
- cards_metadata = self._get_element(endpoint)
166
-
167
173
  processed: set[str] = set()
168
- for card in cards_metadata:
174
+ for card in self._cards_metadata(card_ids):
169
175
  for datasource in card["datasources"]:
170
176
  id_ = datasource["dataSourceId"]
171
177
  if id_ in processed:
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from collections.abc import Iterator
2
3
  from typing import Optional
3
4
 
@@ -9,15 +10,14 @@ from ..constants import DEFAULT_PAGE_SIZE
9
10
  from .errors import TableauApiError, TableauApiTimeout
10
11
  from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
11
12
 
13
+ logger = logging.getLogger(__name__)
14
+
12
15
  # increase the value when extraction is too slow
13
16
  # decrease the value when timeouts arise
14
17
  _CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
15
- # for some clients, extraction of columns tend to hit the node limit
16
- # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
17
- # the workaround is to reduce pagination
18
- TableauAsset.COLUMN: 50,
19
18
  # fields are light but volumes are bigger
20
19
  TableauAsset.FIELD: 1000,
20
+ # tables are sometimes heavy
21
21
  TableauAsset.TABLE: 50,
22
22
  }
23
23
 
@@ -51,8 +51,9 @@ def _check_errors(answer: dict) -> None:
51
51
 
52
52
  def gql_query_scroll(
53
53
  server,
54
- query: str,
55
54
  resource: str,
55
+ fields: str,
56
+ page_size: int,
56
57
  ) -> Iterator[SerializedAsset]:
57
58
  """
58
59
  Iterate over GQL query results, handling offset-based pagination
@@ -67,23 +68,27 @@ def gql_query_scroll(
67
68
  max_retries=_RETRY_COUNT,
68
69
  base_ms=_RETRY_BASE_MS,
69
70
  )
70
- def _call(cursor: Optional[str]) -> dict:
71
- # If cursor is defined it must be quoted else use null token
72
- token = "null" if cursor is None else f'"{cursor}"'
73
- query_ = query.replace("AFTER_TOKEN_SIGNAL", token)
74
- answer = server.metadata.query(query_)
71
+ def _call(first: int, offset: int) -> dict:
72
+ query = QUERY_TEMPLATE.format(
73
+ resource=resource,
74
+ fields=fields,
75
+ first=first,
76
+ offset=offset,
77
+ )
78
+ answer = server.metadata.query(query)
75
79
  _check_errors(answer)
76
80
  return answer["data"][f"{resource}Connection"]
77
81
 
78
- cursor = None
82
+ current_offset = 0
79
83
  while True:
80
- payload = _call(cursor)
84
+ payload = _call(first=page_size, offset=current_offset)
81
85
  yield payload["nodes"]
82
86
 
83
- page_info = payload["pageInfo"]
84
- if page_info["hasNextPage"]:
85
- cursor = page_info["endCursor"]
86
- else:
87
+ current_offset += len(payload["nodes"])
88
+ total = payload["totalCount"]
89
+ logger.info(f"Extracted {current_offset}/{total} {resource}")
90
+
91
+ if not payload["pageInfo"]["hasNextPage"]:
87
92
  break
88
93
 
89
94
 
@@ -107,12 +112,12 @@ class TableauClientMetadataApi:
107
112
  fields: str,
108
113
  page_size: int = DEFAULT_PAGE_SIZE,
109
114
  ) -> SerializedAsset:
110
- query = QUERY_TEMPLATE.format(
115
+ result_pages = gql_query_scroll(
116
+ self._server,
111
117
  resource=resource,
112
118
  fields=fields,
113
119
  page_size=page_size,
114
120
  )
115
- result_pages = gql_query_scroll(self._server, query, resource)
116
121
  return [asset for page in result_pages for asset in page]
117
122
 
118
123
  def _page_size(self, asset: TableauAsset) -> int:
@@ -2,7 +2,7 @@ from ..assets import TableauAsset
2
2
 
3
3
  QUERY_TEMPLATE = """
4
4
  {{
5
- {resource}Connection(first: {page_size}, after: AFTER_TOKEN_SIGNAL) {{
5
+ {resource}Connection(first: {first}, offset: {offset}) {{
6
6
  nodes {{ {fields}
7
7
  }}
8
8
  pageInfo {{
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.24.4
3
+ Version: 0.24.7
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -35,7 +35,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
35
35
  Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
36
36
  Requires-Dist: google-auth (>=2,<3)
37
37
  Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
38
- Requires-Dist: google-cloud-storage (>=2,<3)
38
+ Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
39
39
  Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
40
40
  Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
41
41
  Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
@@ -51,7 +51,7 @@ Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all
51
51
  Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
52
52
  Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
53
53
  Requires-Dist: requests (>=2.0.0,<3.0.0)
54
- Requires-Dist: setuptools (>=75.6)
54
+ Requires-Dist: setuptools (>=78.1)
55
55
  Requires-Dist: snowflake-connector-python (>=3.4.0,<4.0.0) ; extra == "snowflake" or extra == "all"
56
56
  Requires-Dist: snowflake-sqlalchemy (!=1.2.5,<2.0.0) ; extra == "snowflake" or extra == "all"
57
57
  Requires-Dist: sqlalchemy (>=1.4,<1.5)
@@ -210,6 +210,18 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
210
210
 
211
211
  # Changelog
212
212
 
213
+ ## 0.24.7 - 2025-04-07
214
+
215
+ * Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
216
+
217
+ ## 0.24.6 - 2025-04-03
218
+
219
+ * Domo - extract cards metadata in batches to avoid hitting the URL max length
220
+
221
+ ## 0.24.5 - 2025-04-02
222
+
223
+ * bump dependencies: google-cloud-storage
224
+
213
225
  ## 0.24.4 - 2025-03-19
214
226
 
215
227
  * Snowflake:
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=1Y5FmmQDspwZaOhKjnJosP2sNd898LeTOmVIMTBt9Bw,16387
1
+ CHANGELOG.md,sha256=UWuENqrKnLu244f4Of6dtZ59XZ7jrLWkcQni3MqXPBg,16667
2
2
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -77,9 +77,11 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
77
77
  castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
78
78
  castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
79
79
  castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
80
- castor_extractor/utils/__init__.py,sha256=X7WOOgrpGf7Vh8r-7eNGjuC0rKs0g9GTO3d7hZ18gwo,1550
80
+ castor_extractor/utils/__init__.py,sha256=KQkr_CmxWG0Vpu7CaqjbJkffUeEWcyeA9Cbm394Hygk,1585
81
81
  castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
82
82
  castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
83
+ castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
84
+ castor_extractor/utils/batch_test.py,sha256=84JYXOxiTkZFAceVh0mzN6VtKxcqoFPbxkZfIDyLGlg,606
83
85
  castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
84
86
  castor_extractor/utils/client/abstract.py,sha256=CWF7_afNpEZ3jor-22wXbKIvM20ukHkaDy_uknKz8B0,2075
85
87
  castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
@@ -146,7 +148,7 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
146
148
  castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
147
149
  castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
148
150
  castor_extractor/visualization/domo/client/__init__.py,sha256=Do0fU4B8Hhlhahcv734gnJl_ryCztfTBDea7XNCKfB8,72
149
- castor_extractor/visualization/domo/client/client.py,sha256=vOMBY5dY6N3v55YJPdh9aoiddXnuLnGoFHLE5BeUKSg,9662
151
+ castor_extractor/visualization/domo/client/client.py,sha256=bgzXWUm-UnTIwgyJKaJkoHzQpDYwWCGCe97MsMFw6ng,9930
150
152
  castor_extractor/visualization/domo/client/credentials.py,sha256=4gnsk4Tpt3ggdUYbvyNPJEXeCyTy12s-X24P5hFdULg,873
151
153
  castor_extractor/visualization/domo/client/endpoints.py,sha256=eIE9oeZ_cmJSWWDuyxh6JaAOs3y5bTJQQ265HYgpulE,2775
152
154
  castor_extractor/visualization/domo/client/pagination.py,sha256=ukVkHVzoH4mfZ29H9YcnC2YrdVolP10wv25J6Q3ehRw,821
@@ -264,12 +266,12 @@ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwy
264
266
  castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
265
267
  castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
266
268
  castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
267
- castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=fIBsSbRTypBABsCoigO2dkKsw4Eu3GrsEPTDfjY8A80,4303
269
+ castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=VHNV1Q0EVKuiFKm1yKSx4tIuPGww4Mlw3yui2DgKe7I,4196
268
270
  castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
269
271
  castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
270
272
  castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
271
273
  castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
272
- castor_extractor/visualization/tableau/client/gql_queries.py,sha256=NISarYh33Ij7DhYxqjTdv681AHYpbft8kPwVUQbAZ7U,2190
274
+ castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
273
275
  castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
274
276
  castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
275
277
  castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6ZXxVsK8R3YnVk1UVYbaZk,1423
@@ -403,8 +405,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
403
405
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
404
406
  castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
405
407
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
406
- castor_extractor-0.24.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
407
- castor_extractor-0.24.4.dist-info/METADATA,sha256=eY2TPP3IDq9an2JJzoZcN-_rG5DJIGzbJOqEtGBhzd4,23543
408
- castor_extractor-0.24.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
409
- castor_extractor-0.24.4.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
410
- castor_extractor-0.24.4.dist-info/RECORD,,
408
+ castor_extractor-0.24.7.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
409
+ castor_extractor-0.24.7.dist-info/METADATA,sha256=qWp3OBv1FO123RJqz2YKTEd12WzhKoDmcxVZLhvzn6M,23831
410
+ castor_extractor-0.24.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
411
+ castor_extractor-0.24.7.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
412
+ castor_extractor-0.24.7.dist-info/RECORD,,