castor-extractor 0.10.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.10.1 - 2023-12-04
4
+
5
+ * Domo: fix pagination
6
+
3
7
  ## 0.10.0 - 2023-11-28
4
8
 
5
9
  * Looker: extract all Looker Explores, even if unused in Dashboards
@@ -128,7 +128,10 @@ class DomoClient:
128
128
  while pagination.needs_increment:
129
129
  results = self._get_many(
130
130
  endpoint=endpoint,
131
- params={"offset": pagination.offset},
131
+ params={
132
+ "offset": pagination.offset,
133
+ "limit": pagination.per_page,
134
+ },
132
135
  )
133
136
  all_results.extend(results)
134
137
  number_of_items = len(results)
@@ -1,23 +1,28 @@
1
1
  from dataclasses import dataclass
2
+ from typing import Optional
2
3
 
3
- PER_PAGE = 50
4
+ PER_PAGE = 50 # maximum value accepted by DOMO is 50
4
5
 
5
6
 
6
7
  @dataclass
7
8
  class Pagination:
8
9
  """Handles pagination within DOMO Api"""
9
10
 
10
- number_results: int = PER_PAGE # max init
11
+ number_results: Optional[int] = None
11
12
  offset: int = 0
12
13
  per_page: int = PER_PAGE
13
14
  should_stop: bool = False
14
15
 
15
16
  @property
16
17
  def needs_increment(self) -> bool:
18
+ if self.number_results is None:
19
+ return True # first iteration
20
+
17
21
  if (self.number_results < self.per_page) or self.should_stop:
18
22
  return False
23
+
19
24
  return True
20
25
 
21
26
  def increment_offset(self, number_results: int) -> None:
22
- self.offset += self.per_page
27
+ self.offset += number_results
23
28
  self.number_results = number_results
@@ -0,0 +1,22 @@
1
+ from .pagination import Pagination
2
+
3
+
4
+ def test_pagination():
5
+ per_page = 20
6
+
7
+ pagination = Pagination(per_page=per_page)
8
+
9
+ assert pagination.number_results is None
10
+ assert pagination.offset == 0
11
+
12
+ pagination.increment_offset(per_page)
13
+ assert pagination.offset == per_page
14
+ assert pagination.needs_increment
15
+
16
+ pagination.increment_offset(per_page)
17
+ assert pagination.offset == per_page * 2
18
+ assert pagination.needs_increment
19
+
20
+ pagination.increment_offset(5)
21
+ assert pagination.offset == per_page * 2 + 5
22
+ assert not pagination.needs_increment
@@ -1,8 +1,12 @@
1
1
  from typing import List, Optional, Set, Tuple
2
2
 
3
+ from google.api_core.exceptions import Forbidden
4
+ from google.api_core.page_iterator import Iterator as PageIterator
3
5
  from google.cloud.bigquery import Client as GoogleCloudClient # type: ignore
6
+ from google.cloud.bigquery.dataset import Dataset # type: ignore
4
7
  from google.oauth2.service_account import Credentials # type: ignore
5
8
 
9
+ from ...utils import retry
6
10
  from ..abstract import SqlalchemyClient
7
11
 
8
12
  BIGQUERY_URI = "bigquery://"
@@ -10,6 +14,9 @@ BIGQUERY_URI = "bigquery://"
10
14
  CREDENTIALS_INFO_KEY = "credentials_info"
11
15
  PROJECT_ID_KEY = "project_id"
12
16
 
17
+ _RETRY_NUMBER = 1
18
+ _RETRY_BASE_MS = 60_000
19
+
13
20
 
14
21
  class BigQueryClient(SqlalchemyClient):
15
22
  """Connect to BigQuery and run SQL queries"""
@@ -25,6 +32,9 @@ class BigQueryClient(SqlalchemyClient):
25
32
  self._db_allowed = db_allowed
26
33
  self._db_blocked = db_blocked
27
34
  self._dataset_blocked = dataset_blocked
35
+ self.client = self._client()
36
+ self._projects: List[str] | None = None
37
+ self._datasets: List[Dataset] | None = None
28
38
 
29
39
  @staticmethod
30
40
  def name() -> str:
@@ -51,7 +61,7 @@ class BigQueryClient(SqlalchemyClient):
51
61
  def _build_uri(self, credentials: dict) -> str:
52
62
  return BIGQUERY_URI
53
63
 
54
- def _google_cloud_client(self) -> GoogleCloudClient:
64
+ def _client(self) -> GoogleCloudClient:
55
65
  assert (
56
66
  CREDENTIALS_INFO_KEY in self._options
57
67
  ), "Missing BigQuery credentials in engine's options"
@@ -61,25 +71,47 @@ class BigQueryClient(SqlalchemyClient):
61
71
  credentials=Credentials.from_service_account_info(credentials),
62
72
  )
63
73
 
64
- def _list_datasets(self) -> List:
65
- client = self._google_cloud_client()
66
- return [
67
- dataset
68
- for project_id in self.get_projects()
69
- for dataset in client.list_datasets(project_id)
70
- if self._keep_dataset(dataset.dataset_id)
71
- ]
74
+ def _list_datasets(self) -> List[Dataset]:
75
+ """
76
+ Returns datasets available for the given GCP client
77
+ Cache the result in self._datasets to reduce number of API calls
78
+ """
79
+ if self._datasets is None:
80
+ self._datasets = [
81
+ dataset
82
+ for project_id in self.get_projects()
83
+ for dataset in self.client.list_datasets(project_id)
84
+ if self._keep_dataset(dataset.dataset_id)
85
+ ]
86
+ return self._datasets
87
+
88
+ @retry((Forbidden,), count=_RETRY_NUMBER, base_ms=_RETRY_BASE_MS)
89
+ def _list_projects(self) -> PageIterator:
90
+ """
91
+ Note: Calling list_projects from GoogleCloudClient sometimes raises:
92
+ ```
93
+ google.api_core.exceptions.Forbidden: 403 GET https://bigquery.googleapis.com/bigquery/v2/projects?prettyPrint=false
94
+ Quota exceeded: Your user exceeded quota for concurrent project.lists requests.
95
+ ```
96
+
97
+ This function aims to isolate the call with a custom retry strategy.
98
+ Note that Google allows a retry parameter on client.list_projects but
99
+ that looks way too complex to customize.
100
+ """
101
+ return self.client.list_projects()
72
102
 
73
103
  def get_projects(self) -> List[str]:
74
104
  """
75
105
  Returns distinct project_id available for the given GCP client
106
+ Cache the result in self._projects to reduce number of API calls
76
107
  """
77
- client = self._google_cloud_client()
78
- return [
79
- p.project_id
80
- for p in client.list_projects()
81
- if self._keep_project(p.project_id)
82
- ]
108
+ if self._projects is None:
109
+ self._projects = [
110
+ p.project_id
111
+ for p in self._list_projects()
112
+ if self._keep_project(p.project_id)
113
+ ]
114
+ return self._projects
83
115
 
84
116
  def get_regions(self) -> Set[Tuple[str, str]]:
85
117
  """
@@ -26,8 +26,11 @@ class MockBigQueryClient(BigQueryClient):
26
26
  self._db_allowed = ["project_2", "project_1"]
27
27
  self._dataset_blocked = ["hidden_dataset"]
28
28
  self._db_blocked = ["hidden_project"]
29
+ self._projects = None
30
+ self._datasets = None
31
+ self.client = self._client()
29
32
 
30
- def _google_cloud_client(self) -> Mock:
33
+ def _client(self) -> Mock:
31
34
  fake_client = Mock()
32
35
  fake_client.list_projects = Mock(
33
36
  return_value=[
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.10.0
3
+ Version: 0.10.1
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=-SDIu5XMRtiE4Pp7-F8aCJteadjcjuxDwdvRIjKefBw,7349
1
+ CHANGELOG.md,sha256=YHDD-wLJh1LKqODJfg07WPSIj6pPmkI8F0za9PzKFiw,7397
2
2
  Dockerfile,sha256=TC6hFjG3mvnt1nkw2EpaS42hRYaGA2YIPKgWhVSKTWc,303
3
3
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
4
4
  README.md,sha256=EL6JpZxvaQFOYv5WFuSjZvSk9Hcpsf7alMlUC5IPFjA,3423
@@ -82,11 +82,12 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
82
82
  castor_extractor/visualization/domo/__init__.py,sha256=_mAYVfoVLizfLGF_f6ZiwBhdPpvoJY_diySf33dt3Jo,127
83
83
  castor_extractor/visualization/domo/assets.py,sha256=JI45W7n5z_opbuRrCwgzQJuN-VikuRrilUj5g5lX7Hk,184
84
84
  castor_extractor/visualization/domo/client/__init__.py,sha256=UDszV3IXNC9Wp_j55NZ-6ey2INo0TYtAg2QNIJOjglE,88
85
- castor_extractor/visualization/domo/client/client.py,sha256=QbbvkCgCzNg1SH0W5oE2NWaIBfQypAn07dV2BaLQX-o,8994
85
+ castor_extractor/visualization/domo/client/client.py,sha256=MN-qleOpSVawZRlVL8EiglEOMj0cdvMqEbgYYBfz86w,9083
86
86
  castor_extractor/visualization/domo/client/client_test.py,sha256=5Z_C2B0fs60aGnMF78llBnRkzehH4tZqRPXPkccLvBM,583
87
87
  castor_extractor/visualization/domo/client/credentials.py,sha256=CksQ9W9X6IGjTlYN0okwGAmURMRJKAjctxODAvAJUAo,1148
88
88
  castor_extractor/visualization/domo/client/endpoints.py,sha256=-B7mRKJ44Bg0hb3E5dQXvCVK6qHzizdeSQXsSwEJEIY,1812
89
- castor_extractor/visualization/domo/client/pagination.py,sha256=BT9ZIb-GYGWEQZpK_80aAYKQ-xrr7VlfsFbZv9rcejQ,565
89
+ castor_extractor/visualization/domo/client/pagination.py,sha256=E3WMK9Uw-u5qt9LCUzwKdKh9oSzyFEC0GgnRMFgxgrs,713
90
+ castor_extractor/visualization/domo/client/pagination_test.py,sha256=nV4yZWfus13QFCr-tlBUgwva21VqfpF6P-0ks_Awwis,581
90
91
  castor_extractor/visualization/domo/constants.py,sha256=AriJZPrCY5Z3HRUANrMu-4U0b7hQK_jRDcxiB-hbrQ4,233
91
92
  castor_extractor/visualization/domo/extract.py,sha256=GWWRfPEMt4SgzBGFaTcoOabsoOqLRFIEFAtgXwb8LDI,2567
92
93
  castor_extractor/visualization/looker/__init__.py,sha256=Xu5bJ3743kaP8szMMp2NXCgvM1EdOQgtic4utUlO9Cc,145
@@ -221,8 +222,8 @@ castor_extractor/warehouse/abstract/query.py,sha256=GAgeISCmAdrkTKzFGO79hQDf6SA6
221
222
  castor_extractor/warehouse/abstract/time_filter.py,sha256=bggIONfMmUxffkA6TwM3BsjfS2l9WFxPq8krfsau5pw,935
222
223
  castor_extractor/warehouse/abstract/time_filter_test.py,sha256=PIkegB7KOKBdpc6zIvmyl_CeQyADeFDplyQ8HTNU5LA,448
223
224
  castor_extractor/warehouse/bigquery/__init__.py,sha256=cQTw-nkKg3CQemgufknJHPssvUWCPURTMVb6q5hiIcs,125
224
- castor_extractor/warehouse/bigquery/client.py,sha256=EHwTIoKR6P-W-_rGT5mYGXHdia8d7XjlxwstTVNQYmY,3062
225
- castor_extractor/warehouse/bigquery/client_test.py,sha256=ws7JMYW_-7Ewo7XLHLibuOkemfTN87goZwYc-QufCMY,1430
225
+ castor_extractor/warehouse/bigquery/client.py,sha256=hxz8nYx8O60Ve48hDdQ0vEJEEpRgXdMXos7kJj4fI3w,4483
226
+ castor_extractor/warehouse/bigquery/client_test.py,sha256=Ym8e4d--0YQwiVcNUnXLx0X-X6ZznwNMBMbMaDS5oEA,1514
226
227
  castor_extractor/warehouse/bigquery/extract.py,sha256=jo_9sxsCFl0ZaL1VdQ9JZ5iEEGJQVm_ogJhfpmVgT3k,2810
227
228
  castor_extractor/warehouse/bigquery/queries/.sqlfluff,sha256=ce8UDW2k39v6RBVxgKqjOHHYMoGN9S9f7BCZNHHhox8,30
228
229
  castor_extractor/warehouse/bigquery/queries/column.sql,sha256=NxdTnHwomHTEGSc-UoXFKUwg59I9XAOwrSau7JUqGQE,1815
@@ -289,7 +290,7 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
289
290
  castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
290
291
  castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
291
292
  castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
292
- castor_extractor-0.10.0.dist-info/METADATA,sha256=XnrJ7dly9vVplF9ksw1thIrfr3Y215iEPCJC4nfs_-A,5880
293
- castor_extractor-0.10.0.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
294
- castor_extractor-0.10.0.dist-info/entry_points.txt,sha256=cvLvgE8Yi10sIiafUVL86XZPMUUyu9x11CF5PshAyiw,1045
295
- castor_extractor-0.10.0.dist-info/RECORD,,
293
+ castor_extractor-0.10.1.dist-info/METADATA,sha256=5lkBM0b76v9IbuKEBQ9lnq5vCeuP0Aqa0JyizNnbMc8,5880
294
+ castor_extractor-0.10.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
295
+ castor_extractor-0.10.1.dist-info/entry_points.txt,sha256=cvLvgE8Yi10sIiafUVL86XZPMUUyu9x11CF5PshAyiw,1045
296
+ castor_extractor-0.10.1.dist-info/RECORD,,