castor-extractor 0.10.0__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +4 -0
- castor_extractor/visualization/domo/client/client.py +4 -1
- castor_extractor/visualization/domo/client/pagination.py +8 -3
- castor_extractor/visualization/domo/client/pagination_test.py +22 -0
- castor_extractor/warehouse/bigquery/client.py +47 -15
- castor_extractor/warehouse/bigquery/client_test.py +4 -1
- {castor_extractor-0.10.0.dist-info → castor_extractor-0.10.1.dist-info}/METADATA +1 -1
- {castor_extractor-0.10.0.dist-info → castor_extractor-0.10.1.dist-info}/RECORD +10 -9
- {castor_extractor-0.10.0.dist-info → castor_extractor-0.10.1.dist-info}/WHEEL +0 -0
- {castor_extractor-0.10.0.dist-info → castor_extractor-0.10.1.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -128,7 +128,10 @@ class DomoClient:
|
|
|
128
128
|
while pagination.needs_increment:
|
|
129
129
|
results = self._get_many(
|
|
130
130
|
endpoint=endpoint,
|
|
131
|
-
params={
|
|
131
|
+
params={
|
|
132
|
+
"offset": pagination.offset,
|
|
133
|
+
"limit": pagination.per_page,
|
|
134
|
+
},
|
|
132
135
|
)
|
|
133
136
|
all_results.extend(results)
|
|
134
137
|
number_of_items = len(results)
|
|
@@ -1,23 +1,28 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
|
-
PER_PAGE = 50
|
|
4
|
+
PER_PAGE = 50 # maximum value accepted by DOMO is 50
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
@dataclass
|
|
7
8
|
class Pagination:
|
|
8
9
|
"""Handles pagination within DOMO Api"""
|
|
9
10
|
|
|
10
|
-
number_results: int =
|
|
11
|
+
number_results: Optional[int] = None
|
|
11
12
|
offset: int = 0
|
|
12
13
|
per_page: int = PER_PAGE
|
|
13
14
|
should_stop: bool = False
|
|
14
15
|
|
|
15
16
|
@property
|
|
16
17
|
def needs_increment(self) -> bool:
|
|
18
|
+
if self.number_results is None:
|
|
19
|
+
return True # first iteration
|
|
20
|
+
|
|
17
21
|
if (self.number_results < self.per_page) or self.should_stop:
|
|
18
22
|
return False
|
|
23
|
+
|
|
19
24
|
return True
|
|
20
25
|
|
|
21
26
|
def increment_offset(self, number_results: int) -> None:
|
|
22
|
-
self.offset +=
|
|
27
|
+
self.offset += number_results
|
|
23
28
|
self.number_results = number_results
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from .pagination import Pagination
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_pagination():
|
|
5
|
+
per_page = 20
|
|
6
|
+
|
|
7
|
+
pagination = Pagination(per_page=per_page)
|
|
8
|
+
|
|
9
|
+
assert pagination.number_results is None
|
|
10
|
+
assert pagination.offset == 0
|
|
11
|
+
|
|
12
|
+
pagination.increment_offset(per_page)
|
|
13
|
+
assert pagination.offset == per_page
|
|
14
|
+
assert pagination.needs_increment
|
|
15
|
+
|
|
16
|
+
pagination.increment_offset(per_page)
|
|
17
|
+
assert pagination.offset == per_page * 2
|
|
18
|
+
assert pagination.needs_increment
|
|
19
|
+
|
|
20
|
+
pagination.increment_offset(5)
|
|
21
|
+
assert pagination.offset == per_page * 2 + 5
|
|
22
|
+
assert not pagination.needs_increment
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
from typing import List, Optional, Set, Tuple
|
|
2
2
|
|
|
3
|
+
from google.api_core.exceptions import Forbidden
|
|
4
|
+
from google.api_core.page_iterator import Iterator as PageIterator
|
|
3
5
|
from google.cloud.bigquery import Client as GoogleCloudClient # type: ignore
|
|
6
|
+
from google.cloud.bigquery.dataset import Dataset # type: ignore
|
|
4
7
|
from google.oauth2.service_account import Credentials # type: ignore
|
|
5
8
|
|
|
9
|
+
from ...utils import retry
|
|
6
10
|
from ..abstract import SqlalchemyClient
|
|
7
11
|
|
|
8
12
|
BIGQUERY_URI = "bigquery://"
|
|
@@ -10,6 +14,9 @@ BIGQUERY_URI = "bigquery://"
|
|
|
10
14
|
CREDENTIALS_INFO_KEY = "credentials_info"
|
|
11
15
|
PROJECT_ID_KEY = "project_id"
|
|
12
16
|
|
|
17
|
+
_RETRY_NUMBER = 1
|
|
18
|
+
_RETRY_BASE_MS = 60_000
|
|
19
|
+
|
|
13
20
|
|
|
14
21
|
class BigQueryClient(SqlalchemyClient):
|
|
15
22
|
"""Connect to BigQuery and run SQL queries"""
|
|
@@ -25,6 +32,9 @@ class BigQueryClient(SqlalchemyClient):
|
|
|
25
32
|
self._db_allowed = db_allowed
|
|
26
33
|
self._db_blocked = db_blocked
|
|
27
34
|
self._dataset_blocked = dataset_blocked
|
|
35
|
+
self.client = self._client()
|
|
36
|
+
self._projects: List[str] | None = None
|
|
37
|
+
self._datasets: List[Dataset] | None = None
|
|
28
38
|
|
|
29
39
|
@staticmethod
|
|
30
40
|
def name() -> str:
|
|
@@ -51,7 +61,7 @@ class BigQueryClient(SqlalchemyClient):
|
|
|
51
61
|
def _build_uri(self, credentials: dict) -> str:
|
|
52
62
|
return BIGQUERY_URI
|
|
53
63
|
|
|
54
|
-
def
|
|
64
|
+
def _client(self) -> GoogleCloudClient:
|
|
55
65
|
assert (
|
|
56
66
|
CREDENTIALS_INFO_KEY in self._options
|
|
57
67
|
), "Missing BigQuery credentials in engine's options"
|
|
@@ -61,25 +71,47 @@ class BigQueryClient(SqlalchemyClient):
|
|
|
61
71
|
credentials=Credentials.from_service_account_info(credentials),
|
|
62
72
|
)
|
|
63
73
|
|
|
64
|
-
def _list_datasets(self) -> List:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
74
|
+
def _list_datasets(self) -> List[Dataset]:
|
|
75
|
+
"""
|
|
76
|
+
Returns datasets available for the given GCP client
|
|
77
|
+
Cache the result in self._datasets to reduce number of API calls
|
|
78
|
+
"""
|
|
79
|
+
if self._datasets is None:
|
|
80
|
+
self._datasets = [
|
|
81
|
+
dataset
|
|
82
|
+
for project_id in self.get_projects()
|
|
83
|
+
for dataset in self.client.list_datasets(project_id)
|
|
84
|
+
if self._keep_dataset(dataset.dataset_id)
|
|
85
|
+
]
|
|
86
|
+
return self._datasets
|
|
87
|
+
|
|
88
|
+
@retry((Forbidden,), count=_RETRY_NUMBER, base_ms=_RETRY_BASE_MS)
|
|
89
|
+
def _list_projects(self) -> PageIterator:
|
|
90
|
+
"""
|
|
91
|
+
Note: Calling list_projects from GoogleCloudClient causes some
|
|
92
|
+
```
|
|
93
|
+
google.api_core.exceptions.Forbidden: 403 GET https://bigquery.googleapis.com/bigquery/v2/projects?prettyPrint=false
|
|
94
|
+
Quota exceeded: Your user exceeded quota for concurrent project.lists requests.
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
This function aims to isolate the call with a custom retry strategy.
|
|
98
|
+
Note that google allows a retry parameter on client.list_projects but
|
|
99
|
+
that looks way too complex to customize.
|
|
100
|
+
"""
|
|
101
|
+
return self.client.list_projects()
|
|
72
102
|
|
|
73
103
|
def get_projects(self) -> List[str]:
|
|
74
104
|
"""
|
|
75
105
|
Returns distinct project_id available for the given GCP client
|
|
106
|
+
Cache the result in self._projects to reduce number of API calls
|
|
76
107
|
"""
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
108
|
+
if self._projects is None:
|
|
109
|
+
self._projects = [
|
|
110
|
+
p.project_id
|
|
111
|
+
for p in self._list_projects()
|
|
112
|
+
if self._keep_project(p.project_id)
|
|
113
|
+
]
|
|
114
|
+
return self._projects
|
|
83
115
|
|
|
84
116
|
def get_regions(self) -> Set[Tuple[str, str]]:
|
|
85
117
|
"""
|
|
@@ -26,8 +26,11 @@ class MockBigQueryClient(BigQueryClient):
|
|
|
26
26
|
self._db_allowed = ["project_2", "project_1"]
|
|
27
27
|
self._dataset_blocked = ["hidden_dataset"]
|
|
28
28
|
self._db_blocked = ["hidden_project"]
|
|
29
|
+
self._projects = None
|
|
30
|
+
self._datasets = None
|
|
31
|
+
self.client = self._client()
|
|
29
32
|
|
|
30
|
-
def
|
|
33
|
+
def _client(self) -> Mock:
|
|
31
34
|
fake_client = Mock()
|
|
32
35
|
fake_client.list_projects = Mock(
|
|
33
36
|
return_value=[
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256
|
|
1
|
+
CHANGELOG.md,sha256=YHDD-wLJh1LKqODJfg07WPSIj6pPmkI8F0za9PzKFiw,7397
|
|
2
2
|
Dockerfile,sha256=TC6hFjG3mvnt1nkw2EpaS42hRYaGA2YIPKgWhVSKTWc,303
|
|
3
3
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
4
4
|
README.md,sha256=EL6JpZxvaQFOYv5WFuSjZvSk9Hcpsf7alMlUC5IPFjA,3423
|
|
@@ -82,11 +82,12 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
82
82
|
castor_extractor/visualization/domo/__init__.py,sha256=_mAYVfoVLizfLGF_f6ZiwBhdPpvoJY_diySf33dt3Jo,127
|
|
83
83
|
castor_extractor/visualization/domo/assets.py,sha256=JI45W7n5z_opbuRrCwgzQJuN-VikuRrilUj5g5lX7Hk,184
|
|
84
84
|
castor_extractor/visualization/domo/client/__init__.py,sha256=UDszV3IXNC9Wp_j55NZ-6ey2INo0TYtAg2QNIJOjglE,88
|
|
85
|
-
castor_extractor/visualization/domo/client/client.py,sha256=
|
|
85
|
+
castor_extractor/visualization/domo/client/client.py,sha256=MN-qleOpSVawZRlVL8EiglEOMj0cdvMqEbgYYBfz86w,9083
|
|
86
86
|
castor_extractor/visualization/domo/client/client_test.py,sha256=5Z_C2B0fs60aGnMF78llBnRkzehH4tZqRPXPkccLvBM,583
|
|
87
87
|
castor_extractor/visualization/domo/client/credentials.py,sha256=CksQ9W9X6IGjTlYN0okwGAmURMRJKAjctxODAvAJUAo,1148
|
|
88
88
|
castor_extractor/visualization/domo/client/endpoints.py,sha256=-B7mRKJ44Bg0hb3E5dQXvCVK6qHzizdeSQXsSwEJEIY,1812
|
|
89
|
-
castor_extractor/visualization/domo/client/pagination.py,sha256=
|
|
89
|
+
castor_extractor/visualization/domo/client/pagination.py,sha256=E3WMK9Uw-u5qt9LCUzwKdKh9oSzyFEC0GgnRMFgxgrs,713
|
|
90
|
+
castor_extractor/visualization/domo/client/pagination_test.py,sha256=nV4yZWfus13QFCr-tlBUgwva21VqfpF6P-0ks_Awwis,581
|
|
90
91
|
castor_extractor/visualization/domo/constants.py,sha256=AriJZPrCY5Z3HRUANrMu-4U0b7hQK_jRDcxiB-hbrQ4,233
|
|
91
92
|
castor_extractor/visualization/domo/extract.py,sha256=GWWRfPEMt4SgzBGFaTcoOabsoOqLRFIEFAtgXwb8LDI,2567
|
|
92
93
|
castor_extractor/visualization/looker/__init__.py,sha256=Xu5bJ3743kaP8szMMp2NXCgvM1EdOQgtic4utUlO9Cc,145
|
|
@@ -221,8 +222,8 @@ castor_extractor/warehouse/abstract/query.py,sha256=GAgeISCmAdrkTKzFGO79hQDf6SA6
|
|
|
221
222
|
castor_extractor/warehouse/abstract/time_filter.py,sha256=bggIONfMmUxffkA6TwM3BsjfS2l9WFxPq8krfsau5pw,935
|
|
222
223
|
castor_extractor/warehouse/abstract/time_filter_test.py,sha256=PIkegB7KOKBdpc6zIvmyl_CeQyADeFDplyQ8HTNU5LA,448
|
|
223
224
|
castor_extractor/warehouse/bigquery/__init__.py,sha256=cQTw-nkKg3CQemgufknJHPssvUWCPURTMVb6q5hiIcs,125
|
|
224
|
-
castor_extractor/warehouse/bigquery/client.py,sha256=
|
|
225
|
-
castor_extractor/warehouse/bigquery/client_test.py,sha256=
|
|
225
|
+
castor_extractor/warehouse/bigquery/client.py,sha256=hxz8nYx8O60Ve48hDdQ0vEJEEpRgXdMXos7kJj4fI3w,4483
|
|
226
|
+
castor_extractor/warehouse/bigquery/client_test.py,sha256=Ym8e4d--0YQwiVcNUnXLx0X-X6ZznwNMBMbMaDS5oEA,1514
|
|
226
227
|
castor_extractor/warehouse/bigquery/extract.py,sha256=jo_9sxsCFl0ZaL1VdQ9JZ5iEEGJQVm_ogJhfpmVgT3k,2810
|
|
227
228
|
castor_extractor/warehouse/bigquery/queries/.sqlfluff,sha256=ce8UDW2k39v6RBVxgKqjOHHYMoGN9S9f7BCZNHHhox8,30
|
|
228
229
|
castor_extractor/warehouse/bigquery/queries/column.sql,sha256=NxdTnHwomHTEGSc-UoXFKUwg59I9XAOwrSau7JUqGQE,1815
|
|
@@ -289,7 +290,7 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
|
|
|
289
290
|
castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
|
|
290
291
|
castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
|
|
291
292
|
castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
|
|
292
|
-
castor_extractor-0.10.
|
|
293
|
-
castor_extractor-0.10.
|
|
294
|
-
castor_extractor-0.10.
|
|
295
|
-
castor_extractor-0.10.
|
|
293
|
+
castor_extractor-0.10.1.dist-info/METADATA,sha256=5lkBM0b76v9IbuKEBQ9lnq5vCeuP0Aqa0JyizNnbMc8,5880
|
|
294
|
+
castor_extractor-0.10.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
295
|
+
castor_extractor-0.10.1.dist-info/entry_points.txt,sha256=cvLvgE8Yi10sIiafUVL86XZPMUUyu9x11CF5PshAyiw,1045
|
|
296
|
+
castor_extractor-0.10.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|