castor-extractor 0.19.0__py3-none-any.whl → 0.19.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (83) hide show
  1. CHANGELOG.md +29 -2
  2. castor_extractor/file_checker/templates/generic_warehouse.py +1 -1
  3. castor_extractor/knowledge/notion/client/client.py +44 -80
  4. castor_extractor/knowledge/notion/client/client_test.py +9 -4
  5. castor_extractor/knowledge/notion/client/constants.py +1 -0
  6. castor_extractor/knowledge/notion/client/endpoints.py +1 -1
  7. castor_extractor/knowledge/notion/client/pagination.py +9 -5
  8. castor_extractor/quality/soda/assets.py +1 -1
  9. castor_extractor/quality/soda/client/client.py +30 -83
  10. castor_extractor/quality/soda/client/credentials.py +0 -11
  11. castor_extractor/quality/soda/client/endpoints.py +3 -6
  12. castor_extractor/quality/soda/client/pagination.py +25 -0
  13. castor_extractor/utils/__init__.py +13 -2
  14. castor_extractor/utils/client/__init__.py +14 -0
  15. castor_extractor/utils/client/api/__init__.py +5 -0
  16. castor_extractor/utils/client/api/auth.py +76 -0
  17. castor_extractor/utils/client/api/auth_test.py +49 -0
  18. castor_extractor/utils/client/api/client.py +153 -0
  19. castor_extractor/utils/client/api/client_test.py +47 -0
  20. castor_extractor/utils/client/api/pagination.py +83 -0
  21. castor_extractor/utils/client/api/pagination_test.py +51 -0
  22. castor_extractor/utils/{safe_request_test.py → client/api/safe_request_test.py} +4 -1
  23. castor_extractor/utils/client/api/utils.py +9 -0
  24. castor_extractor/utils/client/api/utils_test.py +16 -0
  25. castor_extractor/utils/collection.py +34 -2
  26. castor_extractor/utils/collection_test.py +17 -3
  27. castor_extractor/utils/pager/__init__.py +0 -1
  28. castor_extractor/utils/retry.py +44 -0
  29. castor_extractor/utils/retry_test.py +26 -1
  30. castor_extractor/utils/salesforce/client.py +44 -49
  31. castor_extractor/utils/salesforce/client_test.py +2 -2
  32. castor_extractor/utils/salesforce/pagination.py +33 -0
  33. castor_extractor/visualization/domo/client/client.py +10 -5
  34. castor_extractor/visualization/domo/client/credentials.py +1 -1
  35. castor_extractor/visualization/domo/client/endpoints.py +19 -7
  36. castor_extractor/visualization/looker/api/credentials.py +1 -1
  37. castor_extractor/visualization/metabase/client/api/client.py +26 -11
  38. castor_extractor/visualization/metabase/client/api/credentials.py +1 -1
  39. castor_extractor/visualization/metabase/client/db/credentials.py +1 -1
  40. castor_extractor/visualization/mode/client/credentials.py +1 -1
  41. castor_extractor/visualization/qlik/client/engine/credentials.py +1 -1
  42. castor_extractor/visualization/salesforce_reporting/client/rest.py +4 -3
  43. castor_extractor/visualization/sigma/client/client.py +106 -111
  44. castor_extractor/visualization/sigma/client/credentials.py +11 -1
  45. castor_extractor/visualization/sigma/client/endpoints.py +1 -1
  46. castor_extractor/visualization/sigma/client/pagination.py +22 -18
  47. castor_extractor/visualization/tableau/tests/unit/rest_api/auth_test.py +0 -1
  48. castor_extractor/visualization/tableau/tests/unit/rest_api/credentials_test.py +0 -3
  49. castor_extractor/visualization/tableau_revamp/assets.py +11 -0
  50. castor_extractor/visualization/tableau_revamp/client/client.py +71 -151
  51. castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +95 -0
  52. castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +128 -0
  53. castor_extractor/visualization/tableau_revamp/client/client_tsc.py +66 -0
  54. castor_extractor/visualization/tableau_revamp/client/{tsc_fields.py → rest_fields.py} +15 -2
  55. castor_extractor/visualization/tableau_revamp/constants.py +0 -2
  56. castor_extractor/visualization/tableau_revamp/extract.py +5 -11
  57. castor_extractor/warehouse/databricks/api_client.py +239 -0
  58. castor_extractor/warehouse/databricks/api_client_test.py +15 -0
  59. castor_extractor/warehouse/databricks/client.py +37 -490
  60. castor_extractor/warehouse/databricks/client_test.py +1 -99
  61. castor_extractor/warehouse/databricks/endpoints.py +28 -0
  62. castor_extractor/warehouse/databricks/lineage.py +141 -0
  63. castor_extractor/warehouse/databricks/lineage_test.py +34 -0
  64. castor_extractor/warehouse/databricks/pagination.py +22 -0
  65. castor_extractor/warehouse/databricks/sql_client.py +90 -0
  66. castor_extractor/warehouse/databricks/utils.py +44 -1
  67. castor_extractor/warehouse/databricks/utils_test.py +58 -1
  68. castor_extractor/warehouse/mysql/client.py +0 -2
  69. castor_extractor/warehouse/salesforce/client.py +12 -59
  70. castor_extractor/warehouse/salesforce/pagination.py +34 -0
  71. castor_extractor/warehouse/sqlserver/client.py +0 -1
  72. castor_extractor-0.19.6.dist-info/METADATA +903 -0
  73. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/RECORD +77 -60
  74. castor_extractor/utils/client/api.py +0 -87
  75. castor_extractor/utils/client/api_test.py +0 -24
  76. castor_extractor/utils/pager/pager_on_token.py +0 -52
  77. castor_extractor/utils/pager/pager_on_token_test.py +0 -73
  78. castor_extractor/visualization/sigma/client/client_test.py +0 -54
  79. castor_extractor-0.19.0.dist-info/METADATA +0 -207
  80. /castor_extractor/utils/{safe_request.py → client/api/safe_request.py} +0 -0
  81. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/LICENCE +0 -0
  82. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/WHEEL +0 -0
  83. {castor_extractor-0.19.0.dist-info → castor_extractor-0.19.6.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,14 +1,41 @@
1
+
1
2
  # Changelog
2
3
 
4
+ ## 0.19.6 - 2024-09-03
5
+
6
+ * Metabase: Adding error handler on API call
7
+
8
+ ## 0.19.5 - 2024-09-02
9
+
10
+ * Databricks/Salesforce: Remove deprecated client dependencies
11
+
12
+ ## 0.19.4 - 2024-08-29
13
+
14
+ * Tableau Pulse: extract Metrics and Subscriptions
15
+
16
+ ## 0.19.3 - 2024-08-27
17
+
18
+ - Sigma: Add SafeMode to SigmaClient
19
+
20
+ ## 0.19.2 - 2024-08-23
21
+
22
+ - Reworked APIClient to unify clients' behaviours
23
+
24
+ Impacted Technologies: Sigma, Soda, Notion
25
+
26
+ ## 0.19.1 - 2024-08-23
27
+
28
+ * Domo: extract datasources via cards metadata endpoint
29
+
3
30
  ## 0.19.0 - 2024-08-21
4
31
 
5
- * Breaking change Looker CLI:
32
+ * Breaking change Looker CLI:
6
33
 
7
34
  `-u` and `--username` changed to `-c` `--client-id`
8
35
 
9
36
  `-p` and `--password` changed to `-s` `--client-secret`
10
37
 
11
- * Breaking change Metabase CLI:
38
+ * Breaking change Metabase CLI:
12
39
 
13
40
  `-u` and `--username` changed to `-u` `--user`
14
41
 
@@ -1,7 +1,7 @@
1
1
  from typing import Set
2
2
 
3
3
  from ..column import ColumnChecker
4
- from ..constants import COLUMN_TYPES, TABLE_TYPES
4
+ from ..constants import TABLE_TYPES
5
5
  from ..enums import DataType
6
6
  from ..file import FileTemplate
7
7
 
@@ -1,24 +1,13 @@
1
- import logging
1
+ from functools import partial
2
2
  from http import HTTPStatus
3
- from typing import Dict, Iterator
3
+ from typing import Dict, Iterator, Optional
4
4
 
5
- from pydantic import ValidationError
6
-
7
- from ....utils import RequestSafeMode, empty_iterator
8
- from ....utils.client.api import APIClient, HttpMethod
5
+ from ....utils import APIClient, BearerAuth, RequestSafeMode, fetch_all_pages
9
6
  from ..assets import NotionAsset
10
- from .constants import (
11
- CASTOR_NOTION_USER_AGENT,
12
- NOTION_BASE_URL,
13
- NOTION_TIMEOUT_MS,
14
- )
7
+ from .constants import CASTOR_NOTION_USER_AGENT, NOTION_BASE_URL, NOTION_VERSION
15
8
  from .credentials import NotionCredentials
16
- from .endpoints import EndpointFactory
17
- from .pagination import PaginatedResponse
18
-
19
- logger = logging.getLogger("__name__")
20
-
21
- NOTION_VERSION = "2021-08-16"
9
+ from .endpoints import NotionEndpointFactory
10
+ from .pagination import NotionPagination
22
11
 
23
12
  VOLUME_IGNORED = 10
24
13
  IGNORED_ERROR_CODES = (HTTPStatus.BAD_GATEWAY,)
@@ -26,83 +15,64 @@ NOTION_SAFE_MODE = RequestSafeMode(
26
15
  max_errors=VOLUME_IGNORED,
27
16
  status_codes=IGNORED_ERROR_CODES,
28
17
  )
18
+ NOTION_BASE_HEADERS = {
19
+ "Notion-Version": NOTION_VERSION,
20
+ "User-Agent": CASTOR_NOTION_USER_AGENT,
21
+ }
29
22
 
30
23
 
31
24
  def _search_filter(asset: str) -> Dict[str, Dict[str, str]]:
32
25
  return {"filter": {"value": asset, "property": "object"}}
33
26
 
34
27
 
35
- class NotionClient(APIClient):
36
- """Client fetching data from Notion"""
28
+ class NotionAuth(BearerAuth):
29
+ def __init__(self, token: str):
30
+ self.token = token
37
31
 
32
+ def fetch_token(self):
33
+ return self.token
34
+
35
+
36
+ class NotionClient(APIClient):
38
37
  def __init__(
39
38
  self,
40
39
  credentials: NotionCredentials,
41
- timeout: int = NOTION_TIMEOUT_MS,
42
- base_url: str = NOTION_BASE_URL,
43
- user_agent: str = CASTOR_NOTION_USER_AGENT,
44
- safe_mode: RequestSafeMode = NOTION_SAFE_MODE,
45
- ) -> None:
46
- base_headers = {
47
- "Notion-Version": NOTION_VERSION,
48
- "User-Agent": user_agent,
49
- }
50
-
40
+ safe_mode: Optional[RequestSafeMode] = None,
41
+ ):
42
+ auth = NotionAuth(token=credentials.token)
51
43
  super().__init__(
52
- host=base_url,
53
- token=credentials.token,
54
- headers=base_headers,
55
- timeout=timeout,
56
- safe_mode=safe_mode,
44
+ host=NOTION_BASE_URL,
45
+ auth=auth,
46
+ headers=NOTION_BASE_HEADERS,
47
+ safe_mode=safe_mode or NOTION_SAFE_MODE,
57
48
  )
58
49
 
59
- def _request(
60
- self, method: HttpMethod, endpoint: str, params: dict
61
- ) -> Iterator[dict]:
62
- """
63
- API call to Notion:
64
- - If result is paginated, yield all pages
65
- - If not, yield only the response payload
66
- """
67
- built_url = self.build_url(self._host, endpoint)
68
- response_payload = self._call(
69
- method=method,
70
- url=built_url,
71
- params=params if params and method == "GET" else None,
72
- data=params if params and method == "POST" else None,
73
- )
74
- try:
75
- paginated_response = PaginatedResponse(**response_payload)
76
- yield from paginated_response.results
77
-
78
- if not paginated_response.has_more:
79
- return empty_iterator()
80
-
81
- yield from self._request(
82
- method=method,
83
- endpoint=endpoint,
84
- params=paginated_response.start_cursor,
85
- )
86
-
87
- except ValidationError:
88
- yield response_payload
50
+ def users(self) -> Iterator[dict]:
51
+ request = partial(self._get, endpoint=NotionEndpointFactory.users())
52
+ yield from fetch_all_pages(request, NotionPagination)
89
53
 
90
54
  def _page_listing(self) -> Iterator[dict]:
91
- return self._request(
92
- method="POST",
93
- endpoint=EndpointFactory.search(),
94
- params=_search_filter("page"),
55
+ request = partial(
56
+ self._post,
57
+ endpoint=NotionEndpointFactory.search(),
58
+ data=_search_filter("page"),
95
59
  )
60
+ yield from fetch_all_pages(request, NotionPagination)
96
61
 
97
62
  def _blocks(self, block_id: str) -> Iterator[dict]:
98
- return self._request("GET", EndpointFactory.blocks(block_id), {})
63
+ request = partial(
64
+ self._get,
65
+ endpoint=NotionEndpointFactory.blocks(block_id),
66
+ )
67
+ yield from fetch_all_pages(request, NotionPagination)
99
68
 
100
69
  def databases(self) -> Iterator[dict]:
101
- return self._request(
102
- method="POST",
103
- endpoint=EndpointFactory.search(),
104
- params=_search_filter("database"),
70
+ request = partial(
71
+ self._post,
72
+ endpoint=NotionEndpointFactory.search(),
73
+ data=_search_filter("database"),
105
74
  )
75
+ yield from fetch_all_pages(request, NotionPagination)
106
76
 
107
77
  def recursive_blocks(self, block_id: str) -> Iterator[dict]:
108
78
  """Fetch recursively all children blocks of a given block or page"""
@@ -116,9 +86,7 @@ class NotionClient(APIClient):
116
86
 
117
87
  def pages(self) -> Iterator[dict]:
118
88
  """Fetch all pages with its whole content"""
119
- pages = list(self._page_listing())
120
- logger.info(f"Extracting {len(pages)} pages ...")
121
- for page in pages:
89
+ for page in self._page_listing():
122
90
  if page.get("object") == "database":
123
91
  # Notion Search API filter for page doesn't work
124
92
  continue
@@ -126,10 +94,6 @@ class NotionClient(APIClient):
126
94
  page["child_blocks"] = content
127
95
  yield page
128
96
 
129
- def users(self) -> Iterator[dict]:
130
- """Fetch all users"""
131
- return self._request("GET", EndpointFactory.users(), {})
132
-
133
97
  def fetch(self, asset: NotionAsset) -> Iterator[dict]:
134
98
  """Returns the needed metadata for the queried asset"""
135
99
  if asset == NotionAsset.PAGES:
@@ -1,7 +1,10 @@
1
+ from functools import partial
1
2
  from unittest.mock import patch
2
3
 
4
+ from ....utils import fetch_all_pages
3
5
  from .client import NotionClient
4
6
  from .credentials import NotionCredentials
7
+ from .pagination import NotionPagination
5
8
 
6
9
  MOCK_PAGINATED_RESPONSE = {
7
10
  "results": [{"result_id": 1}],
@@ -16,7 +19,7 @@ MOCK_PAGINATED_RESPONSE_2 = {
16
19
  MOCK_RESPONSE = {"result_id": 3}
17
20
 
18
21
 
19
- @patch.object(NotionClient, "_call")
22
+ @patch.object(NotionClient, "_get")
20
23
  def test_NotionClient__request(mock_call):
21
24
  mock_call.side_effect = [
22
25
  MOCK_PAGINATED_RESPONSE,
@@ -25,11 +28,13 @@ def test_NotionClient__request(mock_call):
25
28
  ]
26
29
 
27
30
  client = NotionClient(NotionCredentials(token="MockToken"))
28
- response = list(client._request("GET", "fake_endpoint", {}))
31
+ get_call = partial(client._get, endpoint="FAKE")
32
+
33
+ response = list(fetch_all_pages(get_call, NotionPagination))
29
34
  assert response == [{"result_id": 1}, {"result_id": 2}]
30
35
 
31
- response = list(client._request("GET", "fake_endpoint", {}))
32
- assert response == [{"result_id": 3}]
36
+ response = get_call()
37
+ assert response == {"result_id": 3}
33
38
 
34
39
 
35
40
  MOCK_BLOCK = [{"object": "block", "id": "1", "has_children": True}]
@@ -1,3 +1,4 @@
1
1
  NOTION_BASE_URL = "https://api.notion.com/v1/"
2
2
  CASTOR_NOTION_USER_AGENT = "castor-extractor"
3
3
  NOTION_TIMEOUT_MS = 300
4
+ NOTION_VERSION = "2021-08-16"
@@ -1,4 +1,4 @@
1
- class EndpointFactory:
1
+ class NotionEndpointFactory:
2
2
  """Wrapper class around all endpoints we're using"""
3
3
 
4
4
  BLOCKS = "blocks"
@@ -1,16 +1,20 @@
1
1
  from typing import Optional
2
2
 
3
- from pydantic.dataclasses import dataclass
3
+ from ....utils import PaginationModel
4
4
 
5
5
 
6
- @dataclass
7
- class PaginatedResponse:
6
+ class NotionPagination(PaginationModel):
8
7
  """Class to handle paginated results"""
9
8
 
10
9
  results: list
11
10
  next_cursor: Optional[str]
12
11
  has_more: bool
13
12
 
14
- @property
15
- def start_cursor(self) -> dict:
13
+ def is_last(self) -> bool:
14
+ return not self.has_more
15
+
16
+ def next_page_payload(self) -> dict:
16
17
  return {"start_cursor": self.next_cursor}
18
+
19
+ def page_results(self) -> list:
20
+ return self.results
@@ -4,5 +4,5 @@ from ...types import ExternalAsset
4
4
  class SodaAsset(ExternalAsset):
5
5
  """Soda assets"""
6
6
 
7
- CHECK_RESULTS = "check_results"
7
+ CHECKS = "checks"
8
8
  DATASETS = "datasets"
@@ -1,99 +1,46 @@
1
- from time import sleep
2
- from typing import Any, Iterator, Optional
3
-
4
- import requests
5
-
6
- from ....utils import format_date, yesterday
7
- from ....utils.client.api import DEFAULT_TIMEOUT_S, HttpMethod
1
+ from functools import partial
2
+ from typing import Iterator
3
+
4
+ from ....utils import (
5
+ APIClient,
6
+ BasicAuth,
7
+ fetch_all_pages,
8
+ )
8
9
  from ..assets import SodaAsset
9
10
  from .credentials import SodaCredentials
10
11
  from .endpoints import SodaEndpointFactory
12
+ from .pagination import SodaCloudPagination
11
13
 
12
- _REPORTING_PAGE_SIZE = 400
13
- _CLOUD_PAGE_SIZE = 100
14
+ _CLOUD_API = "https://cloud.soda.io/api/v1/"
14
15
  _REQUESTS_PER_MINUTE = 10
15
16
  _SECONDS_PER_MINUTE = 60
16
- _RATE_LIMIT_S = (_SECONDS_PER_MINUTE // _REQUESTS_PER_MINUTE) + 1
17
+ _RATE_LIMIT_MS = (_SECONDS_PER_MINUTE // _REQUESTS_PER_MINUTE) + 1
17
18
 
19
+ HEADERS = {"Content-Type": "application/json"}
18
20
 
19
- class SodaClient:
20
- def __init__(
21
- self,
22
- credentials: SodaCredentials,
23
- ):
24
- self._timeout = DEFAULT_TIMEOUT_S
25
- self._reporting_headers = credentials.reporting_headers
26
- self._auth = (credentials.api_key, credentials.secret)
27
21
 
28
- def _call(
29
- self,
30
- url: str,
31
- headers: dict,
32
- method: HttpMethod = "GET",
33
- *,
34
- params: Optional[dict] = None,
35
- data: Optional[dict] = None,
36
- auth: Optional[tuple] = None,
37
- ) -> Any:
38
- response = requests.request(
39
- method,
40
- url,
41
- headers=headers,
42
- params=params,
43
- json=data,
44
- timeout=self._timeout,
45
- auth=auth,
22
+ class SodaClient(APIClient):
23
+ def __init__(self, credentials: SodaCredentials):
24
+ cloud_auth = BasicAuth(
25
+ username=credentials.api_key, password=credentials.secret
46
26
  )
47
- response.raise_for_status()
48
-
49
- return response.json()
27
+ super().__init__(host=_CLOUD_API, auth=cloud_auth, headers=HEADERS)
50
28
 
51
- def _get_results_paginated(self, url: str, additional: dict) -> Iterator:
52
- page_number = 1
53
- next_page = True
54
- while next_page:
55
- json_data = {
56
- **{"page": page_number, "size": _REPORTING_PAGE_SIZE},
57
- **additional,
58
- }
59
- _check_results_page = self._call(
60
- url=url,
61
- method="POST",
62
- data=json_data,
63
- headers=self._reporting_headers,
64
- )
65
- yield from _check_results_page["data"]
66
-
67
- next_page = len(_check_results_page["data"]) == _REPORTING_PAGE_SIZE
68
- page_number += 1
69
-
70
- def _datasets(self) -> Iterator[dict]:
71
- url = SodaEndpointFactory.datasets()
72
- next_page = True
73
- page_number = 0
74
- while next_page:
75
- data = self._call(
76
- url=url,
77
- method="GET",
78
- headers={},
79
- params={"size": _CLOUD_PAGE_SIZE, "page": page_number},
80
- auth=self._auth,
81
- )
82
- yield from data["content"]
83
- next_page = not data["last"]
84
- page_number += 1
85
- sleep(_RATE_LIMIT_S)
29
+ def datasets(self) -> Iterator[dict]:
30
+ request = partial(self._get, endpoint=SodaEndpointFactory.datasets())
31
+ yield from fetch_all_pages(
32
+ request, SodaCloudPagination, rate_limit=_RATE_LIMIT_MS
33
+ )
86
34
 
87
- def _check_results(self) -> Iterator:
88
- url = SodaEndpointFactory.check_results()
89
- _date = format_date(timestamp=yesterday())
90
- return self._get_results_paginated(
91
- url, additional={"from_datetime": _date}
35
+ def checks(self) -> Iterator[dict]:
36
+ request = partial(self._get, endpoint=SodaEndpointFactory.checks())
37
+ yield from fetch_all_pages(
38
+ request, SodaCloudPagination, rate_limit=_RATE_LIMIT_MS
92
39
  )
93
40
 
94
- def fetch(self, asset: SodaAsset) -> Iterator[dict]:
41
+ def fetch(self, asset: SodaAsset) -> Iterator:
95
42
  if asset == SodaAsset.DATASETS:
96
- yield from self._datasets()
97
- if asset == SodaAsset.CHECK_RESULTS:
98
- yield from self._check_results()
43
+ return self.datasets()
44
+ if asset == SodaAsset.CHECKS:
45
+ return self.checks()
99
46
  raise ValueError(f"The asset {asset}, is not supported")
@@ -1,8 +1,5 @@
1
- from typing import Dict
2
-
3
1
  from pydantic import Field
4
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
5
- from requests.auth import HTTPBasicAuth
6
3
 
7
4
  SODA_ENV_PREFIX = "CASTOR_SODA_"
8
5
 
@@ -18,11 +15,3 @@ class SodaCredentials(BaseSettings):
18
15
 
19
16
  api_key: str = Field(repr=False)
20
17
  secret: str = Field(repr=False)
21
-
22
- @property
23
- def reporting_headers(self) -> Dict[str, str]:
24
- return {
25
- "Content-Type": "application/json",
26
- "API_KEY_ID": self.api_key,
27
- "API_KEY_SECRET": self.secret,
28
- }
@@ -1,13 +1,10 @@
1
1
  class SodaEndpointFactory:
2
2
  """Wrapper class around all endpoints we're using"""
3
3
 
4
- CLOUD_API = "https://cloud.soda.io/api/v1"
5
- REPORTING_API = "https://reporting.cloud.soda.io/v1"
6
-
7
4
  @classmethod
8
5
  def datasets(cls) -> str:
9
- return f"{cls.CLOUD_API}/datasets"
6
+ return "datasets"
10
7
 
11
8
  @classmethod
12
- def check_results(cls) -> str:
13
- return f"{cls.REPORTING_API}/quality/check_results"
9
+ def checks(cls) -> str:
10
+ return "checks"
@@ -0,0 +1,25 @@
1
+ from typing import List
2
+
3
+ from ....utils import PaginationModel
4
+
5
+ _CLOUD_PAGE_SIZE = 100
6
+ _CLOUD_FIRST_PAGE = 0
7
+
8
+
9
+ class SodaCloudPagination(PaginationModel):
10
+ content: List[dict]
11
+ last: bool
12
+
13
+ def is_last(self) -> bool:
14
+ return self.last
15
+
16
+ def next_page_payload(self) -> dict:
17
+ current_page = (
18
+ self.current_page_payload[self.fetch_by.value]["page"]
19
+ if self.current_page_payload
20
+ else _CLOUD_FIRST_PAGE
21
+ )
22
+ return {"page": current_page + 1, "size": _CLOUD_PAGE_SIZE}
23
+
24
+ def page_results(self) -> list:
25
+ return self.content
@@ -1,12 +1,24 @@
1
1
  from .argument_parser import parse_filled_arguments
2
2
  from .client import (
3
3
  AbstractSourceClient,
4
+ APIClient,
5
+ Auth,
6
+ BasicAuth,
7
+ BearerAuth,
8
+ CustomAuth,
4
9
  ExtractionQuery,
10
+ FetchNextPageBy,
11
+ PaginationModel,
5
12
  PostgresClient,
13
+ RequestSafeMode,
14
+ ResponseJson,
6
15
  SqlalchemyClient,
16
+ build_url,
17
+ fetch_all_pages,
18
+ handle_response,
7
19
  uri_encode,
8
20
  )
9
- from .collection import empty_iterator, group_by, mapping_from_rows
21
+ from .collection import deduplicate, empty_iterator, group_by, mapping_from_rows
10
22
  from .constants import OUTPUT_DIR
11
23
  from .deprecate import deprecate_python
12
24
  from .env import from_env
@@ -24,7 +36,6 @@ from .pager import (
24
36
  )
25
37
  from .retry import RetryStrategy, retry
26
38
  from .safe import SafeMode, safe_mode
27
- from .safe_request import RequestSafeMode, ResponseJson, handle_response
28
39
  from .store import AbstractStorage, LocalStorage
29
40
  from .string import decode_when_bytes, string_to_tuple
30
41
  from .time import (
@@ -1,4 +1,18 @@
1
1
  from .abstract import AbstractSourceClient, SqlalchemyClient
2
+ from .api import (
3
+ APIClient,
4
+ Auth,
5
+ BasicAuth,
6
+ BearerAuth,
7
+ CustomAuth,
8
+ FetchNextPageBy,
9
+ PaginationModel,
10
+ RequestSafeMode,
11
+ ResponseJson,
12
+ build_url,
13
+ fetch_all_pages,
14
+ handle_response,
15
+ )
2
16
  from .postgres import PostgresClient
3
17
  from .query import ExtractionQuery
4
18
  from .uri import uri_encode
@@ -0,0 +1,5 @@
1
+ from .auth import Auth, BasicAuth, BearerAuth, CustomAuth
2
+ from .client import APIClient
3
+ from .pagination import FetchNextPageBy, PaginationModel, fetch_all_pages
4
+ from .safe_request import RequestSafeMode, ResponseJson, handle_response
5
+ from .utils import build_url
@@ -0,0 +1,76 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict, Optional, Union
4
+
5
+ from requests.auth import AuthBase, HTTPBasicAuth
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class BasicAuth(HTTPBasicAuth):
    """
    HTTP basic authentication for the APIClient

    Usage:
    - build it from a username and a password
    - hand the resulting instance over to the APIClient
    """

    def refresh_token(self):
        """No-op: nothing is refreshed for basic authentication"""
        return None
20
+
21
+
22
class CustomAuth(AuthBase, ABC):
    """
    Authentication for API using custom auth method

    You need to:
    - implement the `_authentication_header()` method
    - pass the Auth class to the APIClient
    """

    def refresh_token(self):
        """Method to refresh the token if token expires (no-op by default)"""
        pass

    @abstractmethod
    def _authentication_header(self) -> Dict[str, str]:
        """Return the header(s) to add to every outgoing request"""
        pass

    def __call__(self, r):
        """Add the authentication header(s) to the request `r` and return it"""
        # Update in place instead of rebinding `r.headers` to a plain dict:
        # requests keeps headers in a case-insensitive mapping, and swapping
        # it for a dict would drop that behaviour for the rest of the request
        r.headers.update(self._authentication_header())
        return r
42
+
43
+
44
class BearerAuth(ABC):
    """
    Authentication for API using Bearer tokens

    You need to:
    - implement the `fetch_token()` method
    - pass the Auth class to the APIClient

    The token is fetched on first use and cached until `refresh_token()`
    is called. Deriving from ABC makes the `fetch_token()` requirement
    effective: a subclass that does not implement it cannot be instantiated.
    """

    _token: Optional[str] = None
    authentication_key = "Authorization"

    @abstractmethod
    def fetch_token(self) -> Optional[str]:
        """Method that should return the bearer token"""
        pass

    def refresh_token(self):
        """Method to refresh the token if token expires"""
        self._fetch_token(force_refresh=True)

    def _fetch_token(self, force_refresh: bool = False) -> Optional[str]:
        """
        Return the header value (`Bearer <token>`)

        The token is (re)fetched when none is cached or when `force_refresh`
        is set. Returns None when `fetch_token()` yields no token, so that
        callers never send the literal string "Bearer None".
        """
        if not self._token or force_refresh:
            logger.info("Refreshing authentication token...")
            self._token = self.fetch_token()
        if not self._token:
            logger.warning("No authentication token available")
            return None
        return f"Bearer {self._token}"

    def __call__(self, r):
        """Set the authentication header on the request `r` and return it"""
        header_value = self._fetch_token()
        # leave the request untouched rather than sending a bogus credential
        if header_value is not None:
            r.headers[self.authentication_key] = header_value
        return r
74
+
75
+
76
+ Auth = Union[BasicAuth, CustomAuth, BearerAuth]