castor-extractor 0.21.7__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (131)
  1. CHANGELOG.md +8 -0
  2. castor_extractor/commands/__init__.py +0 -3
  3. castor_extractor/commands/file_check.py +1 -2
  4. castor_extractor/file_checker/column.py +5 -5
  5. castor_extractor/file_checker/file.py +7 -7
  6. castor_extractor/file_checker/file_test.py +2 -2
  7. castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
  8. castor_extractor/knowledge/confluence/client/client.py +2 -1
  9. castor_extractor/knowledge/confluence/extract.py +3 -2
  10. castor_extractor/knowledge/notion/client/client.py +3 -2
  11. castor_extractor/knowledge/notion/extract.py +3 -2
  12. castor_extractor/quality/soda/client/client.py +2 -1
  13. castor_extractor/quality/soda/client/pagination.py +1 -3
  14. castor_extractor/types.py +3 -3
  15. castor_extractor/uploader/env.py +2 -2
  16. castor_extractor/uploader/upload.py +4 -3
  17. castor_extractor/uploader/utils.py +1 -1
  18. castor_extractor/utils/client/abstract.py +2 -1
  19. castor_extractor/utils/client/api/auth.py +2 -2
  20. castor_extractor/utils/client/api/auth_test.py +2 -2
  21. castor_extractor/utils/client/api/client.py +8 -3
  22. castor_extractor/utils/client/api/pagination.py +3 -2
  23. castor_extractor/utils/client/api/safe_request.py +5 -5
  24. castor_extractor/utils/collection.py +7 -11
  25. castor_extractor/utils/dbt/client.py +3 -3
  26. castor_extractor/utils/dbt/client_test.py +2 -2
  27. castor_extractor/utils/deprecate.py +1 -2
  28. castor_extractor/utils/files.py +5 -5
  29. castor_extractor/utils/formatter.py +5 -4
  30. castor_extractor/utils/json_stream_write.py +2 -1
  31. castor_extractor/utils/object.py +2 -1
  32. castor_extractor/utils/pager/pager.py +2 -4
  33. castor_extractor/utils/pager/pager_on_id.py +2 -1
  34. castor_extractor/utils/pager/pager_on_id_test.py +5 -5
  35. castor_extractor/utils/pager/pager_test.py +3 -3
  36. castor_extractor/utils/retry.py +4 -3
  37. castor_extractor/utils/retry_test.py +2 -3
  38. castor_extractor/utils/safe.py +3 -3
  39. castor_extractor/utils/salesforce/client.py +2 -1
  40. castor_extractor/utils/salesforce/credentials.py +1 -3
  41. castor_extractor/utils/store.py +2 -1
  42. castor_extractor/utils/string.py +2 -2
  43. castor_extractor/utils/string_test.py +1 -3
  44. castor_extractor/utils/type.py +3 -2
  45. castor_extractor/utils/validation.py +4 -4
  46. castor_extractor/utils/write.py +2 -2
  47. castor_extractor/visualization/domo/client/client.py +8 -7
  48. castor_extractor/visualization/domo/client/credentials.py +2 -2
  49. castor_extractor/visualization/domo/client/endpoints.py +2 -2
  50. castor_extractor/visualization/domo/extract.py +3 -2
  51. castor_extractor/visualization/looker/api/client.py +17 -16
  52. castor_extractor/visualization/looker/api/utils.py +2 -2
  53. castor_extractor/visualization/looker/assets.py +1 -3
  54. castor_extractor/visualization/looker/extract.py +4 -3
  55. castor_extractor/visualization/looker/fields.py +3 -3
  56. castor_extractor/visualization/looker/multithreading.py +3 -3
  57. castor_extractor/visualization/metabase/assets.py +1 -3
  58. castor_extractor/visualization/metabase/client/api/client.py +8 -7
  59. castor_extractor/visualization/metabase/extract.py +3 -2
  60. castor_extractor/visualization/metabase/types.py +1 -3
  61. castor_extractor/visualization/mode/client/client.py +6 -6
  62. castor_extractor/visualization/mode/extract.py +2 -2
  63. castor_extractor/visualization/powerbi/assets.py +1 -3
  64. castor_extractor/visualization/powerbi/client/client.py +12 -11
  65. castor_extractor/visualization/powerbi/client/credentials.py +3 -3
  66. castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
  67. castor_extractor/visualization/powerbi/extract.py +3 -2
  68. castor_extractor/visualization/qlik/assets.py +1 -3
  69. castor_extractor/visualization/qlik/client/constants.py +1 -3
  70. castor_extractor/visualization/qlik/client/engine/error.py +1 -3
  71. castor_extractor/visualization/qlik/client/master.py +3 -3
  72. castor_extractor/visualization/qlik/client/rest.py +12 -12
  73. castor_extractor/visualization/qlik/extract.py +4 -3
  74. castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
  75. castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
  76. castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
  77. castor_extractor/visualization/sigma/client/client.py +11 -8
  78. castor_extractor/visualization/sigma/client/credentials.py +1 -3
  79. castor_extractor/visualization/sigma/client/pagination.py +1 -1
  80. castor_extractor/visualization/sigma/extract.py +3 -2
  81. castor_extractor/visualization/tableau/assets.py +1 -2
  82. castor_extractor/visualization/tableau/client/client.py +1 -2
  83. castor_extractor/visualization/tableau/client/client_utils.py +3 -2
  84. castor_extractor/visualization/tableau/client/credentials.py +3 -3
  85. castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
  86. castor_extractor/visualization/tableau/extract.py +2 -2
  87. castor_extractor/visualization/tableau/gql_fields.py +3 -3
  88. castor_extractor/visualization/tableau/tsc_fields.py +1 -2
  89. castor_extractor/visualization/tableau/types.py +3 -3
  90. castor_extractor/visualization/tableau_revamp/client/client.py +6 -1
  91. castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +56 -9
  92. castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
  93. castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
  94. castor_extractor/visualization/tableau_revamp/client/errors.py +5 -0
  95. castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
  96. castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
  97. castor_extractor/visualization/tableau_revamp/extract.py +2 -2
  98. castor_extractor/visualization/thoughtspot/client/client.py +3 -2
  99. castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
  100. castor_extractor/visualization/thoughtspot/extract.py +3 -2
  101. castor_extractor/warehouse/abstract/asset.py +4 -5
  102. castor_extractor/warehouse/abstract/extract.py +4 -3
  103. castor_extractor/warehouse/abstract/query.py +4 -4
  104. castor_extractor/warehouse/bigquery/client.py +8 -8
  105. castor_extractor/warehouse/bigquery/extract.py +1 -1
  106. castor_extractor/warehouse/bigquery/query.py +2 -2
  107. castor_extractor/warehouse/bigquery/types.py +2 -4
  108. castor_extractor/warehouse/databricks/api_client.py +15 -14
  109. castor_extractor/warehouse/databricks/client.py +16 -16
  110. castor_extractor/warehouse/databricks/extract.py +4 -4
  111. castor_extractor/warehouse/databricks/format.py +12 -12
  112. castor_extractor/warehouse/databricks/lineage.py +11 -11
  113. castor_extractor/warehouse/databricks/pagination.py +2 -2
  114. castor_extractor/warehouse/databricks/types.py +4 -4
  115. castor_extractor/warehouse/databricks/utils.py +5 -4
  116. castor_extractor/warehouse/mysql/query.py +2 -2
  117. castor_extractor/warehouse/postgres/query.py +2 -2
  118. castor_extractor/warehouse/redshift/client.py +1 -1
  119. castor_extractor/warehouse/redshift/query.py +2 -2
  120. castor_extractor/warehouse/salesforce/client.py +8 -8
  121. castor_extractor/warehouse/salesforce/extract.py +3 -4
  122. castor_extractor/warehouse/salesforce/format.py +8 -7
  123. castor_extractor/warehouse/salesforce/format_test.py +2 -4
  124. castor_extractor/warehouse/snowflake/query.py +5 -5
  125. castor_extractor/warehouse/sqlserver/client.py +1 -1
  126. castor_extractor/warehouse/sqlserver/query.py +2 -2
  127. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/METADATA +11 -6
  128. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/RECORD +131 -131
  129. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/LICENCE +0 -0
  130. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/WHEEL +0 -0
  131. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  import logging
2
+ from collections.abc import Iterator
2
3
  from http import HTTPStatus
3
- from typing import Any, Dict, Iterator, List, Optional, Set, cast
4
+ from typing import Any, Optional, cast
4
5
 
5
6
  import requests
6
7
  from requests import HTTPError
@@ -78,7 +79,7 @@ class ApiClient:
78
79
  @staticmethod
79
80
  def _answer(response: Any):
80
81
  answer = response
81
- if isinstance(answer, Dict) and DATA_KEY in answer:
82
+ if isinstance(answer, dict) and DATA_KEY in answer:
82
83
  # v0.41 of Metabase introduced embedded data for certain calls
83
84
  # {'data': [{ }, ...] , 'total': 15, 'limit': None, 'offset': None}"
84
85
  return answer[DATA_KEY]
@@ -123,7 +124,7 @@ class ApiClient:
123
124
  def _fetch_ids(self, asset: MetabaseAsset) -> IdsType:
124
125
  ids: IdsType = []
125
126
  results = self._call(endpoint=asset.name.lower())
126
- for res in cast(List, results):
127
+ for res in cast(list, results):
127
128
  assert isinstance(res, dict)
128
129
  ids.append(res["id"])
129
130
  return ids
@@ -135,7 +136,7 @@ class ApiClient:
135
136
  if not collection:
136
137
  continue
137
138
 
138
- seen_dashboard_ids: Set[int] = set()
139
+ seen_dashboard_ids: set[int] = set()
139
140
 
140
141
  for dashboard in cast(SerializedAsset, collection):
141
142
  if dashboard.get("model") != "dashboard":
@@ -149,7 +150,7 @@ class ApiClient:
149
150
 
150
151
  if dashboard_id not in seen_dashboard_ids:
151
152
  seen_dashboard_ids.add(dashboard_id)
152
- yield cast(Dict, self._call(f"dashboard/{dashboard_id}"))
153
+ yield cast(dict, self._call(f"dashboard/{dashboard_id}"))
153
154
 
154
155
  @staticmethod
155
156
  def _collection_specifics(collections: SerializedAsset) -> SerializedAsset:
@@ -170,7 +171,7 @@ class ApiClient:
170
171
  return databases
171
172
 
172
173
  @staticmethod
173
- def _dashboard_cards(dashboards: SerializedAsset) -> Iterator[Dict]:
174
+ def _dashboard_cards(dashboards: SerializedAsset) -> Iterator[dict]:
174
175
  for d in dashboards:
175
176
  d_cards = d.get(CARDS_KEY) or d.get(CARDS_KEY_DEPRECATED) or []
176
177
  yield from d_cards
@@ -186,7 +187,7 @@ class ApiClient:
186
187
 
187
188
  else:
188
189
  answer = self._call(asset.name.lower())
189
- assets = cast(List, answer)
190
+ assets = cast(list, answer)
190
191
 
191
192
  if asset == MetabaseAsset.DATABASE:
192
193
  assets = self._database_specifics(assets)
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Iterable, Tuple, Union
2
+ from collections.abc import Iterable
3
+ from typing import Union
3
4
 
4
5
  from ...utils import (
5
6
  OUTPUT_DIR,
@@ -20,7 +21,7 @@ ClientMetabase = Union[DbClient, ApiClient]
20
21
 
21
22
  def iterate_all_data(
22
23
  client: ClientMetabase,
23
- ) -> Iterable[Tuple[MetabaseAsset, list]]:
24
+ ) -> Iterable[tuple[MetabaseAsset, list]]:
24
25
  """Iterate over the extracted Data From metabase"""
25
26
 
26
27
  yield MetabaseAsset.USER, deep_serialize(client.fetch(MetabaseAsset.USER))
@@ -1,3 +1 @@
1
- from typing import List
2
-
3
- IdsType = List[str]
1
+ IdsType = list[str]
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Dict, List, Optional, cast
2
+ from typing import Optional, cast
3
3
 
4
4
  import requests
5
5
  from requests.auth import HTTPBasicAuth
@@ -29,8 +29,8 @@ logger = logging.getLogger(__name__)
29
29
 
30
30
  URL_TEMPLATE = "{host}/api"
31
31
 
32
- RawData = List[Dict]
33
- Tokens = Optional[List[str]]
32
+ RawData = list[dict]
33
+ Tokens = Optional[list[str]]
34
34
 
35
35
 
36
36
  class Client:
@@ -76,8 +76,8 @@ class Client:
76
76
 
77
77
  # most of calls return data in ["_embedded"]["resource_name"] node
78
78
  try:
79
- embedded = cast(Dict, result["_embedded"])
80
- return cast(List, embedded[resource_name])
79
+ embedded = cast(dict, result["_embedded"])
80
+ return cast(list, embedded[resource_name])
81
81
  except (ValueError, KeyError):
82
82
  raise UnexpectedApiResponseError(resource_name, result)
83
83
 
@@ -168,7 +168,7 @@ class Client:
168
168
  resource_name=mb["member_username"],
169
169
  with_workspace=False,
170
170
  )
171
- members.append(cast(Dict, result))
171
+ members.append(cast(dict, result))
172
172
  return members
173
173
 
174
174
  @staticmethod
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Iterable, Tuple
2
+ from collections.abc import Iterable
3
3
 
4
4
  from ...utils import (
5
5
  OUTPUT_DIR,
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
18
18
 
19
19
  def iterate_all_data(
20
20
  client: Client,
21
- ) -> Iterable[Tuple[Asset, list]]:
21
+ ) -> Iterable[tuple[Asset, list]]:
22
22
  """Iterate over the extracted Data From Mode Analytics"""
23
23
 
24
24
  datasources = client.fetch(Asset.DATASOURCE)
@@ -1,5 +1,3 @@
1
- from typing import Set
2
-
3
1
  from ...types import ExternalAsset, classproperty
4
2
 
5
3
 
@@ -18,7 +16,7 @@ class PowerBiAsset(ExternalAsset):
18
16
  USERS = "users"
19
17
 
20
18
  @classproperty
21
- def optional(cls) -> Set["PowerBiAsset"]:
19
+ def optional(cls) -> set["PowerBiAsset"]:
22
20
  return {
23
21
  PowerBiAsset.DATASET_FIELDS,
24
22
  PowerBiAsset.PAGES,
@@ -1,8 +1,9 @@
1
1
  import logging
2
+ from collections.abc import Iterator
2
3
  from datetime import date
3
4
  from functools import partial
4
5
  from time import sleep
5
- from typing import Dict, Iterator, List, Optional, Union
6
+ from typing import Optional, Union
6
7
 
7
8
  import requests
8
9
  from requests import HTTPError
@@ -40,7 +41,7 @@ class PowerbiClient(APIClient):
40
41
  timeout=POWERBI_DEFAULT_TIMEOUT_S,
41
42
  )
42
43
 
43
- def _activity_events(self, day: Optional[date] = None) -> Iterator[Dict]:
44
+ def _activity_events(self, day: Optional[date] = None) -> Iterator[dict]:
44
45
  """
45
46
  Returns a list of activity events for the organization.
46
47
  https://learn.microsoft.com/en-us/power-bi/admin/service-admin-auditing#activityevents-rest-api
@@ -52,21 +53,21 @@ class PowerbiClient(APIClient):
52
53
  )
53
54
  yield from fetch_all_pages(request, PowerBiPagination)
54
55
 
55
- def _datasets(self) -> Iterator[Dict]:
56
+ def _datasets(self) -> Iterator[dict]:
56
57
  """
57
58
  Returns a list of datasets for the organization.
58
59
  https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
59
60
  """
60
61
  yield from self._get(PowerBiEndpointFactory.datasets())[Keys.VALUE]
61
62
 
62
- def _dashboards(self) -> Iterator[Dict]:
63
+ def _dashboards(self) -> Iterator[dict]:
63
64
  """
64
65
  Returns a list of dashboards for the organization.
65
66
  https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
66
67
  """
67
68
  yield from self._get(PowerBiEndpointFactory.dashboards())[Keys.VALUE]
68
69
 
69
- def _reports(self) -> Iterator[Dict]:
70
+ def _reports(self) -> Iterator[dict]:
70
71
  """
71
72
  Returns a list of reports for the organization.
72
73
  https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
@@ -87,12 +88,12 @@ class PowerbiClient(APIClient):
87
88
 
88
89
  return reports
89
90
 
90
- def _workspace_ids(self) -> List[str]:
91
+ def _workspace_ids(self) -> list[str]:
91
92
  """
92
93
  Get workspaces ids from powerBI admin API.
93
94
  more: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-get-modified-workspaces
94
95
  """
95
- params: Dict[str, Union[bool, str]] = {
96
+ params: dict[str, Union[bool, str]] = {
96
97
  Keys.INACTIVE_WORKSPACES: True,
97
98
  Keys.PERSONAL_WORKSPACES: True,
98
99
  }
@@ -104,7 +105,7 @@ class PowerbiClient(APIClient):
104
105
 
105
106
  return [x[Keys.ID] for x in response]
106
107
 
107
- def _get_scan_result(self, scan_id: int) -> Iterator[Dict]:
108
+ def _get_scan_result(self, scan_id: int) -> Iterator[dict]:
108
109
  endpoint = PowerBiEndpointFactory.metadata_scan_result(scan_id)
109
110
  yield from self._get(endpoint)[Keys.WORKSPACES]
110
111
 
@@ -136,7 +137,7 @@ class PowerbiClient(APIClient):
136
137
  logger.warning(f"Scan {scan_id} timed out")
137
138
  return False
138
139
 
139
- def _create_scan(self, workspaces_ids: List[str]) -> int:
140
+ def _create_scan(self, workspaces_ids: list[str]) -> int:
140
141
  """
141
142
  Tells the Power BI API to start an asynchronous metadata scan.
142
143
  Returns the scan's ID.
@@ -157,7 +158,7 @@ class PowerbiClient(APIClient):
157
158
  )
158
159
  return scan_id[Keys.ID]
159
160
 
160
- def _metadata(self) -> Iterator[Dict]:
161
+ def _metadata(self) -> Iterator[dict]:
161
162
  """
162
163
  Fetch metadata by workspace. The metadata scanning is asynchronous and
163
164
  requires the following steps:
@@ -183,7 +184,7 @@ class PowerbiClient(APIClient):
183
184
  asset: PowerBiAsset,
184
185
  *,
185
186
  day: Optional[date] = None,
186
- ) -> Iterator[Dict]:
187
+ ) -> Iterator[dict]:
187
188
  """
188
189
  Given a PowerBi asset, returns the corresponding data using the
189
190
  appropriate client.
@@ -1,4 +1,4 @@
1
- from typing import List, Optional
1
+ from typing import Optional
2
2
 
3
3
  from pydantic import Field, field_validator
4
4
  from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -19,9 +19,9 @@ class PowerbiCredentials(BaseSettings):
19
19
  client_id: str
20
20
  tenant_id: str
21
21
  secret: str = Field(repr=False)
22
- scopes: List[str] = [DEFAULT_SCOPE]
22
+ scopes: list[str] = [DEFAULT_SCOPE]
23
23
 
24
24
  @field_validator("scopes", mode="before")
25
25
  @classmethod
26
- def _check_scopes(cls, scopes: Optional[List[str]]) -> List[str]:
26
+ def _check_scopes(cls, scopes: Optional[list[str]]) -> list[str]:
27
27
  return scopes if scopes is not None else [DEFAULT_SCOPE]
@@ -1,5 +1,5 @@
1
1
  from datetime import date, datetime
2
- from typing import Optional, Tuple
2
+ from typing import Optional
3
3
 
4
4
  from ....utils import at_midnight, format_date, yesterday
5
5
 
@@ -7,7 +7,7 @@ _CLIENT_APP_BASE = "https://login.microsoftonline.com"
7
7
  _REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
8
8
 
9
9
 
10
- def _time_filter(day: Optional[date]) -> Tuple[datetime, datetime]:
10
+ def _time_filter(day: Optional[date]) -> tuple[datetime, datetime]:
11
11
  target_day = day or yesterday()
12
12
  start = at_midnight(target_day)
13
13
  end = datetime.combine(target_day, datetime.max.time())
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Iterable, Tuple, Union
2
+ from collections.abc import Iterable
3
+ from typing import Union
3
4
 
4
5
  from ...utils import (
5
6
  OUTPUT_DIR,
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
18
19
 
19
20
  def iterate_all_data(
20
21
  client: PowerbiClient,
21
- ) -> Iterable[Tuple[PowerBiAsset, Union[list, dict]]]:
22
+ ) -> Iterable[tuple[PowerBiAsset, Union[list, dict]]]:
22
23
  for asset in PowerBiAsset:
23
24
  if asset in METADATA_ASSETS + REPORTS_ASSETS:
24
25
  continue
@@ -1,5 +1,3 @@
1
- from typing import Dict, Tuple
2
-
3
1
  from ...types import ExternalAsset
4
2
 
5
3
 
@@ -14,7 +12,7 @@ class QlikAsset(ExternalAsset):
14
12
  CONNECTIONS = "connections"
15
13
 
16
14
 
17
- EXPORTED_FIELDS: Dict[QlikAsset, Tuple[str, ...]] = {
15
+ EXPORTED_FIELDS: dict[QlikAsset, tuple[str, ...]] = {
18
16
  QlikAsset.SPACES: (
19
17
  "id",
20
18
  "type",
@@ -1,5 +1,3 @@
1
- from typing import Dict
2
-
3
1
  from ..assets import QlikAsset
4
2
 
5
3
  APP_EXTERNAL_ID_KEY = "resourceId"
@@ -17,7 +15,7 @@ RESPONSE_DICT_EXPECTED_MSG = _RESPONSE_BASE_EXPECTED_MSG.format(type="dict")
17
15
  RESPONSE_LIST_EXPECTED_MSG = _RESPONSE_BASE_EXPECTED_MSG.format(type="list")
18
16
 
19
17
 
20
- ASSET_PATHS: Dict[QlikAsset, str] = {
18
+ ASSET_PATHS: dict[QlikAsset, str] = {
21
19
  QlikAsset.CONNECTIONS: "data-connections",
22
20
  QlikAsset.SPACES: "spaces",
23
21
  QlikAsset.USERS: "users",
@@ -1,5 +1,3 @@
1
- from typing import Dict, Type
2
-
3
1
  from .constants import (
4
2
  ACCESS_DENIED_ERROR_CODE,
5
3
  APP_SIZE_EXCEEDED_ERROR_CODE,
@@ -48,7 +46,7 @@ class QlikResponseKeyError(Exception):
48
46
  """
49
47
 
50
48
 
51
- ERROR_CODE_MAPPING: Dict[int, Type[JsonRpcError]] = {
49
+ ERROR_CODE_MAPPING: dict[int, type[JsonRpcError]] = {
52
50
  ACCESS_DENIED_ERROR_CODE: AccessDeniedError,
53
51
  APP_SIZE_EXCEEDED_ERROR_CODE: AppSizeExceededError,
54
52
  PERSISTENCE_READ_FAILED_ERROR_CODE: PersistenceReadFailedError,
@@ -1,4 +1,4 @@
1
- from typing import Callable, List, Optional
1
+ from typing import Callable, Optional
2
2
 
3
3
  from tqdm import tqdm # type: ignore
4
4
 
@@ -7,7 +7,7 @@ from .constants import APP_EXTERNAL_ID_KEY, SCOPED_ASSETS
7
7
  from .engine import EngineApiClient, QlikCredentials
8
8
  from .rest import RestApiClient
9
9
 
10
- ListedData = List[dict]
10
+ ListedData = list[dict]
11
11
 
12
12
 
13
13
  class MissingAppsScopeError(Exception):
@@ -54,7 +54,7 @@ class QlikMasterClient:
54
54
  def __init__(
55
55
  self,
56
56
  credentials: QlikCredentials,
57
- except_http_error_statuses: Optional[List[int]] = None,
57
+ except_http_error_statuses: Optional[list[int]] = None,
58
58
  display_progress: bool = True,
59
59
  ):
60
60
  self._server_url = credentials.base_url
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import List, Optional, Union
2
+ from typing import Optional, Union
3
3
  from urllib.parse import urljoin
4
4
 
5
5
  import requests
@@ -20,7 +20,7 @@ from .engine import QlikCredentials
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
 
23
- Response = Union[dict, List[dict]]
23
+ Response = Union[dict, list[dict]]
24
24
 
25
25
 
26
26
  def _session() -> requests.Session:
@@ -62,7 +62,7 @@ class RestApiClient:
62
62
  def __init__(
63
63
  self,
64
64
  credentials: QlikCredentials,
65
- except_http_error_statuses: Optional[List[int]] = None,
65
+ except_http_error_statuses: Optional[list[int]] = None,
66
66
  ):
67
67
  self._server_url = credentials.base_url
68
68
  self._api_key = credentials.api_key
@@ -111,10 +111,10 @@ class RestApiClient:
111
111
  except HTTPError as error:
112
112
  return self._handle_http_error(error)
113
113
 
114
- def _pager(self, first_page_url: str) -> List[dict]:
114
+ def _pager(self, first_page_url: str) -> list[dict]:
115
115
  current_page_url = first_page_url
116
116
 
117
- data: List[dict] = []
117
+ data: list[dict] = []
118
118
  while current_page_url:
119
119
  response = self._call(current_page_url)
120
120
  if not response:
@@ -130,7 +130,7 @@ class RestApiClient:
130
130
  current_page_url = next_page_url
131
131
  return data
132
132
 
133
- def get(self, asset: QlikAsset) -> List[dict]:
133
+ def get(self, asset: QlikAsset) -> list[dict]:
134
134
  """
135
135
  Calls the route corresponding to the asset and returns the list of
136
136
  corresponding data
@@ -144,7 +144,7 @@ class RestApiClient:
144
144
 
145
145
  return [_filter_fields(row) for row in data]
146
146
 
147
- def get_with_scope(self, asset: QlikAsset, app_id: str) -> List[dict]:
147
+ def get_with_scope(self, asset: QlikAsset, app_id: str) -> list[dict]:
148
148
  """
149
149
  Calls the route corresponding to the asset scoped on an app_id and
150
150
  returns the corresponding data
@@ -157,7 +157,7 @@ class RestApiClient:
157
157
  assert isinstance(response, list), RESPONSE_LIST_EXPECTED_MSG
158
158
  return response
159
159
 
160
- def data_connections(self) -> List[dict]:
160
+ def data_connections(self) -> list[dict]:
161
161
  """
162
162
  Returns the list of data Connections
163
163
 
@@ -165,7 +165,7 @@ class RestApiClient:
165
165
  """
166
166
  return self.get(QlikAsset.CONNECTIONS)
167
167
 
168
- def spaces(self) -> List[dict]:
168
+ def spaces(self) -> list[dict]:
169
169
  """
170
170
  Returns the list of Spaces
171
171
 
@@ -173,7 +173,7 @@ class RestApiClient:
173
173
  """
174
174
  return self.get(QlikAsset.SPACES)
175
175
 
176
- def users(self) -> List[dict]:
176
+ def users(self) -> list[dict]:
177
177
  """
178
178
  Returns the list of Users
179
179
 
@@ -181,7 +181,7 @@ class RestApiClient:
181
181
  """
182
182
  return self.get(QlikAsset.USERS)
183
183
 
184
- def apps(self) -> List[dict]:
184
+ def apps(self) -> list[dict]:
185
185
  """
186
186
  Returns the list of Apps
187
187
 
@@ -189,7 +189,7 @@ class RestApiClient:
189
189
  """
190
190
  return self.get(QlikAsset.APPS)
191
191
 
192
- def data_lineage(self, app_id: str) -> List[dict]:
192
+ def data_lineage(self, app_id: str) -> list[dict]:
193
193
  """
194
194
  Returns the data lineage for a given source
195
195
 
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Iterable, List, Optional, Tuple, Union
2
+ from collections.abc import Iterable
3
+ from typing import Optional, Union
3
4
 
4
5
  from ...utils import (
5
6
  OUTPUT_DIR,
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
18
19
 
19
20
  def iterate_all_data(
20
21
  client: QlikClient,
21
- ) -> Iterable[Tuple[QlikAsset, Union[list, dict]]]:
22
+ ) -> Iterable[tuple[QlikAsset, Union[list, dict]]]:
22
23
  """Iterate over the extracted data from Qlik"""
23
24
 
24
25
  logger.info("Extracting CONNECTIONS from REST API")
@@ -47,7 +48,7 @@ def iterate_all_data(
47
48
 
48
49
 
49
50
  def extract_all(
50
- except_http_error_statuses: Optional[List[int]] = None, **kwargs
51
+ except_http_error_statuses: Optional[list[int]] = None, **kwargs
51
52
  ) -> None:
52
53
  """
53
54
  Extract data from Qlik REST API
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Dict, Iterator, List, Optional
2
+ from collections.abc import Iterator
3
+ from typing import Optional
3
4
 
4
5
  from ....utils import build_url
5
6
  from ....utils.salesforce import SalesforceBaseClient
@@ -49,7 +50,7 @@ class SalesforceReportingClient(SalesforceBaseClient):
49
50
  url = self._get_asset_url(asset_type, asset)
50
51
  yield {**asset, "Url": url}
51
52
 
52
- def fetch(self, asset: SalesforceReportingAsset) -> List[Dict]:
53
+ def fetch(self, asset: SalesforceReportingAsset) -> list[dict]:
53
54
  """
54
55
  Fetch Salesforce Reporting assets
55
56
  """
@@ -1,8 +1,6 @@
1
- from typing import Dict
2
-
3
1
  from ..assets import SalesforceReportingAsset
4
2
 
5
- queries: Dict[SalesforceReportingAsset, str] = {
3
+ queries: dict[SalesforceReportingAsset, str] = {
6
4
  SalesforceReportingAsset.DASHBOARDS: """
7
5
  SELECT
8
6
  CreatedBy.Id,
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Iterable, Tuple, Union
2
+ from collections.abc import Iterable
3
+ from typing import Union
3
4
 
4
5
  from ...utils import (
5
6
  OUTPUT_DIR,
@@ -19,7 +20,7 @@ logger = logging.getLogger(__name__)
19
20
 
20
21
  def iterate_all_data(
21
22
  client: SalesforceReportingClient,
22
- ) -> Iterable[Tuple[str, Union[list, dict]]]:
23
+ ) -> Iterable[tuple[str, Union[list, dict]]]:
23
24
  """Iterate over the extracted data from Salesforce"""
24
25
 
25
26
  for asset in SalesforceReportingAsset:
@@ -1,6 +1,7 @@
1
+ from collections.abc import Iterator
1
2
  from functools import partial
2
3
  from http import HTTPStatus
3
- from typing import Callable, Dict, Iterator, List, Optional, Tuple
4
+ from typing import Callable, Optional
4
5
 
5
6
  import requests
6
7
 
@@ -19,7 +20,7 @@ from .pagination import SIGMA_API_LIMIT, SigmaPagination
19
20
 
20
21
  _CONTENT_TYPE = "application/x-www-form-urlencoded"
21
22
 
22
- _DATA_ELEMENTS: Tuple[str, ...] = (
23
+ _DATA_ELEMENTS: tuple[str, ...] = (
23
24
  "input-table",
24
25
  "pivot-table",
25
26
  "table",
@@ -28,6 +29,7 @@ _DATA_ELEMENTS: Tuple[str, ...] = (
28
29
  )
29
30
 
30
31
  _AUTH_TIMEOUT_S = 60
32
+ _SIGMA_TIMEOUT = 120
31
33
 
32
34
  _SIGMA_HEADERS = {
33
35
  "Content-Type": _CONTENT_TYPE,
@@ -48,7 +50,7 @@ SIGMA_SAFE_MODE = RequestSafeMode(
48
50
 
49
51
 
50
52
  class SigmaBearerAuth(BearerAuth):
51
- def __init__(self, host: str, token_payload: Dict[str, str]):
53
+ def __init__(self, host: str, token_payload: dict[str, str]):
52
54
  auth_endpoint = SigmaEndpointFactory.authentication()
53
55
  self.authentication_url = build_url(host, auth_endpoint)
54
56
  self.token_payload = token_payload
@@ -75,6 +77,7 @@ class SigmaClient(APIClient):
75
77
  host=credentials.host,
76
78
  auth=auth,
77
79
  headers=_SIGMA_HEADERS,
80
+ timeout=_SIGMA_TIMEOUT,
78
81
  safe_mode=safe_mode or SIGMA_SAFE_MODE,
79
82
  )
80
83
 
@@ -116,7 +119,7 @@ class SigmaClient(APIClient):
116
119
  "page_id": page_id,
117
120
  }
118
121
 
119
- def _get_all_elements(self, workbooks: List[dict]) -> Iterator[dict]:
122
+ def _get_all_elements(self, workbooks: list[dict]) -> Iterator[dict]:
120
123
  for workbook in workbooks:
121
124
  workbook_id = workbook["workbookId"]
122
125
 
@@ -130,7 +133,7 @@ class SigmaClient(APIClient):
130
133
  page=page, workbook_id=workbook_id
131
134
  )
132
135
 
133
- def _get_all_lineages(self, elements: List[dict]) -> Iterator[dict]:
136
+ def _get_all_lineages(self, elements: list[dict]) -> Iterator[dict]:
134
137
  for element in elements:
135
138
  workbook_id = element["workbook_id"]
136
139
  element_id = element["elementId"]
@@ -143,7 +146,7 @@ class SigmaClient(APIClient):
143
146
  "element_id": element_id,
144
147
  }
145
148
 
146
- def _get_all_queries(self, workbooks: List[dict]) -> Iterator[dict]:
149
+ def _get_all_queries(self, workbooks: list[dict]) -> Iterator[dict]:
147
150
  for workbook in workbooks:
148
151
  workbook_id = workbook["workbookId"]
149
152
  request = self._get_paginated(
@@ -157,8 +160,8 @@ class SigmaClient(APIClient):
157
160
  def fetch(
158
161
  self,
159
162
  asset: SigmaAsset,
160
- workbooks: Optional[List[dict]] = None,
161
- elements: Optional[List[dict]] = None,
163
+ workbooks: Optional[list[dict]] = None,
164
+ elements: Optional[list[dict]] = None,
162
165
  ) -> Iterator[dict]:
163
166
  """Returns the needed metadata for the queried asset"""
164
167
  if asset == SigmaAsset.DATASETS:
@@ -1,5 +1,3 @@
1
- from typing import Dict
2
-
3
1
  from pydantic import Field
4
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
5
3
 
@@ -21,7 +19,7 @@ class SigmaCredentials(BaseSettings):
21
19
  grant_type: str = "client_credentials"
22
20
 
23
21
  @property
24
- def token_payload(self) -> Dict[str, str]:
22
+ def token_payload(self) -> dict[str, str]:
25
23
  return {
26
24
  "grant_type": self.grant_type,
27
25
  "client_id": self.client_id,
@@ -5,7 +5,7 @@ from pydantic.alias_generators import to_camel
5
5
 
6
6
  from ....utils import PaginationModel
7
7
 
8
- SIGMA_API_LIMIT = 500 # default number of records per page
8
+ SIGMA_API_LIMIT = 200 # default number of records per page
9
9
 
10
10
 
11
11
  class SigmaPagination(PaginationModel):
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Iterable, Iterator, Tuple, Union
2
+ from collections.abc import Iterable, Iterator
3
+ from typing import Union
3
4
 
4
5
  from ...utils import (
5
6
  OUTPUT_DIR,
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
18
19
 
19
20
  def iterate_all_data(
20
21
  client: SigmaClient,
21
- ) -> Iterable[Tuple[SigmaAsset, Union[list, Iterator, dict]]]:
22
+ ) -> Iterable[tuple[SigmaAsset, Union[list, Iterator, dict]]]:
22
23
  """Iterate over the extracted data from Sigma"""
23
24
 
24
25
  logger.info("Extracting DATASETS from API")