castor-extractor 0.21.9__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +4 -0
- castor_extractor/commands/__init__.py +0 -3
- castor_extractor/commands/file_check.py +1 -2
- castor_extractor/file_checker/column.py +5 -5
- castor_extractor/file_checker/file.py +7 -7
- castor_extractor/file_checker/file_test.py +2 -2
- castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
- castor_extractor/knowledge/confluence/client/client.py +2 -1
- castor_extractor/knowledge/confluence/extract.py +3 -2
- castor_extractor/knowledge/notion/client/client.py +3 -2
- castor_extractor/knowledge/notion/extract.py +3 -2
- castor_extractor/quality/soda/client/client.py +2 -1
- castor_extractor/quality/soda/client/pagination.py +1 -3
- castor_extractor/types.py +3 -3
- castor_extractor/uploader/env.py +2 -2
- castor_extractor/uploader/upload.py +4 -3
- castor_extractor/uploader/utils.py +1 -1
- castor_extractor/utils/client/abstract.py +2 -1
- castor_extractor/utils/client/api/auth.py +2 -2
- castor_extractor/utils/client/api/auth_test.py +2 -2
- castor_extractor/utils/client/api/client.py +3 -3
- castor_extractor/utils/client/api/pagination.py +3 -2
- castor_extractor/utils/client/api/safe_request.py +5 -5
- castor_extractor/utils/collection.py +7 -11
- castor_extractor/utils/dbt/client.py +3 -3
- castor_extractor/utils/dbt/client_test.py +2 -2
- castor_extractor/utils/deprecate.py +1 -2
- castor_extractor/utils/files.py +5 -5
- castor_extractor/utils/formatter.py +5 -4
- castor_extractor/utils/json_stream_write.py +2 -1
- castor_extractor/utils/object.py +2 -1
- castor_extractor/utils/pager/pager.py +2 -4
- castor_extractor/utils/pager/pager_on_id.py +2 -1
- castor_extractor/utils/pager/pager_on_id_test.py +5 -5
- castor_extractor/utils/pager/pager_test.py +3 -3
- castor_extractor/utils/retry.py +4 -3
- castor_extractor/utils/retry_test.py +2 -3
- castor_extractor/utils/safe.py +3 -3
- castor_extractor/utils/salesforce/client.py +2 -1
- castor_extractor/utils/salesforce/credentials.py +1 -3
- castor_extractor/utils/store.py +2 -1
- castor_extractor/utils/string.py +2 -2
- castor_extractor/utils/string_test.py +1 -3
- castor_extractor/utils/type.py +3 -2
- castor_extractor/utils/validation.py +4 -4
- castor_extractor/utils/write.py +2 -2
- castor_extractor/visualization/domo/client/client.py +8 -7
- castor_extractor/visualization/domo/client/credentials.py +2 -2
- castor_extractor/visualization/domo/client/endpoints.py +2 -2
- castor_extractor/visualization/domo/extract.py +3 -2
- castor_extractor/visualization/looker/api/client.py +17 -16
- castor_extractor/visualization/looker/api/utils.py +2 -2
- castor_extractor/visualization/looker/assets.py +1 -3
- castor_extractor/visualization/looker/extract.py +4 -3
- castor_extractor/visualization/looker/fields.py +3 -3
- castor_extractor/visualization/looker/multithreading.py +3 -3
- castor_extractor/visualization/metabase/assets.py +1 -3
- castor_extractor/visualization/metabase/client/api/client.py +8 -7
- castor_extractor/visualization/metabase/extract.py +3 -2
- castor_extractor/visualization/metabase/types.py +1 -3
- castor_extractor/visualization/mode/client/client.py +6 -6
- castor_extractor/visualization/mode/extract.py +2 -2
- castor_extractor/visualization/powerbi/assets.py +1 -3
- castor_extractor/visualization/powerbi/client/client.py +12 -11
- castor_extractor/visualization/powerbi/client/credentials.py +3 -3
- castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
- castor_extractor/visualization/powerbi/extract.py +3 -2
- castor_extractor/visualization/qlik/assets.py +1 -3
- castor_extractor/visualization/qlik/client/constants.py +1 -3
- castor_extractor/visualization/qlik/client/engine/error.py +1 -3
- castor_extractor/visualization/qlik/client/master.py +3 -3
- castor_extractor/visualization/qlik/client/rest.py +12 -12
- castor_extractor/visualization/qlik/extract.py +4 -3
- castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
- castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
- castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
- castor_extractor/visualization/sigma/client/client.py +9 -8
- castor_extractor/visualization/sigma/client/credentials.py +1 -3
- castor_extractor/visualization/sigma/extract.py +3 -2
- castor_extractor/visualization/tableau/assets.py +1 -2
- castor_extractor/visualization/tableau/client/client.py +1 -2
- castor_extractor/visualization/tableau/client/client_utils.py +3 -2
- castor_extractor/visualization/tableau/client/credentials.py +3 -3
- castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
- castor_extractor/visualization/tableau/extract.py +2 -2
- castor_extractor/visualization/tableau/gql_fields.py +3 -3
- castor_extractor/visualization/tableau/tsc_fields.py +1 -2
- castor_extractor/visualization/tableau/types.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
- castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
- castor_extractor/visualization/tableau_revamp/extract.py +2 -2
- castor_extractor/visualization/thoughtspot/client/client.py +3 -2
- castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
- castor_extractor/visualization/thoughtspot/extract.py +3 -2
- castor_extractor/warehouse/abstract/asset.py +4 -5
- castor_extractor/warehouse/abstract/extract.py +4 -3
- castor_extractor/warehouse/abstract/query.py +4 -4
- castor_extractor/warehouse/bigquery/client.py +8 -8
- castor_extractor/warehouse/bigquery/extract.py +1 -1
- castor_extractor/warehouse/bigquery/query.py +2 -2
- castor_extractor/warehouse/bigquery/types.py +2 -4
- castor_extractor/warehouse/databricks/api_client.py +15 -14
- castor_extractor/warehouse/databricks/client.py +16 -16
- castor_extractor/warehouse/databricks/extract.py +4 -4
- castor_extractor/warehouse/databricks/format.py +12 -12
- castor_extractor/warehouse/databricks/lineage.py +11 -11
- castor_extractor/warehouse/databricks/pagination.py +2 -2
- castor_extractor/warehouse/databricks/types.py +4 -4
- castor_extractor/warehouse/databricks/utils.py +5 -4
- castor_extractor/warehouse/mysql/query.py +2 -2
- castor_extractor/warehouse/postgres/query.py +2 -2
- castor_extractor/warehouse/redshift/client.py +1 -1
- castor_extractor/warehouse/redshift/query.py +2 -2
- castor_extractor/warehouse/salesforce/client.py +8 -8
- castor_extractor/warehouse/salesforce/extract.py +3 -4
- castor_extractor/warehouse/salesforce/format.py +8 -7
- castor_extractor/warehouse/salesforce/format_test.py +2 -4
- castor_extractor/warehouse/snowflake/query.py +5 -5
- castor_extractor/warehouse/sqlserver/client.py +1 -1
- castor_extractor/warehouse/sqlserver/query.py +2 -2
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/METADATA +7 -6
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/RECORD +128 -128
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/LICENCE +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/WHEEL +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
from http import HTTPStatus
|
|
3
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Optional, cast
|
|
4
5
|
|
|
5
6
|
import requests
|
|
6
7
|
from requests import HTTPError
|
|
@@ -78,7 +79,7 @@ class ApiClient:
|
|
|
78
79
|
@staticmethod
|
|
79
80
|
def _answer(response: Any):
|
|
80
81
|
answer = response
|
|
81
|
-
if isinstance(answer,
|
|
82
|
+
if isinstance(answer, dict) and DATA_KEY in answer:
|
|
82
83
|
# v0.41 of Metabase introduced embedded data for certain calls
|
|
83
84
|
# {'data': [{ }, ...] , 'total': 15, 'limit': None, 'offset': None}"
|
|
84
85
|
return answer[DATA_KEY]
|
|
@@ -123,7 +124,7 @@ class ApiClient:
|
|
|
123
124
|
def _fetch_ids(self, asset: MetabaseAsset) -> IdsType:
|
|
124
125
|
ids: IdsType = []
|
|
125
126
|
results = self._call(endpoint=asset.name.lower())
|
|
126
|
-
for res in cast(
|
|
127
|
+
for res in cast(list, results):
|
|
127
128
|
assert isinstance(res, dict)
|
|
128
129
|
ids.append(res["id"])
|
|
129
130
|
return ids
|
|
@@ -135,7 +136,7 @@ class ApiClient:
|
|
|
135
136
|
if not collection:
|
|
136
137
|
continue
|
|
137
138
|
|
|
138
|
-
seen_dashboard_ids:
|
|
139
|
+
seen_dashboard_ids: set[int] = set()
|
|
139
140
|
|
|
140
141
|
for dashboard in cast(SerializedAsset, collection):
|
|
141
142
|
if dashboard.get("model") != "dashboard":
|
|
@@ -149,7 +150,7 @@ class ApiClient:
|
|
|
149
150
|
|
|
150
151
|
if dashboard_id not in seen_dashboard_ids:
|
|
151
152
|
seen_dashboard_ids.add(dashboard_id)
|
|
152
|
-
yield cast(
|
|
153
|
+
yield cast(dict, self._call(f"dashboard/{dashboard_id}"))
|
|
153
154
|
|
|
154
155
|
@staticmethod
|
|
155
156
|
def _collection_specifics(collections: SerializedAsset) -> SerializedAsset:
|
|
@@ -170,7 +171,7 @@ class ApiClient:
|
|
|
170
171
|
return databases
|
|
171
172
|
|
|
172
173
|
@staticmethod
|
|
173
|
-
def _dashboard_cards(dashboards: SerializedAsset) -> Iterator[
|
|
174
|
+
def _dashboard_cards(dashboards: SerializedAsset) -> Iterator[dict]:
|
|
174
175
|
for d in dashboards:
|
|
175
176
|
d_cards = d.get(CARDS_KEY) or d.get(CARDS_KEY_DEPRECATED) or []
|
|
176
177
|
yield from d_cards
|
|
@@ -186,7 +187,7 @@ class ApiClient:
|
|
|
186
187
|
|
|
187
188
|
else:
|
|
188
189
|
answer = self._call(asset.name.lower())
|
|
189
|
-
assets = cast(
|
|
190
|
+
assets = cast(list, answer)
|
|
190
191
|
|
|
191
192
|
if asset == MetabaseAsset.DATABASE:
|
|
192
193
|
assets = self._database_specifics(assets)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -20,7 +21,7 @@ ClientMetabase = Union[DbClient, ApiClient]
|
|
|
20
21
|
|
|
21
22
|
def iterate_all_data(
|
|
22
23
|
client: ClientMetabase,
|
|
23
|
-
) -> Iterable[
|
|
24
|
+
) -> Iterable[tuple[MetabaseAsset, list]]:
|
|
24
25
|
"""Iterate over the extracted Data From metabase"""
|
|
25
26
|
|
|
26
27
|
yield MetabaseAsset.USER, deep_serialize(client.fetch(MetabaseAsset.USER))
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional, cast
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
5
|
from requests.auth import HTTPBasicAuth
|
|
@@ -29,8 +29,8 @@ logger = logging.getLogger(__name__)
|
|
|
29
29
|
|
|
30
30
|
URL_TEMPLATE = "{host}/api"
|
|
31
31
|
|
|
32
|
-
RawData =
|
|
33
|
-
Tokens = Optional[
|
|
32
|
+
RawData = list[dict]
|
|
33
|
+
Tokens = Optional[list[str]]
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
class Client:
|
|
@@ -76,8 +76,8 @@ class Client:
|
|
|
76
76
|
|
|
77
77
|
# most of calls return data in ["_embedded"]["resource_name"] node
|
|
78
78
|
try:
|
|
79
|
-
embedded = cast(
|
|
80
|
-
return cast(
|
|
79
|
+
embedded = cast(dict, result["_embedded"])
|
|
80
|
+
return cast(list, embedded[resource_name])
|
|
81
81
|
except (ValueError, KeyError):
|
|
82
82
|
raise UnexpectedApiResponseError(resource_name, result)
|
|
83
83
|
|
|
@@ -168,7 +168,7 @@ class Client:
|
|
|
168
168
|
resource_name=mb["member_username"],
|
|
169
169
|
with_workspace=False,
|
|
170
170
|
)
|
|
171
|
-
members.append(cast(
|
|
171
|
+
members.append(cast(dict, result))
|
|
172
172
|
return members
|
|
173
173
|
|
|
174
174
|
@staticmethod
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
3
|
|
|
4
4
|
from ...utils import (
|
|
5
5
|
OUTPUT_DIR,
|
|
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
18
|
|
|
19
19
|
def iterate_all_data(
|
|
20
20
|
client: Client,
|
|
21
|
-
) -> Iterable[
|
|
21
|
+
) -> Iterable[tuple[Asset, list]]:
|
|
22
22
|
"""Iterate over the extracted Data From Mode Analytics"""
|
|
23
23
|
|
|
24
24
|
datasources = client.fetch(Asset.DATASOURCE)
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Set
|
|
2
|
-
|
|
3
1
|
from ...types import ExternalAsset, classproperty
|
|
4
2
|
|
|
5
3
|
|
|
@@ -18,7 +16,7 @@ class PowerBiAsset(ExternalAsset):
|
|
|
18
16
|
USERS = "users"
|
|
19
17
|
|
|
20
18
|
@classproperty
|
|
21
|
-
def optional(cls) ->
|
|
19
|
+
def optional(cls) -> set["PowerBiAsset"]:
|
|
22
20
|
return {
|
|
23
21
|
PowerBiAsset.DATASET_FIELDS,
|
|
24
22
|
PowerBiAsset.PAGES,
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
from datetime import date
|
|
3
4
|
from functools import partial
|
|
4
5
|
from time import sleep
|
|
5
|
-
from typing import
|
|
6
|
+
from typing import Optional, Union
|
|
6
7
|
|
|
7
8
|
import requests
|
|
8
9
|
from requests import HTTPError
|
|
@@ -40,7 +41,7 @@ class PowerbiClient(APIClient):
|
|
|
40
41
|
timeout=POWERBI_DEFAULT_TIMEOUT_S,
|
|
41
42
|
)
|
|
42
43
|
|
|
43
|
-
def _activity_events(self, day: Optional[date] = None) -> Iterator[
|
|
44
|
+
def _activity_events(self, day: Optional[date] = None) -> Iterator[dict]:
|
|
44
45
|
"""
|
|
45
46
|
Returns a list of activity events for the organization.
|
|
46
47
|
https://learn.microsoft.com/en-us/power-bi/admin/service-admin-auditing#activityevents-rest-api
|
|
@@ -52,21 +53,21 @@ class PowerbiClient(APIClient):
|
|
|
52
53
|
)
|
|
53
54
|
yield from fetch_all_pages(request, PowerBiPagination)
|
|
54
55
|
|
|
55
|
-
def _datasets(self) -> Iterator[
|
|
56
|
+
def _datasets(self) -> Iterator[dict]:
|
|
56
57
|
"""
|
|
57
58
|
Returns a list of datasets for the organization.
|
|
58
59
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
|
|
59
60
|
"""
|
|
60
61
|
yield from self._get(PowerBiEndpointFactory.datasets())[Keys.VALUE]
|
|
61
62
|
|
|
62
|
-
def _dashboards(self) -> Iterator[
|
|
63
|
+
def _dashboards(self) -> Iterator[dict]:
|
|
63
64
|
"""
|
|
64
65
|
Returns a list of dashboards for the organization.
|
|
65
66
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
|
|
66
67
|
"""
|
|
67
68
|
yield from self._get(PowerBiEndpointFactory.dashboards())[Keys.VALUE]
|
|
68
69
|
|
|
69
|
-
def _reports(self) -> Iterator[
|
|
70
|
+
def _reports(self) -> Iterator[dict]:
|
|
70
71
|
"""
|
|
71
72
|
Returns a list of reports for the organization.
|
|
72
73
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
|
|
@@ -87,12 +88,12 @@ class PowerbiClient(APIClient):
|
|
|
87
88
|
|
|
88
89
|
return reports
|
|
89
90
|
|
|
90
|
-
def _workspace_ids(self) ->
|
|
91
|
+
def _workspace_ids(self) -> list[str]:
|
|
91
92
|
"""
|
|
92
93
|
Get workspaces ids from powerBI admin API.
|
|
93
94
|
more: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-get-modified-workspaces
|
|
94
95
|
"""
|
|
95
|
-
params:
|
|
96
|
+
params: dict[str, Union[bool, str]] = {
|
|
96
97
|
Keys.INACTIVE_WORKSPACES: True,
|
|
97
98
|
Keys.PERSONAL_WORKSPACES: True,
|
|
98
99
|
}
|
|
@@ -104,7 +105,7 @@ class PowerbiClient(APIClient):
|
|
|
104
105
|
|
|
105
106
|
return [x[Keys.ID] for x in response]
|
|
106
107
|
|
|
107
|
-
def _get_scan_result(self, scan_id: int) -> Iterator[
|
|
108
|
+
def _get_scan_result(self, scan_id: int) -> Iterator[dict]:
|
|
108
109
|
endpoint = PowerBiEndpointFactory.metadata_scan_result(scan_id)
|
|
109
110
|
yield from self._get(endpoint)[Keys.WORKSPACES]
|
|
110
111
|
|
|
@@ -136,7 +137,7 @@ class PowerbiClient(APIClient):
|
|
|
136
137
|
logger.warning(f"Scan {scan_id} timed out")
|
|
137
138
|
return False
|
|
138
139
|
|
|
139
|
-
def _create_scan(self, workspaces_ids:
|
|
140
|
+
def _create_scan(self, workspaces_ids: list[str]) -> int:
|
|
140
141
|
"""
|
|
141
142
|
Tells the Power BI API to start an asynchronous metadata scan.
|
|
142
143
|
Returns the scan's ID.
|
|
@@ -157,7 +158,7 @@ class PowerbiClient(APIClient):
|
|
|
157
158
|
)
|
|
158
159
|
return scan_id[Keys.ID]
|
|
159
160
|
|
|
160
|
-
def _metadata(self) -> Iterator[
|
|
161
|
+
def _metadata(self) -> Iterator[dict]:
|
|
161
162
|
"""
|
|
162
163
|
Fetch metadata by workspace. The metadata scanning is asynchronous and
|
|
163
164
|
requires the following steps:
|
|
@@ -183,7 +184,7 @@ class PowerbiClient(APIClient):
|
|
|
183
184
|
asset: PowerBiAsset,
|
|
184
185
|
*,
|
|
185
186
|
day: Optional[date] = None,
|
|
186
|
-
) -> Iterator[
|
|
187
|
+
) -> Iterator[dict]:
|
|
187
188
|
"""
|
|
188
189
|
Given a PowerBi asset, returns the corresponding data using the
|
|
189
190
|
appropriate client.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, field_validator
|
|
4
4
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
@@ -19,9 +19,9 @@ class PowerbiCredentials(BaseSettings):
|
|
|
19
19
|
client_id: str
|
|
20
20
|
tenant_id: str
|
|
21
21
|
secret: str = Field(repr=False)
|
|
22
|
-
scopes:
|
|
22
|
+
scopes: list[str] = [DEFAULT_SCOPE]
|
|
23
23
|
|
|
24
24
|
@field_validator("scopes", mode="before")
|
|
25
25
|
@classmethod
|
|
26
|
-
def _check_scopes(cls, scopes: Optional[
|
|
26
|
+
def _check_scopes(cls, scopes: Optional[list[str]]) -> list[str]:
|
|
27
27
|
return scopes if scopes is not None else [DEFAULT_SCOPE]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from datetime import date, datetime
|
|
2
|
-
from typing import Optional
|
|
2
|
+
from typing import Optional
|
|
3
3
|
|
|
4
4
|
from ....utils import at_midnight, format_date, yesterday
|
|
5
5
|
|
|
@@ -7,7 +7,7 @@ _CLIENT_APP_BASE = "https://login.microsoftonline.com"
|
|
|
7
7
|
_REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def _time_filter(day: Optional[date]) ->
|
|
10
|
+
def _time_filter(day: Optional[date]) -> tuple[datetime, datetime]:
|
|
11
11
|
target_day = day or yesterday()
|
|
12
12
|
start = at_midnight(target_day)
|
|
13
13
|
end = datetime.combine(target_day, datetime.max.time())
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: PowerbiClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[PowerBiAsset, Union[list, dict]]]:
|
|
22
23
|
for asset in PowerBiAsset:
|
|
23
24
|
if asset in METADATA_ASSETS + REPORTS_ASSETS:
|
|
24
25
|
continue
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict, Tuple
|
|
2
|
-
|
|
3
1
|
from ...types import ExternalAsset
|
|
4
2
|
|
|
5
3
|
|
|
@@ -14,7 +12,7 @@ class QlikAsset(ExternalAsset):
|
|
|
14
12
|
CONNECTIONS = "connections"
|
|
15
13
|
|
|
16
14
|
|
|
17
|
-
EXPORTED_FIELDS:
|
|
15
|
+
EXPORTED_FIELDS: dict[QlikAsset, tuple[str, ...]] = {
|
|
18
16
|
QlikAsset.SPACES: (
|
|
19
17
|
"id",
|
|
20
18
|
"type",
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
1
|
from ..assets import QlikAsset
|
|
4
2
|
|
|
5
3
|
APP_EXTERNAL_ID_KEY = "resourceId"
|
|
@@ -17,7 +15,7 @@ RESPONSE_DICT_EXPECTED_MSG = _RESPONSE_BASE_EXPECTED_MSG.format(type="dict")
|
|
|
17
15
|
RESPONSE_LIST_EXPECTED_MSG = _RESPONSE_BASE_EXPECTED_MSG.format(type="list")
|
|
18
16
|
|
|
19
17
|
|
|
20
|
-
ASSET_PATHS:
|
|
18
|
+
ASSET_PATHS: dict[QlikAsset, str] = {
|
|
21
19
|
QlikAsset.CONNECTIONS: "data-connections",
|
|
22
20
|
QlikAsset.SPACES: "spaces",
|
|
23
21
|
QlikAsset.USERS: "users",
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict, Type
|
|
2
|
-
|
|
3
1
|
from .constants import (
|
|
4
2
|
ACCESS_DENIED_ERROR_CODE,
|
|
5
3
|
APP_SIZE_EXCEEDED_ERROR_CODE,
|
|
@@ -48,7 +46,7 @@ class QlikResponseKeyError(Exception):
|
|
|
48
46
|
"""
|
|
49
47
|
|
|
50
48
|
|
|
51
|
-
ERROR_CODE_MAPPING:
|
|
49
|
+
ERROR_CODE_MAPPING: dict[int, type[JsonRpcError]] = {
|
|
52
50
|
ACCESS_DENIED_ERROR_CODE: AccessDeniedError,
|
|
53
51
|
APP_SIZE_EXCEEDED_ERROR_CODE: AppSizeExceededError,
|
|
54
52
|
PERSISTENCE_READ_FAILED_ERROR_CODE: PersistenceReadFailedError,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Callable,
|
|
1
|
+
from typing import Callable, Optional
|
|
2
2
|
|
|
3
3
|
from tqdm import tqdm # type: ignore
|
|
4
4
|
|
|
@@ -7,7 +7,7 @@ from .constants import APP_EXTERNAL_ID_KEY, SCOPED_ASSETS
|
|
|
7
7
|
from .engine import EngineApiClient, QlikCredentials
|
|
8
8
|
from .rest import RestApiClient
|
|
9
9
|
|
|
10
|
-
ListedData =
|
|
10
|
+
ListedData = list[dict]
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class MissingAppsScopeError(Exception):
|
|
@@ -54,7 +54,7 @@ class QlikMasterClient:
|
|
|
54
54
|
def __init__(
|
|
55
55
|
self,
|
|
56
56
|
credentials: QlikCredentials,
|
|
57
|
-
except_http_error_statuses: Optional[
|
|
57
|
+
except_http_error_statuses: Optional[list[int]] = None,
|
|
58
58
|
display_progress: bool = True,
|
|
59
59
|
):
|
|
60
60
|
self._server_url = credentials.base_url
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional, Union
|
|
3
3
|
from urllib.parse import urljoin
|
|
4
4
|
|
|
5
5
|
import requests
|
|
@@ -20,7 +20,7 @@ from .engine import QlikCredentials
|
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
|
-
Response = Union[dict,
|
|
23
|
+
Response = Union[dict, list[dict]]
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
def _session() -> requests.Session:
|
|
@@ -62,7 +62,7 @@ class RestApiClient:
|
|
|
62
62
|
def __init__(
|
|
63
63
|
self,
|
|
64
64
|
credentials: QlikCredentials,
|
|
65
|
-
except_http_error_statuses: Optional[
|
|
65
|
+
except_http_error_statuses: Optional[list[int]] = None,
|
|
66
66
|
):
|
|
67
67
|
self._server_url = credentials.base_url
|
|
68
68
|
self._api_key = credentials.api_key
|
|
@@ -111,10 +111,10 @@ class RestApiClient:
|
|
|
111
111
|
except HTTPError as error:
|
|
112
112
|
return self._handle_http_error(error)
|
|
113
113
|
|
|
114
|
-
def _pager(self, first_page_url: str) ->
|
|
114
|
+
def _pager(self, first_page_url: str) -> list[dict]:
|
|
115
115
|
current_page_url = first_page_url
|
|
116
116
|
|
|
117
|
-
data:
|
|
117
|
+
data: list[dict] = []
|
|
118
118
|
while current_page_url:
|
|
119
119
|
response = self._call(current_page_url)
|
|
120
120
|
if not response:
|
|
@@ -130,7 +130,7 @@ class RestApiClient:
|
|
|
130
130
|
current_page_url = next_page_url
|
|
131
131
|
return data
|
|
132
132
|
|
|
133
|
-
def get(self, asset: QlikAsset) ->
|
|
133
|
+
def get(self, asset: QlikAsset) -> list[dict]:
|
|
134
134
|
"""
|
|
135
135
|
Calls the route corresponding to the asset and returns the list of
|
|
136
136
|
corresponding data
|
|
@@ -144,7 +144,7 @@ class RestApiClient:
|
|
|
144
144
|
|
|
145
145
|
return [_filter_fields(row) for row in data]
|
|
146
146
|
|
|
147
|
-
def get_with_scope(self, asset: QlikAsset, app_id: str) ->
|
|
147
|
+
def get_with_scope(self, asset: QlikAsset, app_id: str) -> list[dict]:
|
|
148
148
|
"""
|
|
149
149
|
Calls the route corresponding to the asset scoped on an app_id and
|
|
150
150
|
returns the corresponding data
|
|
@@ -157,7 +157,7 @@ class RestApiClient:
|
|
|
157
157
|
assert isinstance(response, list), RESPONSE_LIST_EXPECTED_MSG
|
|
158
158
|
return response
|
|
159
159
|
|
|
160
|
-
def data_connections(self) ->
|
|
160
|
+
def data_connections(self) -> list[dict]:
|
|
161
161
|
"""
|
|
162
162
|
Returns the list of data Connections
|
|
163
163
|
|
|
@@ -165,7 +165,7 @@ class RestApiClient:
|
|
|
165
165
|
"""
|
|
166
166
|
return self.get(QlikAsset.CONNECTIONS)
|
|
167
167
|
|
|
168
|
-
def spaces(self) ->
|
|
168
|
+
def spaces(self) -> list[dict]:
|
|
169
169
|
"""
|
|
170
170
|
Returns the list of Spaces
|
|
171
171
|
|
|
@@ -173,7 +173,7 @@ class RestApiClient:
|
|
|
173
173
|
"""
|
|
174
174
|
return self.get(QlikAsset.SPACES)
|
|
175
175
|
|
|
176
|
-
def users(self) ->
|
|
176
|
+
def users(self) -> list[dict]:
|
|
177
177
|
"""
|
|
178
178
|
Returns the list of Users
|
|
179
179
|
|
|
@@ -181,7 +181,7 @@ class RestApiClient:
|
|
|
181
181
|
"""
|
|
182
182
|
return self.get(QlikAsset.USERS)
|
|
183
183
|
|
|
184
|
-
def apps(self) ->
|
|
184
|
+
def apps(self) -> list[dict]:
|
|
185
185
|
"""
|
|
186
186
|
Returns the list of Apps
|
|
187
187
|
|
|
@@ -189,7 +189,7 @@ class RestApiClient:
|
|
|
189
189
|
"""
|
|
190
190
|
return self.get(QlikAsset.APPS)
|
|
191
191
|
|
|
192
|
-
def data_lineage(self, app_id: str) ->
|
|
192
|
+
def data_lineage(self, app_id: str) -> list[dict]:
|
|
193
193
|
"""
|
|
194
194
|
Returns the data lineage for a given source
|
|
195
195
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Optional, Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: QlikClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[QlikAsset, Union[list, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Qlik"""
|
|
23
24
|
|
|
24
25
|
logger.info("Extracting CONNECTIONS from REST API")
|
|
@@ -47,7 +48,7 @@ def iterate_all_data(
|
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
def extract_all(
|
|
50
|
-
except_http_error_statuses: Optional[
|
|
51
|
+
except_http_error_statuses: Optional[list[int]] = None, **kwargs
|
|
51
52
|
) -> None:
|
|
52
53
|
"""
|
|
53
54
|
Extract data from Qlik REST API
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
from ....utils import build_url
|
|
5
6
|
from ....utils.salesforce import SalesforceBaseClient
|
|
@@ -49,7 +50,7 @@ class SalesforceReportingClient(SalesforceBaseClient):
|
|
|
49
50
|
url = self._get_asset_url(asset_type, asset)
|
|
50
51
|
yield {**asset, "Url": url}
|
|
51
52
|
|
|
52
|
-
def fetch(self, asset: SalesforceReportingAsset) ->
|
|
53
|
+
def fetch(self, asset: SalesforceReportingAsset) -> list[dict]:
|
|
53
54
|
"""
|
|
54
55
|
Fetch Salesforce Reporting assets
|
|
55
56
|
"""
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -19,7 +20,7 @@ logger = logging.getLogger(__name__)
|
|
|
19
20
|
|
|
20
21
|
def iterate_all_data(
|
|
21
22
|
client: SalesforceReportingClient,
|
|
22
|
-
) -> Iterable[
|
|
23
|
+
) -> Iterable[tuple[str, Union[list, dict]]]:
|
|
23
24
|
"""Iterate over the extracted data from Salesforce"""
|
|
24
25
|
|
|
25
26
|
for asset in SalesforceReportingAsset:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from collections.abc import Iterator
|
|
1
2
|
from functools import partial
|
|
2
3
|
from http import HTTPStatus
|
|
3
|
-
from typing import Callable,
|
|
4
|
+
from typing import Callable, Optional
|
|
4
5
|
|
|
5
6
|
import requests
|
|
6
7
|
|
|
@@ -19,7 +20,7 @@ from .pagination import SIGMA_API_LIMIT, SigmaPagination
|
|
|
19
20
|
|
|
20
21
|
_CONTENT_TYPE = "application/x-www-form-urlencoded"
|
|
21
22
|
|
|
22
|
-
_DATA_ELEMENTS:
|
|
23
|
+
_DATA_ELEMENTS: tuple[str, ...] = (
|
|
23
24
|
"input-table",
|
|
24
25
|
"pivot-table",
|
|
25
26
|
"table",
|
|
@@ -49,7 +50,7 @@ SIGMA_SAFE_MODE = RequestSafeMode(
|
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
class SigmaBearerAuth(BearerAuth):
|
|
52
|
-
def __init__(self, host: str, token_payload:
|
|
53
|
+
def __init__(self, host: str, token_payload: dict[str, str]):
|
|
53
54
|
auth_endpoint = SigmaEndpointFactory.authentication()
|
|
54
55
|
self.authentication_url = build_url(host, auth_endpoint)
|
|
55
56
|
self.token_payload = token_payload
|
|
@@ -118,7 +119,7 @@ class SigmaClient(APIClient):
|
|
|
118
119
|
"page_id": page_id,
|
|
119
120
|
}
|
|
120
121
|
|
|
121
|
-
def _get_all_elements(self, workbooks:
|
|
122
|
+
def _get_all_elements(self, workbooks: list[dict]) -> Iterator[dict]:
|
|
122
123
|
for workbook in workbooks:
|
|
123
124
|
workbook_id = workbook["workbookId"]
|
|
124
125
|
|
|
@@ -132,7 +133,7 @@ class SigmaClient(APIClient):
|
|
|
132
133
|
page=page, workbook_id=workbook_id
|
|
133
134
|
)
|
|
134
135
|
|
|
135
|
-
def _get_all_lineages(self, elements:
|
|
136
|
+
def _get_all_lineages(self, elements: list[dict]) -> Iterator[dict]:
|
|
136
137
|
for element in elements:
|
|
137
138
|
workbook_id = element["workbook_id"]
|
|
138
139
|
element_id = element["elementId"]
|
|
@@ -145,7 +146,7 @@ class SigmaClient(APIClient):
|
|
|
145
146
|
"element_id": element_id,
|
|
146
147
|
}
|
|
147
148
|
|
|
148
|
-
def _get_all_queries(self, workbooks:
|
|
149
|
+
def _get_all_queries(self, workbooks: list[dict]) -> Iterator[dict]:
|
|
149
150
|
for workbook in workbooks:
|
|
150
151
|
workbook_id = workbook["workbookId"]
|
|
151
152
|
request = self._get_paginated(
|
|
@@ -159,8 +160,8 @@ class SigmaClient(APIClient):
|
|
|
159
160
|
def fetch(
|
|
160
161
|
self,
|
|
161
162
|
asset: SigmaAsset,
|
|
162
|
-
workbooks: Optional[
|
|
163
|
-
elements: Optional[
|
|
163
|
+
workbooks: Optional[list[dict]] = None,
|
|
164
|
+
elements: Optional[list[dict]] = None,
|
|
164
165
|
) -> Iterator[dict]:
|
|
165
166
|
"""Returns the needed metadata for the queried asset"""
|
|
166
167
|
if asset == SigmaAsset.DATASETS:
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
1
|
from pydantic import Field
|
|
4
2
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
3
|
|
|
@@ -21,7 +19,7 @@ class SigmaCredentials(BaseSettings):
|
|
|
21
19
|
grant_type: str = "client_credentials"
|
|
22
20
|
|
|
23
21
|
@property
|
|
24
|
-
def token_payload(self) ->
|
|
22
|
+
def token_payload(self) -> dict[str, str]:
|
|
25
23
|
return {
|
|
26
24
|
"grant_type": self.grant_type,
|
|
27
25
|
"client_id": self.client_id,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: SigmaClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[SigmaAsset, Union[list, Iterator, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Sigma"""
|
|
23
24
|
|
|
24
25
|
logger.info("Extracting DATASETS from API")
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import Set
|
|
3
2
|
|
|
4
3
|
from ...types import ExternalAsset, classproperty
|
|
5
4
|
|
|
@@ -24,7 +23,7 @@ class TableauAsset(ExternalAsset):
|
|
|
24
23
|
WORKBOOK_TO_DATASOURCE = "workbooks_to_datasource"
|
|
25
24
|
|
|
26
25
|
@classproperty
|
|
27
|
-
def optional(cls) ->
|
|
26
|
+
def optional(cls) -> set["TableauAsset"]:
|
|
28
27
|
return {
|
|
29
28
|
TableauAsset.DASHBOARD,
|
|
30
29
|
TableauAsset.DASHBOARD_SHEET,
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import List
|
|
3
2
|
|
|
4
3
|
import tableauserverclient as TSC # type: ignore
|
|
5
4
|
|
|
@@ -43,7 +42,7 @@ class ApiClient:
|
|
|
43
42
|
self._page_size = PAGE_SIZE
|
|
44
43
|
self._server.version = TABLEAU_SERVER_VERSION
|
|
45
44
|
self._safe_mode = bool(kwargs.get("safe_mode"))
|
|
46
|
-
self.errors:
|
|
45
|
+
self.errors: list[str] = []
|
|
47
46
|
|
|
48
47
|
@staticmethod
|
|
49
48
|
def name() -> str:
|