castor-extractor 0.21.9__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +8 -0
- castor_extractor/commands/__init__.py +0 -3
- castor_extractor/commands/file_check.py +1 -2
- castor_extractor/file_checker/column.py +5 -5
- castor_extractor/file_checker/file.py +7 -7
- castor_extractor/file_checker/file_test.py +2 -2
- castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
- castor_extractor/knowledge/confluence/client/client.py +2 -1
- castor_extractor/knowledge/confluence/extract.py +3 -2
- castor_extractor/knowledge/notion/client/client.py +3 -2
- castor_extractor/knowledge/notion/extract.py +3 -2
- castor_extractor/quality/soda/client/client.py +2 -1
- castor_extractor/quality/soda/client/pagination.py +1 -3
- castor_extractor/types.py +3 -3
- castor_extractor/uploader/env.py +2 -2
- castor_extractor/uploader/upload.py +4 -3
- castor_extractor/uploader/utils.py +1 -1
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/client/abstract.py +2 -1
- castor_extractor/utils/client/api/auth.py +2 -2
- castor_extractor/utils/client/api/auth_test.py +2 -2
- castor_extractor/utils/client/api/client.py +3 -3
- castor_extractor/utils/client/api/pagination.py +3 -2
- castor_extractor/utils/client/api/safe_request.py +5 -5
- castor_extractor/utils/collection.py +7 -11
- castor_extractor/utils/dbt/client.py +3 -3
- castor_extractor/utils/dbt/client_test.py +2 -2
- castor_extractor/utils/deprecate.py +1 -2
- castor_extractor/utils/files.py +5 -5
- castor_extractor/utils/formatter.py +5 -4
- castor_extractor/utils/json_stream_write.py +2 -1
- castor_extractor/utils/object.py +2 -1
- castor_extractor/utils/pager/pager.py +2 -4
- castor_extractor/utils/pager/pager_on_id.py +2 -1
- castor_extractor/utils/pager/pager_on_id_test.py +5 -5
- castor_extractor/utils/pager/pager_test.py +3 -3
- castor_extractor/utils/retry.py +4 -3
- castor_extractor/utils/retry_test.py +2 -3
- castor_extractor/utils/safe.py +3 -3
- castor_extractor/utils/salesforce/client.py +2 -1
- castor_extractor/utils/salesforce/credentials.py +1 -3
- castor_extractor/utils/store.py +2 -1
- castor_extractor/utils/string.py +2 -2
- castor_extractor/utils/string_test.py +1 -3
- castor_extractor/utils/time.py +4 -0
- castor_extractor/utils/time_test.py +8 -1
- castor_extractor/utils/type.py +3 -2
- castor_extractor/utils/validation.py +4 -4
- castor_extractor/utils/write.py +2 -2
- castor_extractor/visualization/domo/client/client.py +8 -7
- castor_extractor/visualization/domo/client/credentials.py +2 -2
- castor_extractor/visualization/domo/client/endpoints.py +2 -2
- castor_extractor/visualization/domo/extract.py +3 -2
- castor_extractor/visualization/looker/api/client.py +17 -16
- castor_extractor/visualization/looker/api/utils.py +2 -2
- castor_extractor/visualization/looker/assets.py +1 -3
- castor_extractor/visualization/looker/extract.py +4 -3
- castor_extractor/visualization/looker/fields.py +3 -3
- castor_extractor/visualization/looker/multithreading.py +3 -3
- castor_extractor/visualization/looker_studio/__init__.py +6 -0
- castor_extractor/visualization/looker_studio/assets.py +6 -0
- castor_extractor/visualization/looker_studio/client/__init__.py +3 -0
- castor_extractor/visualization/looker_studio/client/admin_sdk_client.py +90 -0
- castor_extractor/visualization/looker_studio/client/client.py +37 -0
- castor_extractor/visualization/looker_studio/client/credentials.py +20 -0
- castor_extractor/visualization/looker_studio/client/endpoints.py +18 -0
- castor_extractor/visualization/looker_studio/client/enums.py +8 -0
- castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py +102 -0
- castor_extractor/visualization/looker_studio/client/pagination.py +31 -0
- castor_extractor/visualization/looker_studio/client/scopes.py +6 -0
- castor_extractor/visualization/metabase/assets.py +1 -3
- castor_extractor/visualization/metabase/client/api/client.py +8 -7
- castor_extractor/visualization/metabase/extract.py +3 -2
- castor_extractor/visualization/metabase/types.py +1 -3
- castor_extractor/visualization/mode/client/client.py +6 -6
- castor_extractor/visualization/mode/extract.py +2 -2
- castor_extractor/visualization/powerbi/assets.py +1 -3
- castor_extractor/visualization/powerbi/client/client.py +12 -11
- castor_extractor/visualization/powerbi/client/credentials.py +3 -3
- castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
- castor_extractor/visualization/powerbi/extract.py +3 -2
- castor_extractor/visualization/qlik/assets.py +1 -3
- castor_extractor/visualization/qlik/client/constants.py +1 -3
- castor_extractor/visualization/qlik/client/engine/error.py +1 -3
- castor_extractor/visualization/qlik/client/master.py +3 -3
- castor_extractor/visualization/qlik/client/rest.py +12 -12
- castor_extractor/visualization/qlik/extract.py +4 -3
- castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
- castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
- castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
- castor_extractor/visualization/sigma/client/client.py +9 -8
- castor_extractor/visualization/sigma/client/credentials.py +1 -3
- castor_extractor/visualization/sigma/extract.py +3 -2
- castor_extractor/visualization/tableau/assets.py +1 -2
- castor_extractor/visualization/tableau/client/client.py +1 -2
- castor_extractor/visualization/tableau/client/client_utils.py +3 -2
- castor_extractor/visualization/tableau/client/credentials.py +3 -3
- castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
- castor_extractor/visualization/tableau/extract.py +2 -2
- castor_extractor/visualization/tableau/gql_fields.py +3 -3
- castor_extractor/visualization/tableau/tsc_fields.py +1 -2
- castor_extractor/visualization/tableau/types.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
- castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
- castor_extractor/visualization/tableau_revamp/extract.py +2 -2
- castor_extractor/visualization/thoughtspot/client/client.py +3 -2
- castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
- castor_extractor/visualization/thoughtspot/extract.py +3 -2
- castor_extractor/warehouse/abstract/asset.py +4 -5
- castor_extractor/warehouse/abstract/extract.py +4 -3
- castor_extractor/warehouse/abstract/query.py +4 -4
- castor_extractor/warehouse/bigquery/client.py +8 -8
- castor_extractor/warehouse/bigquery/extract.py +1 -1
- castor_extractor/warehouse/bigquery/query.py +2 -2
- castor_extractor/warehouse/bigquery/types.py +2 -4
- castor_extractor/warehouse/databricks/api_client.py +15 -14
- castor_extractor/warehouse/databricks/client.py +16 -16
- castor_extractor/warehouse/databricks/extract.py +4 -4
- castor_extractor/warehouse/databricks/format.py +12 -12
- castor_extractor/warehouse/databricks/lineage.py +11 -11
- castor_extractor/warehouse/databricks/pagination.py +2 -2
- castor_extractor/warehouse/databricks/types.py +4 -4
- castor_extractor/warehouse/databricks/utils.py +5 -4
- castor_extractor/warehouse/mysql/query.py +2 -2
- castor_extractor/warehouse/postgres/query.py +2 -2
- castor_extractor/warehouse/redshift/client.py +1 -1
- castor_extractor/warehouse/redshift/query.py +2 -2
- castor_extractor/warehouse/salesforce/client.py +8 -8
- castor_extractor/warehouse/salesforce/extract.py +3 -4
- castor_extractor/warehouse/salesforce/format.py +19 -11
- castor_extractor/warehouse/salesforce/format_test.py +24 -10
- castor_extractor/warehouse/snowflake/query.py +5 -5
- castor_extractor/warehouse/sqlserver/client.py +1 -1
- castor_extractor/warehouse/sqlserver/query.py +2 -2
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/METADATA +13 -6
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/RECORD +142 -131
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/LICENCE +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/WHEEL +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from typing import Iterator, Optional
|
|
2
|
+
|
|
3
|
+
from google.oauth2.service_account import Credentials
|
|
4
|
+
from googleapiclient import discovery # type: ignore
|
|
5
|
+
|
|
6
|
+
from ....utils import (
|
|
7
|
+
at_midnight,
|
|
8
|
+
current_date,
|
|
9
|
+
fetch_all_pages,
|
|
10
|
+
format_rfc_3339_date,
|
|
11
|
+
past_date,
|
|
12
|
+
)
|
|
13
|
+
from .credentials import LookerStudioCredentials
|
|
14
|
+
from .pagination import LookerStudioPagination
|
|
15
|
+
from .scopes import SCOPES
|
|
16
|
+
|
|
17
|
+
USER_EMAIL_FIELD = "primaryEmail"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AdminSDKClient:
|
|
21
|
+
"""
|
|
22
|
+
Client to call the Report API and Directory API.
|
|
23
|
+
The service account must impersonate and admin account.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(self, credentials: LookerStudioCredentials):
|
|
27
|
+
self._credentials = Credentials.from_service_account_info(
|
|
28
|
+
credentials.model_dump(),
|
|
29
|
+
scopes=SCOPES,
|
|
30
|
+
subject=credentials.admin_email, # impersonates an admin
|
|
31
|
+
)
|
|
32
|
+
self.directory_api = discovery.build(
|
|
33
|
+
"admin", "directory_v1", credentials=self._credentials
|
|
34
|
+
)
|
|
35
|
+
self.report_api = discovery.build(
|
|
36
|
+
"admin", "reports_v1", credentials=self._credentials
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
def list_users(self) -> Iterator[dict]:
|
|
40
|
+
"""
|
|
41
|
+
Lists all users in the domain; only the primaryEmail field is selected.
|
|
42
|
+
Note:
|
|
43
|
+
* `my_customer` is an alias to represent the account's `customerId`
|
|
44
|
+
* `domain_public` allows non-admins to list users. This is technically
|
|
45
|
+
not necessary here because an admin account is impersonated, but it
|
|
46
|
+
avoids tapping into unnecessary data & serves for future reference.
|
|
47
|
+
See
|
|
48
|
+
https://googleapis.github.io/google-api-python-client/docs/dyn/admin_directory_v1.users.html#list
|
|
49
|
+
https://developers.google.com/admin-sdk/directory/reference/rest/v1/users/list
|
|
50
|
+
https://developers.google.com/admin-sdk/directory/v1/guides/manage-users#retrieve_users_non_admin
|
|
51
|
+
https://stackoverflow.com/a/71083443/14448410
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
def _users(pagination_params: Optional[dict] = None) -> dict:
|
|
55
|
+
parameters = {
|
|
56
|
+
"viewType": "domain_public",
|
|
57
|
+
"customer": "my_customer",
|
|
58
|
+
"fields": f"users({USER_EMAIL_FIELD}), nextPageToken",
|
|
59
|
+
**(pagination_params or {}),
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return self.directory_api.users().list(**parameters).execute()
|
|
63
|
+
|
|
64
|
+
yield from fetch_all_pages(_users, LookerStudioPagination)
|
|
65
|
+
|
|
66
|
+
def list_view_events(self) -> Iterator[dict]:
|
|
67
|
+
"""
|
|
68
|
+
Lists all Data Studio View events of the past day.
|
|
69
|
+
See
|
|
70
|
+
https://googleapis.github.io/google-api-python-client/docs/dyn/admin_reports_v1.activities.html
|
|
71
|
+
https://developers.google.com/admin-sdk/reports/reference/rest/v1/activities/list
|
|
72
|
+
https://developers.google.com/admin-sdk/reports/v1/appendix/activity/data-studio#VIEW
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
def _activity(pagination_params: Optional[dict] = None) -> dict:
|
|
76
|
+
yesterday = format_rfc_3339_date(at_midnight(past_date(1)))
|
|
77
|
+
today = format_rfc_3339_date(at_midnight(current_date()))
|
|
78
|
+
|
|
79
|
+
parameters = {
|
|
80
|
+
"userKey": "all",
|
|
81
|
+
"applicationName": "data_studio",
|
|
82
|
+
"eventName": "VIEW",
|
|
83
|
+
"startTime": yesterday,
|
|
84
|
+
"endTime": today,
|
|
85
|
+
**(pagination_params or {}),
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return self.report_api.activities().list(**parameters).execute()
|
|
89
|
+
|
|
90
|
+
yield from fetch_all_pages(_activity, LookerStudioPagination)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Iterator
|
|
2
|
+
|
|
3
|
+
from .. import LookerStudioAsset
|
|
4
|
+
from .admin_sdk_client import USER_EMAIL_FIELD, AdminSDKClient
|
|
5
|
+
from .credentials import LookerStudioCredentials
|
|
6
|
+
from .looker_studio_api_client import LookerStudioAPIClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LookerStudioClient:
|
|
10
|
+
"""
|
|
11
|
+
Acts as a wrapper class to fetch Looker Studio assets, which requires
|
|
12
|
+
coordinating calls between the Admin SDK API and the Looker Studio API.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, credentials: LookerStudioCredentials):
|
|
16
|
+
self.admin_sdk_client = AdminSDKClient(credentials)
|
|
17
|
+
self.looker_studio_client = LookerStudioAPIClient(credentials)
|
|
18
|
+
|
|
19
|
+
def _get_assets(self) -> Iterator[dict]:
|
|
20
|
+
"""
|
|
21
|
+
Extracts reports and data sources user by user.
|
|
22
|
+
"""
|
|
23
|
+
users = self.admin_sdk_client.list_users()
|
|
24
|
+
|
|
25
|
+
for user in users:
|
|
26
|
+
email = user[USER_EMAIL_FIELD]
|
|
27
|
+
yield from self.looker_studio_client.fetch_user_assets(email)
|
|
28
|
+
|
|
29
|
+
def fetch(self, asset: LookerStudioAsset) -> Iterator[dict]:
|
|
30
|
+
if asset == LookerStudioAsset.VIEW_ACTIVITY:
|
|
31
|
+
yield from self.admin_sdk_client.list_view_events()
|
|
32
|
+
|
|
33
|
+
elif asset == LookerStudioAsset.ASSETS:
|
|
34
|
+
yield from self._get_assets()
|
|
35
|
+
|
|
36
|
+
else:
|
|
37
|
+
raise ValueError(f"The asset {asset}, is not supported")
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from pydantic import BaseModel, SecretStr, field_serializer
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LookerStudioCredentials(BaseModel):
|
|
5
|
+
admin_email: str
|
|
6
|
+
auth_provider_x509_cert_url: str
|
|
7
|
+
auth_uri: str
|
|
8
|
+
client_email: str
|
|
9
|
+
client_id: str
|
|
10
|
+
client_x509_cert_url: str
|
|
11
|
+
private_key: SecretStr
|
|
12
|
+
private_key_id: str
|
|
13
|
+
project_id: str
|
|
14
|
+
token_uri: str
|
|
15
|
+
type: str
|
|
16
|
+
|
|
17
|
+
@field_serializer("private_key")
|
|
18
|
+
def dump_secret(self, pk):
|
|
19
|
+
"""When using model_dump, show private_key value"""
|
|
20
|
+
return pk.get_secret_value()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
class LookerStudioAPIEndpoint:
|
|
2
|
+
BASE_PATH = "https://datastudio.googleapis.com"
|
|
3
|
+
|
|
4
|
+
@classmethod
|
|
5
|
+
def search(cls) -> str:
|
|
6
|
+
"""
|
|
7
|
+
Search a user's assets.
|
|
8
|
+
See https://developers.google.com/looker-studio/integrate/api/reference/assets/search
|
|
9
|
+
"""
|
|
10
|
+
return f"{cls.BASE_PATH}/v1/assets:search"
|
|
11
|
+
|
|
12
|
+
@classmethod
|
|
13
|
+
def permissions(cls, asset_name: str) -> str:
|
|
14
|
+
"""
|
|
15
|
+
Get the permissions of an asset. The user must be the owner of the asset.
|
|
16
|
+
See https://developers.google.com/looker-studio/integrate/api/reference/permissions/get
|
|
17
|
+
"""
|
|
18
|
+
return f"{cls.BASE_PATH}/v1/assets/{asset_name}/permissions"
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from functools import partial
|
|
2
|
+
from typing import Iterator, Optional
|
|
3
|
+
|
|
4
|
+
from google.auth.transport.requests import Request
|
|
5
|
+
from google.oauth2.service_account import Credentials
|
|
6
|
+
|
|
7
|
+
from ....utils import (
|
|
8
|
+
APIClient,
|
|
9
|
+
BearerAuth,
|
|
10
|
+
fetch_all_pages,
|
|
11
|
+
)
|
|
12
|
+
from .credentials import LookerStudioCredentials
|
|
13
|
+
from .endpoints import LookerStudioAPIEndpoint
|
|
14
|
+
from .enums import LookerStudioAssetType
|
|
15
|
+
from .pagination import LookerStudioPagination
|
|
16
|
+
from .scopes import SCOPES
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LookerStudioAPIAuth(BearerAuth):
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
credentials: LookerStudioCredentials,
|
|
23
|
+
subject: Optional[str] = None,
|
|
24
|
+
):
|
|
25
|
+
"""
|
|
26
|
+
Instantiates the service account credentials.
|
|
27
|
+
If a `subject` email is passed, the service account will impersonate
|
|
28
|
+
that user and make requests on that user's behalf.
|
|
29
|
+
"""
|
|
30
|
+
self._credentials = Credentials.from_service_account_info(
|
|
31
|
+
credentials.model_dump(), scopes=SCOPES
|
|
32
|
+
)
|
|
33
|
+
if subject:
|
|
34
|
+
self._credentials = self._credentials.with_subject(subject)
|
|
35
|
+
|
|
36
|
+
def fetch_token(self):
|
|
37
|
+
self._credentials.refresh(Request())
|
|
38
|
+
return self._credentials.token
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LookerStudioAPIClient(APIClient):
|
|
42
|
+
def __init__(self, credentials: LookerStudioCredentials):
|
|
43
|
+
auth = LookerStudioAPIAuth(credentials=credentials)
|
|
44
|
+
super().__init__(auth=auth)
|
|
45
|
+
|
|
46
|
+
self._credentials = credentials
|
|
47
|
+
|
|
48
|
+
def _is_private_asset(self, asset_name: str) -> bool:
|
|
49
|
+
"""
|
|
50
|
+
Returns True if the asset is not viewable by anyone other than the owner.
|
|
51
|
+
|
|
52
|
+
The permissions dict contains `Role: Member[]` key-value pairs and has
|
|
53
|
+
at least one key-value pair to define the asset's unique OWNER.
|
|
54
|
+
If another key is present, it means the asset was shared with
|
|
55
|
+
another person or group.
|
|
56
|
+
|
|
57
|
+
See also https://developers.google.com/looker-studio/integrate/api/reference/types#Permissions
|
|
58
|
+
"""
|
|
59
|
+
data = self._get(LookerStudioAPIEndpoint.permissions(asset_name))
|
|
60
|
+
permissions = data["permissions"]
|
|
61
|
+
return len(permissions.keys()) == 1
|
|
62
|
+
|
|
63
|
+
def _user_assets(
|
|
64
|
+
self, asset_type: LookerStudioAssetType, user_email: str
|
|
65
|
+
) -> Iterator[dict]:
|
|
66
|
+
"""
|
|
67
|
+
Yields all assets of the given type, owned by the given user and visible
|
|
68
|
+
by other members.
|
|
69
|
+
"""
|
|
70
|
+
request = partial(
|
|
71
|
+
self._get,
|
|
72
|
+
LookerStudioAPIEndpoint.search(),
|
|
73
|
+
params={"assetTypes": [asset_type.value]},
|
|
74
|
+
)
|
|
75
|
+
assets = fetch_all_pages(request, LookerStudioPagination)
|
|
76
|
+
|
|
77
|
+
for asset in assets:
|
|
78
|
+
asset_name = asset["name"]
|
|
79
|
+
owner = asset["owner"]
|
|
80
|
+
if owner == user_email and not self._is_private_asset(asset_name):
|
|
81
|
+
yield asset
|
|
82
|
+
|
|
83
|
+
def _impersonate_user(self, user_email: str):
|
|
84
|
+
self._auth = LookerStudioAPIAuth(
|
|
85
|
+
credentials=self._credentials, subject=user_email
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def fetch_user_assets(self, user_email: str) -> Iterator[dict]:
|
|
89
|
+
"""Yields assets (reports and data sources) shared by the given user."""
|
|
90
|
+
self._impersonate_user(user_email)
|
|
91
|
+
|
|
92
|
+
reports = self._user_assets(
|
|
93
|
+
asset_type=LookerStudioAssetType.REPORT,
|
|
94
|
+
user_email=user_email,
|
|
95
|
+
)
|
|
96
|
+
data_sources = self._user_assets(
|
|
97
|
+
asset_type=LookerStudioAssetType.DATA_SOURCE,
|
|
98
|
+
user_email=user_email,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
yield from reports
|
|
102
|
+
yield from data_sources
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import AliasChoices, ConfigDict, Field
|
|
4
|
+
from pydantic.alias_generators import to_camel
|
|
5
|
+
|
|
6
|
+
from ....utils import PaginationModel
|
|
7
|
+
|
|
8
|
+
NEXT_PAGE_KEY = "pageToken"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LookerStudioPagination(PaginationModel):
|
|
12
|
+
items: list = Field(
|
|
13
|
+
default_factory=list,
|
|
14
|
+
validation_alias=AliasChoices("items", "users", "assets"),
|
|
15
|
+
)
|
|
16
|
+
next_page_token: Optional[str] = None
|
|
17
|
+
|
|
18
|
+
model_config = ConfigDict(
|
|
19
|
+
alias_generator=to_camel,
|
|
20
|
+
populate_by_name=True,
|
|
21
|
+
from_attributes=True,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
def is_last(self) -> bool:
|
|
25
|
+
return self.next_page_token is None
|
|
26
|
+
|
|
27
|
+
def next_page_payload(self) -> dict:
|
|
28
|
+
return {NEXT_PAGE_KEY: self.next_page_token}
|
|
29
|
+
|
|
30
|
+
def page_results(self) -> list:
|
|
31
|
+
return self.items
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict, Tuple
|
|
2
|
-
|
|
3
1
|
from ...types import ExternalAsset
|
|
4
2
|
|
|
5
3
|
|
|
@@ -15,7 +13,7 @@ class MetabaseAsset(ExternalAsset):
|
|
|
15
13
|
DASHBOARD_CARDS = "dashboard_cards"
|
|
16
14
|
|
|
17
15
|
|
|
18
|
-
EXPORTED_FIELDS: Dict[MetabaseAsset, Tuple[str, ...]] = {
|
|
16
|
+
EXPORTED_FIELDS: dict[MetabaseAsset, tuple[str, ...]] = {
|
|
19
17
|
MetabaseAsset.COLLECTION: (
|
|
20
18
|
"id",
|
|
21
19
|
"name",
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
from http import HTTPStatus
|
|
3
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Optional, cast
|
|
4
5
|
|
|
5
6
|
import requests
|
|
6
7
|
from requests import HTTPError
|
|
@@ -78,7 +79,7 @@ class ApiClient:
|
|
|
78
79
|
@staticmethod
|
|
79
80
|
def _answer(response: Any):
|
|
80
81
|
answer = response
|
|
81
|
-
if isinstance(answer,
|
|
82
|
+
if isinstance(answer, dict) and DATA_KEY in answer:
|
|
82
83
|
# v0.41 of Metabase introduced embedded data for certain calls
|
|
83
84
|
# {'data': [{ }, ...] , 'total': 15, 'limit': None, 'offset': None}"
|
|
84
85
|
return answer[DATA_KEY]
|
|
@@ -123,7 +124,7 @@ class ApiClient:
|
|
|
123
124
|
def _fetch_ids(self, asset: MetabaseAsset) -> IdsType:
|
|
124
125
|
ids: IdsType = []
|
|
125
126
|
results = self._call(endpoint=asset.name.lower())
|
|
126
|
-
for res in cast(
|
|
127
|
+
for res in cast(list, results):
|
|
127
128
|
assert isinstance(res, dict)
|
|
128
129
|
ids.append(res["id"])
|
|
129
130
|
return ids
|
|
@@ -135,7 +136,7 @@ class ApiClient:
|
|
|
135
136
|
if not collection:
|
|
136
137
|
continue
|
|
137
138
|
|
|
138
|
-
seen_dashboard_ids:
|
|
139
|
+
seen_dashboard_ids: set[int] = set()
|
|
139
140
|
|
|
140
141
|
for dashboard in cast(SerializedAsset, collection):
|
|
141
142
|
if dashboard.get("model") != "dashboard":
|
|
@@ -149,7 +150,7 @@ class ApiClient:
|
|
|
149
150
|
|
|
150
151
|
if dashboard_id not in seen_dashboard_ids:
|
|
151
152
|
seen_dashboard_ids.add(dashboard_id)
|
|
152
|
-
yield cast(
|
|
153
|
+
yield cast(dict, self._call(f"dashboard/{dashboard_id}"))
|
|
153
154
|
|
|
154
155
|
@staticmethod
|
|
155
156
|
def _collection_specifics(collections: SerializedAsset) -> SerializedAsset:
|
|
@@ -170,7 +171,7 @@ class ApiClient:
|
|
|
170
171
|
return databases
|
|
171
172
|
|
|
172
173
|
@staticmethod
|
|
173
|
-
def _dashboard_cards(dashboards: SerializedAsset) -> Iterator[
|
|
174
|
+
def _dashboard_cards(dashboards: SerializedAsset) -> Iterator[dict]:
|
|
174
175
|
for d in dashboards:
|
|
175
176
|
d_cards = d.get(CARDS_KEY) or d.get(CARDS_KEY_DEPRECATED) or []
|
|
176
177
|
yield from d_cards
|
|
@@ -186,7 +187,7 @@ class ApiClient:
|
|
|
186
187
|
|
|
187
188
|
else:
|
|
188
189
|
answer = self._call(asset.name.lower())
|
|
189
|
-
assets = cast(
|
|
190
|
+
assets = cast(list, answer)
|
|
190
191
|
|
|
191
192
|
if asset == MetabaseAsset.DATABASE:
|
|
192
193
|
assets = self._database_specifics(assets)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -20,7 +21,7 @@ ClientMetabase = Union[DbClient, ApiClient]
|
|
|
20
21
|
|
|
21
22
|
def iterate_all_data(
|
|
22
23
|
client: ClientMetabase,
|
|
23
|
-
) -> Iterable[
|
|
24
|
+
) -> Iterable[tuple[MetabaseAsset, list]]:
|
|
24
25
|
"""Iterate over the extracted Data From metabase"""
|
|
25
26
|
|
|
26
27
|
yield MetabaseAsset.USER, deep_serialize(client.fetch(MetabaseAsset.USER))
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional, cast
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
5
|
from requests.auth import HTTPBasicAuth
|
|
@@ -29,8 +29,8 @@ logger = logging.getLogger(__name__)
|
|
|
29
29
|
|
|
30
30
|
URL_TEMPLATE = "{host}/api"
|
|
31
31
|
|
|
32
|
-
RawData =
|
|
33
|
-
Tokens = Optional[
|
|
32
|
+
RawData = list[dict]
|
|
33
|
+
Tokens = Optional[list[str]]
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
class Client:
|
|
@@ -76,8 +76,8 @@ class Client:
|
|
|
76
76
|
|
|
77
77
|
# most of calls return data in ["_embedded"]["resource_name"] node
|
|
78
78
|
try:
|
|
79
|
-
embedded = cast(
|
|
80
|
-
return cast(
|
|
79
|
+
embedded = cast(dict, result["_embedded"])
|
|
80
|
+
return cast(list, embedded[resource_name])
|
|
81
81
|
except (ValueError, KeyError):
|
|
82
82
|
raise UnexpectedApiResponseError(resource_name, result)
|
|
83
83
|
|
|
@@ -168,7 +168,7 @@ class Client:
|
|
|
168
168
|
resource_name=mb["member_username"],
|
|
169
169
|
with_workspace=False,
|
|
170
170
|
)
|
|
171
|
-
members.append(cast(
|
|
171
|
+
members.append(cast(dict, result))
|
|
172
172
|
return members
|
|
173
173
|
|
|
174
174
|
@staticmethod
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
3
|
|
|
4
4
|
from ...utils import (
|
|
5
5
|
OUTPUT_DIR,
|
|
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
18
|
|
|
19
19
|
def iterate_all_data(
|
|
20
20
|
client: Client,
|
|
21
|
-
) -> Iterable[
|
|
21
|
+
) -> Iterable[tuple[Asset, list]]:
|
|
22
22
|
"""Iterate over the extracted Data From Mode Analytics"""
|
|
23
23
|
|
|
24
24
|
datasources = client.fetch(Asset.DATASOURCE)
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Set
|
|
2
|
-
|
|
3
1
|
from ...types import ExternalAsset, classproperty
|
|
4
2
|
|
|
5
3
|
|
|
@@ -18,7 +16,7 @@ class PowerBiAsset(ExternalAsset):
|
|
|
18
16
|
USERS = "users"
|
|
19
17
|
|
|
20
18
|
@classproperty
|
|
21
|
-
def optional(cls) -> Set["PowerBiAsset"]:
|
|
19
|
+
def optional(cls) -> set["PowerBiAsset"]:
|
|
22
20
|
return {
|
|
23
21
|
PowerBiAsset.DATASET_FIELDS,
|
|
24
22
|
PowerBiAsset.PAGES,
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
from datetime import date
|
|
3
4
|
from functools import partial
|
|
4
5
|
from time import sleep
|
|
5
|
-
from typing import
|
|
6
|
+
from typing import Optional, Union
|
|
6
7
|
|
|
7
8
|
import requests
|
|
8
9
|
from requests import HTTPError
|
|
@@ -40,7 +41,7 @@ class PowerbiClient(APIClient):
|
|
|
40
41
|
timeout=POWERBI_DEFAULT_TIMEOUT_S,
|
|
41
42
|
)
|
|
42
43
|
|
|
43
|
-
def _activity_events(self, day: Optional[date] = None) -> Iterator[
|
|
44
|
+
def _activity_events(self, day: Optional[date] = None) -> Iterator[dict]:
|
|
44
45
|
"""
|
|
45
46
|
Returns a list of activity events for the organization.
|
|
46
47
|
https://learn.microsoft.com/en-us/power-bi/admin/service-admin-auditing#activityevents-rest-api
|
|
@@ -52,21 +53,21 @@ class PowerbiClient(APIClient):
|
|
|
52
53
|
)
|
|
53
54
|
yield from fetch_all_pages(request, PowerBiPagination)
|
|
54
55
|
|
|
55
|
-
def _datasets(self) -> Iterator[
|
|
56
|
+
def _datasets(self) -> Iterator[dict]:
|
|
56
57
|
"""
|
|
57
58
|
Returns a list of datasets for the organization.
|
|
58
59
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin
|
|
59
60
|
"""
|
|
60
61
|
yield from self._get(PowerBiEndpointFactory.datasets())[Keys.VALUE]
|
|
61
62
|
|
|
62
|
-
def _dashboards(self) -> Iterator[
|
|
63
|
+
def _dashboards(self) -> Iterator[dict]:
|
|
63
64
|
"""
|
|
64
65
|
Returns a list of dashboards for the organization.
|
|
65
66
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/dashboards-get-dashboards-as-admin
|
|
66
67
|
"""
|
|
67
68
|
yield from self._get(PowerBiEndpointFactory.dashboards())[Keys.VALUE]
|
|
68
69
|
|
|
69
|
-
def _reports(self) -> Iterator[
|
|
70
|
+
def _reports(self) -> Iterator[dict]:
|
|
70
71
|
"""
|
|
71
72
|
Returns a list of reports for the organization.
|
|
72
73
|
https://learn.microsoft.com/en-us/rest/api/power-bi/admin/reports-get-reports-as-admin
|
|
@@ -87,12 +88,12 @@ class PowerbiClient(APIClient):
|
|
|
87
88
|
|
|
88
89
|
return reports
|
|
89
90
|
|
|
90
|
-
def _workspace_ids(self) ->
|
|
91
|
+
def _workspace_ids(self) -> list[str]:
|
|
91
92
|
"""
|
|
92
93
|
Get workspaces ids from powerBI admin API.
|
|
93
94
|
more: https://learn.microsoft.com/en-us/rest/api/power-bi/admin/workspace-info-get-modified-workspaces
|
|
94
95
|
"""
|
|
95
|
-
params:
|
|
96
|
+
params: dict[str, Union[bool, str]] = {
|
|
96
97
|
Keys.INACTIVE_WORKSPACES: True,
|
|
97
98
|
Keys.PERSONAL_WORKSPACES: True,
|
|
98
99
|
}
|
|
@@ -104,7 +105,7 @@ class PowerbiClient(APIClient):
|
|
|
104
105
|
|
|
105
106
|
return [x[Keys.ID] for x in response]
|
|
106
107
|
|
|
107
|
-
def _get_scan_result(self, scan_id: int) -> Iterator[
|
|
108
|
+
def _get_scan_result(self, scan_id: int) -> Iterator[dict]:
|
|
108
109
|
endpoint = PowerBiEndpointFactory.metadata_scan_result(scan_id)
|
|
109
110
|
yield from self._get(endpoint)[Keys.WORKSPACES]
|
|
110
111
|
|
|
@@ -136,7 +137,7 @@ class PowerbiClient(APIClient):
|
|
|
136
137
|
logger.warning(f"Scan {scan_id} timed out")
|
|
137
138
|
return False
|
|
138
139
|
|
|
139
|
-
def _create_scan(self, workspaces_ids:
|
|
140
|
+
def _create_scan(self, workspaces_ids: list[str]) -> int:
|
|
140
141
|
"""
|
|
141
142
|
Tells the Power BI API to start an asynchronous metadata scan.
|
|
142
143
|
Returns the scan's ID.
|
|
@@ -157,7 +158,7 @@ class PowerbiClient(APIClient):
|
|
|
157
158
|
)
|
|
158
159
|
return scan_id[Keys.ID]
|
|
159
160
|
|
|
160
|
-
def _metadata(self) -> Iterator[
|
|
161
|
+
def _metadata(self) -> Iterator[dict]:
|
|
161
162
|
"""
|
|
162
163
|
Fetch metadata by workspace. The metadata scanning is asynchronous and
|
|
163
164
|
requires the following steps:
|
|
@@ -183,7 +184,7 @@ class PowerbiClient(APIClient):
|
|
|
183
184
|
asset: PowerBiAsset,
|
|
184
185
|
*,
|
|
185
186
|
day: Optional[date] = None,
|
|
186
|
-
) -> Iterator[
|
|
187
|
+
) -> Iterator[dict]:
|
|
187
188
|
"""
|
|
188
189
|
Given a PowerBi asset, returns the corresponding data using the
|
|
189
190
|
appropriate client.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, field_validator
|
|
4
4
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
@@ -19,9 +19,9 @@ class PowerbiCredentials(BaseSettings):
|
|
|
19
19
|
client_id: str
|
|
20
20
|
tenant_id: str
|
|
21
21
|
secret: str = Field(repr=False)
|
|
22
|
-
scopes:
|
|
22
|
+
scopes: list[str] = [DEFAULT_SCOPE]
|
|
23
23
|
|
|
24
24
|
@field_validator("scopes", mode="before")
|
|
25
25
|
@classmethod
|
|
26
|
-
def _check_scopes(cls, scopes: Optional[
|
|
26
|
+
def _check_scopes(cls, scopes: Optional[list[str]]) -> list[str]:
|
|
27
27
|
return scopes if scopes is not None else [DEFAULT_SCOPE]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from datetime import date, datetime
|
|
2
|
-
from typing import Optional, Tuple
|
|
2
|
+
from typing import Optional
|
|
3
3
|
|
|
4
4
|
from ....utils import at_midnight, format_date, yesterday
|
|
5
5
|
|
|
@@ -7,7 +7,7 @@ _CLIENT_APP_BASE = "https://login.microsoftonline.com"
|
|
|
7
7
|
_REST_API_BASE_PATH = "https://api.powerbi.com/v1.0/myorg"
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def _time_filter(day: Optional[date]) -> Tuple[datetime, datetime]:
|
|
10
|
+
def _time_filter(day: Optional[date]) -> tuple[datetime, datetime]:
|
|
11
11
|
target_day = day or yesterday()
|
|
12
12
|
start = at_midnight(target_day)
|
|
13
13
|
end = datetime.combine(target_day, datetime.max.time())
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: PowerbiClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[PowerBiAsset, Union[list, dict]]]:
|
|
22
23
|
for asset in PowerBiAsset:
|
|
23
24
|
if asset in METADATA_ASSETS + REPORTS_ASSETS:
|
|
24
25
|
continue
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict, Tuple
|
|
2
|
-
|
|
3
1
|
from ...types import ExternalAsset
|
|
4
2
|
|
|
5
3
|
|
|
@@ -14,7 +12,7 @@ class QlikAsset(ExternalAsset):
|
|
|
14
12
|
CONNECTIONS = "connections"
|
|
15
13
|
|
|
16
14
|
|
|
17
|
-
EXPORTED_FIELDS: Dict[QlikAsset, Tuple[str, ...]] = {
|
|
15
|
+
EXPORTED_FIELDS: dict[QlikAsset, tuple[str, ...]] = {
|
|
18
16
|
QlikAsset.SPACES: (
|
|
19
17
|
"id",
|
|
20
18
|
"type",
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
1
|
from ..assets import QlikAsset
|
|
4
2
|
|
|
5
3
|
APP_EXTERNAL_ID_KEY = "resourceId"
|
|
@@ -17,7 +15,7 @@ RESPONSE_DICT_EXPECTED_MSG = _RESPONSE_BASE_EXPECTED_MSG.format(type="dict")
|
|
|
17
15
|
RESPONSE_LIST_EXPECTED_MSG = _RESPONSE_BASE_EXPECTED_MSG.format(type="list")
|
|
18
16
|
|
|
19
17
|
|
|
20
|
-
ASSET_PATHS: Dict[QlikAsset, str] = {
|
|
18
|
+
ASSET_PATHS: dict[QlikAsset, str] = {
|
|
21
19
|
QlikAsset.CONNECTIONS: "data-connections",
|
|
22
20
|
QlikAsset.SPACES: "spaces",
|
|
23
21
|
QlikAsset.USERS: "users",
|