castor-extractor 0.22.0__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +4 -0
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/time.py +4 -0
- castor_extractor/utils/time_test.py +8 -1
- castor_extractor/visualization/looker_studio/__init__.py +6 -0
- castor_extractor/visualization/looker_studio/assets.py +6 -0
- castor_extractor/visualization/looker_studio/client/__init__.py +3 -0
- castor_extractor/visualization/looker_studio/client/admin_sdk_client.py +90 -0
- castor_extractor/visualization/looker_studio/client/client.py +37 -0
- castor_extractor/visualization/looker_studio/client/credentials.py +20 -0
- castor_extractor/visualization/looker_studio/client/endpoints.py +18 -0
- castor_extractor/visualization/looker_studio/client/enums.py +8 -0
- castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py +102 -0
- castor_extractor/visualization/looker_studio/client/pagination.py +31 -0
- castor_extractor/visualization/looker_studio/client/scopes.py +6 -0
- castor_extractor/warehouse/salesforce/format.py +12 -5
- castor_extractor/warehouse/salesforce/format_test.py +22 -6
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.1.dist-info}/METADATA +7 -1
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.1.dist-info}/RECORD +22 -11
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.1.dist-info}/LICENCE +0 -0
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.1.dist-info}/WHEEL +0 -0
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.1.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
castor_extractor/utils/time.py
CHANGED
|
@@ -63,5 +63,9 @@ def format_date(timestamp: Union[datetime, date]) -> str:
|
|
|
63
63
|
return timestamp.strftime(ISO_FORMAT)
|
|
64
64
|
|
|
65
65
|
|
|
66
|
+
def format_rfc_3339_date(timestamp: datetime) -> str:
    """
    Format a datetime as an RFC 3339 UTC timestamp, to the second,
    e.g. "1995-04-03T02:01:00Z".

    Naive datetimes are assumed to already be expressed in UTC and are
    formatted as-is. Timezone-aware datetimes are converted to UTC first:
    `isoformat()` would otherwise emit a numeric offset, and appending "Z"
    to it produces an invalid timestamp such as "...+02:00Z".
    """
    # local import: only the names visible at the top of this module are known
    from datetime import timezone

    if timestamp.tzinfo is not None and timestamp.utcoffset() is not None:
        # normalise to UTC, then drop tzinfo so no offset is rendered
        timestamp = timestamp.astimezone(timezone.utc).replace(tzinfo=None)
    return timestamp.isoformat(timespec="seconds") + "Z"
|
|
68
|
+
|
|
69
|
+
|
|
66
70
|
def yesterday() -> date:
|
|
67
71
|
return current_date() - timedelta(days=1)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from datetime import date, datetime
|
|
2
2
|
|
|
3
|
-
from .time import at_midnight, date_after, timestamp_ms
|
|
3
|
+
from .time import at_midnight, date_after, format_rfc_3339_date, timestamp_ms
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def test_at_midnight():
|
|
@@ -17,3 +17,10 @@ def test_timestamp_ms():
|
|
|
17
17
|
result = timestamp_ms(dt)
|
|
18
18
|
expected = 670636800000
|
|
19
19
|
assert result == expected
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_format_rfc_3339_date():
    """A naive datetime is rendered to the second with a trailing "Z"."""
    moment = datetime(1995, 4, 3, 2, 1)
    assert format_rfc_3339_date(moment) == "1995-04-03T02:01:00Z"
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from typing import Iterator, Optional
|
|
2
|
+
|
|
3
|
+
from google.oauth2.service_account import Credentials
|
|
4
|
+
from googleapiclient import discovery # type: ignore
|
|
5
|
+
|
|
6
|
+
from ....utils import (
|
|
7
|
+
at_midnight,
|
|
8
|
+
current_date,
|
|
9
|
+
fetch_all_pages,
|
|
10
|
+
format_rfc_3339_date,
|
|
11
|
+
past_date,
|
|
12
|
+
)
|
|
13
|
+
from .credentials import LookerStudioCredentials
|
|
14
|
+
from .pagination import LookerStudioPagination
|
|
15
|
+
from .scopes import SCOPES
|
|
16
|
+
|
|
17
|
+
USER_EMAIL_FIELD = "primaryEmail"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AdminSDKClient:
    """
    Client to call the Report API and Directory API.
    The service account must impersonate an admin account.
    """

    def __init__(self, credentials: LookerStudioCredentials):
        """
        Builds the Directory and Reports services from the service account
        info, using `credentials.admin_email` as the delegated subject.
        """
        self._credentials = Credentials.from_service_account_info(
            credentials.model_dump(),
            scopes=SCOPES,
            subject=credentials.admin_email,  # impersonates an admin
        )
        self.directory_api = discovery.build(
            "admin", "directory_v1", credentials=self._credentials
        )
        self.report_api = discovery.build(
            "admin", "reports_v1", credentials=self._credentials
        )

    def list_users(self) -> Iterator[dict]:
        """
        Lists all users in the domain; only the primaryEmail field is selected.
        Note:
        * `my_customer` is an alias to represent the account's `customerId`
        * `domain_public` allows non-admins to list users. This is technically
        not necessary here because an admin account is impersonated, but it
        avoids tapping into unnecessary data & serves for future reference.
        See
        https://googleapis.github.io/google-api-python-client/docs/dyn/admin_directory_v1.users.html#list
        https://developers.google.com/admin-sdk/directory/reference/rest/v1/users/list
        https://developers.google.com/admin-sdk/directory/v1/guides/manage-users#retrieve_users_non_admin
        https://stackoverflow.com/a/71083443/14448410
        """

        def _users(pagination_params: Optional[dict] = None) -> dict:
            # pagination params (if any) are merged last, on top of the
            # fixed query parameters
            parameters = {
                "viewType": "domain_public",
                "customer": "my_customer",
                "fields": f"users({USER_EMAIL_FIELD}), nextPageToken",
                **(pagination_params or {}),
            }

            return self.directory_api.users().list(**parameters).execute()

        yield from fetch_all_pages(_users, LookerStudioPagination)

    def list_view_events(self) -> Iterator[dict]:
        """
        Lists all Data Studio View events of the past day.
        See
        https://googleapis.github.io/google-api-python-client/docs/dyn/admin_reports_v1.activities.html
        https://developers.google.com/admin-sdk/reports/reference/rest/v1/activities/list
        https://developers.google.com/admin-sdk/reports/v1/appendix/activity/data-studio#VIEW
        """
        # The time window is computed once, before paginating: every page of
        # the same listing must be requested with the same startTime/endTime,
        # even when the extraction runs across midnight.
        yesterday = format_rfc_3339_date(at_midnight(past_date(1)))
        today = format_rfc_3339_date(at_midnight(current_date()))

        def _activity(pagination_params: Optional[dict] = None) -> dict:
            parameters = {
                "userKey": "all",
                "applicationName": "data_studio",
                "eventName": "VIEW",
                "startTime": yesterday,
                "endTime": today,
                **(pagination_params or {}),
            }

            return self.report_api.activities().list(**parameters).execute()

        yield from fetch_all_pages(_activity, LookerStudioPagination)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Iterator
|
|
2
|
+
|
|
3
|
+
from .. import LookerStudioAsset
|
|
4
|
+
from .admin_sdk_client import USER_EMAIL_FIELD, AdminSDKClient
|
|
5
|
+
from .credentials import LookerStudioCredentials
|
|
6
|
+
from .looker_studio_api_client import LookerStudioAPIClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LookerStudioClient:
    """
    Wrapper used to fetch Looker Studio assets. It holds one client for the
    Admin SDK API and one for the Looker Studio API and coordinates the
    calls between them.
    """

    def __init__(self, credentials: LookerStudioCredentials):
        # both underlying clients are built from the same credentials
        self.admin_sdk_client = AdminSDKClient(credentials)
        self.looker_studio_client = LookerStudioAPIClient(credentials)

    def _get_assets(self) -> Iterator[dict]:
        """
        Yields reports and data sources, one domain user after the other.
        """
        for user in self.admin_sdk_client.list_users():
            email = user[USER_EMAIL_FIELD]
            yield from self.looker_studio_client.fetch_user_assets(email)

    def fetch(self, asset: LookerStudioAsset) -> Iterator[dict]:
        """
        Yields the rows of the requested asset.

        Raises a ValueError (on first iteration, as this is a generator)
        when the asset is neither VIEW_ACTIVITY nor ASSETS.
        """
        if asset == LookerStudioAsset.VIEW_ACTIVITY:
            yield from self.admin_sdk_client.list_view_events()
            return

        if asset == LookerStudioAsset.ASSETS:
            yield from self._get_assets()
            return

        raise ValueError(f"The asset {asset}, is not supported")
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from pydantic import BaseModel, SecretStr, field_serializer
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LookerStudioCredentials(BaseModel):
    """
    Service account info, plus the email of the admin to impersonate.
    """

    # account impersonated by the service account
    admin_email: str

    # service account fields
    auth_provider_x509_cert_url: str
    auth_uri: str
    client_email: str
    client_id: str
    client_x509_cert_url: str
    private_key: SecretStr
    private_key_id: str
    project_id: str
    token_uri: str
    type: str

    @field_serializer("private_key")
    def dump_secret(self, secret):
        """Make model_dump output the actual private_key value"""
        return secret.get_secret_value()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
class LookerStudioAPIEndpoint:
    """URLs of the Looker Studio API endpoints, all built on BASE_PATH."""

    BASE_PATH = "https://datastudio.googleapis.com"

    @classmethod
    def search(cls) -> str:
        """
        URL used to search a user's assets.
        See https://developers.google.com/looker-studio/integrate/api/reference/assets/search
        """
        url = f"{cls.BASE_PATH}/v1/assets:search"
        return url

    @classmethod
    def permissions(cls, asset_name: str) -> str:
        """
        URL used to get the permissions of the given asset.
        The user must be the owner of the asset.
        See https://developers.google.com/looker-studio/integrate/api/reference/permissions/get
        """
        url = f"{cls.BASE_PATH}/v1/assets/{asset_name}/permissions"
        return url
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from functools import partial
|
|
2
|
+
from typing import Iterator, Optional
|
|
3
|
+
|
|
4
|
+
from google.auth.transport.requests import Request
|
|
5
|
+
from google.oauth2.service_account import Credentials
|
|
6
|
+
|
|
7
|
+
from ....utils import (
|
|
8
|
+
APIClient,
|
|
9
|
+
BearerAuth,
|
|
10
|
+
fetch_all_pages,
|
|
11
|
+
)
|
|
12
|
+
from .credentials import LookerStudioCredentials
|
|
13
|
+
from .endpoints import LookerStudioAPIEndpoint
|
|
14
|
+
from .enums import LookerStudioAssetType
|
|
15
|
+
from .pagination import LookerStudioPagination
|
|
16
|
+
from .scopes import SCOPES
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LookerStudioAPIAuth(BearerAuth):
    """Bearer authentication backed by service account credentials."""

    def __init__(
        self,
        credentials: LookerStudioCredentials,
        subject: Optional[str] = None,
    ):
        """
        Builds the service account credentials.
        When a `subject` email is given, the service account impersonates
        that user, so requests are made on that user's behalf.
        """
        service_account = Credentials.from_service_account_info(
            credentials.model_dump(), scopes=SCOPES
        )
        self._credentials = (
            service_account.with_subject(subject)
            if subject
            else service_account
        )

    def fetch_token(self):
        """Refreshes the credentials and returns the resulting token."""
        credentials = self._credentials
        credentials.refresh(Request())
        return credentials.token
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LookerStudioAPIClient(APIClient):
    """Client for the Looker Studio API, able to impersonate a given user."""

    def __init__(self, credentials: LookerStudioCredentials):
        super().__init__(auth=LookerStudioAPIAuth(credentials=credentials))

        # kept so that the auth can be rebuilt when impersonating a user
        self._credentials = credentials

    def _is_private_asset(self, asset_name: str) -> bool:
        """
        Tells whether the asset can only be viewed by its owner.

        The permissions dict is made of `Role: Member[]` key-value pairs.
        There is always at least one pair, defining the unique OWNER of the
        asset. Any additional key means the asset was shared with another
        person or group.

        See also https://developers.google.com/looker-studio/integrate/api/reference/types#Permissions
        """
        url = LookerStudioAPIEndpoint.permissions(asset_name)
        roles = self._get(url)["permissions"]
        return len(roles.keys()) == 1

    def _user_assets(
        self, asset_type: LookerStudioAssetType, user_email: str
    ) -> Iterator[dict]:
        """
        Yields the assets of the given type that are owned by the given user
        and visible by other members.
        """
        search = partial(
            self._get,
            LookerStudioAPIEndpoint.search(),
            params={"assetTypes": [asset_type.value]},
        )

        for asset in fetch_all_pages(search, LookerStudioPagination):
            asset_name = asset["name"]
            if asset["owner"] != user_email:
                continue
            # the permissions are only requested for assets owned by the user
            if self._is_private_asset(asset_name):
                continue
            yield asset

    def _impersonate_user(self, user_email: str):
        """Replaces the auth by one acting on behalf of the given user."""
        self._auth = LookerStudioAPIAuth(
            credentials=self._credentials, subject=user_email
        )

    def fetch_user_assets(self, user_email: str) -> Iterator[dict]:
        """Yields assets (reports and data sources) shared by the given user."""
        self._impersonate_user(user_email)

        # reports first, then data sources
        asset_types = (
            LookerStudioAssetType.REPORT,
            LookerStudioAssetType.DATA_SOURCE,
        )
        for asset_type in asset_types:
            yield from self._user_assets(
                asset_type=asset_type,
                user_email=user_email,
            )
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from pydantic import AliasChoices, ConfigDict, Field
|
|
4
|
+
from pydantic.alias_generators import to_camel
|
|
5
|
+
|
|
6
|
+
from ....utils import PaginationModel
|
|
7
|
+
|
|
8
|
+
NEXT_PAGE_KEY = "pageToken"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LookerStudioPagination(PaginationModel):
    """
    Page of results, with the token of the next page when there is one.
    """

    # the results are read from whichever of these keys is present
    items: list = Field(
        default_factory=list,
        validation_alias=AliasChoices("items", "users", "assets"),
    )
    next_page_token: Optional[str] = None

    model_config = ConfigDict(
        alias_generator=to_camel,
        populate_by_name=True,
        from_attributes=True,
    )

    def is_last(self) -> bool:
        """True when the page carries no token for a next page."""
        token = self.next_page_token
        return token is None

    def next_page_payload(self) -> dict:
        """Parameters to send in order to request the next page."""
        payload = {NEXT_PAGE_KEY: self.next_page_token}
        return payload

    def page_results(self) -> list:
        """Results of the current page."""
        return self.items
|
|
@@ -4,7 +4,7 @@ from typing import Any
|
|
|
4
4
|
from ...utils import group_by
|
|
5
5
|
from .constants import SCHEMA_NAME
|
|
6
6
|
|
|
7
|
-
_HAS_DUPLICATE_KEY = "#
|
|
7
|
+
_HAS_DUPLICATE_KEY = "#has_duplicate_label"
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _clean(raw: str) -> str:
|
|
@@ -70,9 +70,15 @@ def _to_table_payload(sobject: dict) -> dict:
|
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def
|
|
73
|
+
def _remove_duplicates(sobjects: list[dict]) -> list[dict]:
    """
    only keep one object per QualifiedApiName

    Objects are grouped by their "QualifiedApiName" and the first object of
    each group is kept.
    NOTE(review): "first" presumably means first met in `sobjects`, which
    depends on `group_by` preserving the input order - to be confirmed.
    """
    by_name = group_by("QualifiedApiName", sobjects)
    # the name itself is not needed, only the objects sharing it
    return [objects[0] for objects in by_name.values()]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _detect_duplicate_labels(sobjects: list[dict]) -> list[dict]:
|
|
74
80
|
"""
|
|
75
|
-
enrich the given data with "
|
|
81
|
+
enrich the given data with "has_duplicate_label" flag:
|
|
76
82
|
- True when another asset has the same Label in the list
|
|
77
83
|
- False otherwise
|
|
78
84
|
"""
|
|
@@ -94,7 +100,8 @@ class SalesforceFormatter:
|
|
|
94
100
|
"""
|
|
95
101
|
formats the raw list of sobjects to tables
|
|
96
102
|
"""
|
|
97
|
-
sobjects =
|
|
103
|
+
sobjects = _remove_duplicates(sobjects)
|
|
104
|
+
sobjects = _detect_duplicate_labels(sobjects)
|
|
98
105
|
for sobject in sobjects:
|
|
99
106
|
yield _to_table_payload(sobject)
|
|
100
107
|
|
|
@@ -102,6 +109,6 @@ class SalesforceFormatter:
|
|
|
102
109
|
def columns(sobject_fields: dict[str, list[dict]]) -> Iterator[dict]:
|
|
103
110
|
"""formats the raw list of sobject fields to columns"""
|
|
104
111
|
for table_name, fields in sobject_fields.items():
|
|
105
|
-
fields =
|
|
112
|
+
fields = _detect_duplicate_labels(fields)
|
|
106
113
|
for index, field in enumerate(fields):
|
|
107
114
|
yield _to_column_payload(field, index, table_name)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from .format import (
|
|
2
2
|
_HAS_DUPLICATE_KEY,
|
|
3
3
|
SalesforceFormatter,
|
|
4
|
-
|
|
4
|
+
_detect_duplicate_labels,
|
|
5
5
|
_field_description,
|
|
6
6
|
_name,
|
|
7
|
+
_remove_duplicates,
|
|
7
8
|
)
|
|
8
9
|
|
|
9
10
|
|
|
@@ -11,9 +12,10 @@ def _tables_sobjects() -> tuple[dict[str, str], ...]:
|
|
|
11
12
|
"""Returns 4 sobjects with 2 sharing the same label"""
|
|
12
13
|
a = {"Label": "a", "QualifiedApiName": "a_one"}
|
|
13
14
|
b = {"Label": "b", "QualifiedApiName": "b"}
|
|
14
|
-
c = {"Label": "c", "QualifiedApiName": "
|
|
15
|
+
c = {"Label": "c", "QualifiedApiName": "c_unique_so_doesnt_matter"}
|
|
15
16
|
a_prime = {"Label": "a", "QualifiedApiName": "a_two"}
|
|
16
|
-
|
|
17
|
+
b_exact_duplicate = {"Label": "b", "QualifiedApiName": "b"}
|
|
18
|
+
return a, b, c, a_prime, b_exact_duplicate
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
def _columns_sobjects() -> dict[str, list[dict]]:
|
|
@@ -79,14 +81,14 @@ def test__name():
|
|
|
79
81
|
assert _name(empty_label_sobject) == "empty_label"
|
|
80
82
|
|
|
81
83
|
|
|
82
|
-
def
|
|
84
|
+
def test__detect_duplicate_labels():
|
|
83
85
|
objects = [
|
|
84
86
|
{"Label": "Foo"},
|
|
85
87
|
{"Label": "Bar"},
|
|
86
88
|
{"Label": "Foo"},
|
|
87
89
|
]
|
|
88
90
|
|
|
89
|
-
objects =
|
|
91
|
+
objects = _detect_duplicate_labels(objects)
|
|
90
92
|
assert objects == [
|
|
91
93
|
{"Label": "Foo", _HAS_DUPLICATE_KEY: True},
|
|
92
94
|
{"Label": "Bar", _HAS_DUPLICATE_KEY: False},
|
|
@@ -94,11 +96,25 @@ def test__detect_duplicates():
|
|
|
94
96
|
]
|
|
95
97
|
|
|
96
98
|
|
|
99
|
+
def test__remove_duplicates():
    """Two objects sharing a QualifiedApiName are reduced to a single one."""
    api_names = ("Foo", "Bar", "Foo")
    sobjects = [{"QualifiedApiName": api_name} for api_name in api_names]

    deduplicated = _remove_duplicates(sobjects)

    assert len(deduplicated) == 2
    kept_names = {kept["QualifiedApiName"] for kept in deduplicated}
    assert kept_names == {"Foo", "Bar"}
|
|
110
|
+
|
|
111
|
+
|
|
97
112
|
def test_salesforce_formatter_tables():
    """
    Tables sharing a label get their API name appended to the table name,
    and exact duplicates are only formatted once.
    """
    sobjects = [*_tables_sobjects()]
    # materialised, because the tables are both counted and iterated below
    tables = list(SalesforceFormatter.tables(sobjects))
    expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
    payload_names = {t["table_name"] for t in tables}
    assert len(tables) == 4  # we only keep one "b"
    assert payload_names == expected_names
|
|
103
119
|
|
|
104
120
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.22.
|
|
3
|
+
Version: 0.22.1
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -19,6 +19,7 @@ Provides-Extra: bigquery
|
|
|
19
19
|
Provides-Extra: databricks
|
|
20
20
|
Provides-Extra: dbt
|
|
21
21
|
Provides-Extra: looker
|
|
22
|
+
Provides-Extra: lookerstudio
|
|
22
23
|
Provides-Extra: metabase
|
|
23
24
|
Provides-Extra: mysql
|
|
24
25
|
Provides-Extra: postgres
|
|
@@ -31,6 +32,7 @@ Provides-Extra: tableau
|
|
|
31
32
|
Requires-Dist: cryptography (>=43.0.0,<44.0.0) ; extra == "snowflake"
|
|
32
33
|
Requires-Dist: databricks-sql-connector (>=3.2.0,<4.0.0) ; extra == "databricks" or extra == "all"
|
|
33
34
|
Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|
|
35
|
+
Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
|
|
34
36
|
Requires-Dist: google-auth (>=2,<3)
|
|
35
37
|
Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
|
|
36
38
|
Requires-Dist: google-cloud-storage (>=2,<3)
|
|
@@ -205,6 +207,10 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
|
|
|
205
207
|
|
|
206
208
|
# Changelog
|
|
207
209
|
|
|
210
|
+
## 0.22.1 - 2024-12-05
|
|
211
|
+
|
|
212
|
+
* Salesforce: deduplicate tables
|
|
213
|
+
|
|
208
214
|
## 0.22.0 - 2024-12-04
|
|
209
215
|
|
|
210
216
|
* Stop supporting python3.8
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=p1jUz1AWTVMfmt6dwNvWxUSloLrkhHoWRxpT2RU1Hcc,15058
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -76,7 +76,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
|
|
|
76
76
|
castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
|
|
77
77
|
castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
|
|
78
78
|
castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
|
|
79
|
-
castor_extractor/utils/__init__.py,sha256=
|
|
79
|
+
castor_extractor/utils/__init__.py,sha256=X7WOOgrpGf7Vh8r-7eNGjuC0rKs0g9GTO3d7hZ18gwo,1550
|
|
80
80
|
castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
|
|
81
81
|
castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
|
|
82
82
|
castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
|
|
@@ -135,8 +135,8 @@ castor_extractor/utils/salesforce/pagination.py,sha256=wJq0rKLdacFRggyHwB6Fh3K6i
|
|
|
135
135
|
castor_extractor/utils/store.py,sha256=hnyrFwCsL48e9QrsBns-n8FospujZrkUy1P2YHAh_C0,2067
|
|
136
136
|
castor_extractor/utils/string.py,sha256=IQqNum7CJwuSvDGPbTAmz46YwtYDYgJKeXY7iixdjI4,2370
|
|
137
137
|
castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_N_-Kxwno,2501
|
|
138
|
-
castor_extractor/utils/time.py,sha256=
|
|
139
|
-
castor_extractor/utils/time_test.py,sha256=
|
|
138
|
+
castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
|
|
139
|
+
castor_extractor/utils/time_test.py,sha256=pH8DSosNlwDYZXZNNjYDcL0WbmZc_c212LEEn88Oqew,647
|
|
140
140
|
castor_extractor/utils/type.py,sha256=Sd8JlEgbGkBUZnRqCUDtREeBkOMTXtlNMyCph90_J0Q,328
|
|
141
141
|
castor_extractor/utils/validation.py,sha256=kQAFtqt3gfy7YqYQ0u-60vyNYUF_96he5QDVUQnZmDo,1896
|
|
142
142
|
castor_extractor/utils/validation_test.py,sha256=aSetitOCkH_K-Wto9ISOVGso5jGfTUOBLm3AZnvavO8,1181
|
|
@@ -168,6 +168,17 @@ castor_extractor/visualization/looker/extract.py,sha256=O_hzRftww3Cw1cgijL-K-8gh
|
|
|
168
168
|
castor_extractor/visualization/looker/fields.py,sha256=7oC7p-3Wp7XHBP_FT_D1wH3kINFRnc_qGVeH1a4UNZY,623
|
|
169
169
|
castor_extractor/visualization/looker/fields_test.py,sha256=7Cwq8Qky6aTZg8nCHp1gmPJtd9pGNB4QeMIRRWdHo5w,782
|
|
170
170
|
castor_extractor/visualization/looker/multithreading.py,sha256=Muuh3usBLqtv3sfHoyPYJ6jJ7V5ajR6N9ZJ_F-bNc60,2608
|
|
171
|
+
castor_extractor/visualization/looker_studio/__init__.py,sha256=p3mTWz7Yk1_m9vYohxCqwxnuE7SUYbU--TH2ezhf734,142
|
|
172
|
+
castor_extractor/visualization/looker_studio/assets.py,sha256=_ir4L2RTmGDb1WetAm6-EZ6W4tPXxi0kNppNBlmy9QE,135
|
|
173
|
+
castor_extractor/visualization/looker_studio/client/__init__.py,sha256=YkQaVDJa-7KSwdOLjtgKJMRiafbGNKC_46YVx0hYZ1Q,129
|
|
174
|
+
castor_extractor/visualization/looker_studio/client/admin_sdk_client.py,sha256=hYKdU6TlWKkXx07r6HsZ4Wbxhasx8DP_jO6iDCjHjgk,3508
|
|
175
|
+
castor_extractor/visualization/looker_studio/client/client.py,sha256=AYdR46NOdn_ITK_wPAASROW0gJjx-iA0Gi43QeuU5BU,1302
|
|
176
|
+
castor_extractor/visualization/looker_studio/client/credentials.py,sha256=yzTaiJQ5cArTnbybUPF6fZZXbX9XQ0SBq-jVI2ECovA,521
|
|
177
|
+
castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqNDdlDBOOpiF7LpjyHMrzeClJktidCr1pTDUs,669
|
|
178
|
+
castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
|
|
179
|
+
castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Oqu_bGBEqYRR_aitBFyvfCZnx0kSZf4qGEI16tIRnhw,3482
|
|
180
|
+
castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
|
|
181
|
+
castor_extractor/visualization/looker_studio/client/scopes.py,sha256=824cqqgZuGq4L-rPNoHJe0ibXsxkRwB0CLG_kqw9Q0g,256
|
|
171
182
|
castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
|
|
172
183
|
castor_extractor/visualization/metabase/assets.py,sha256=nu3FwQBU_hdS2DBvgXAwQlEEi76QiNK2tMKEtMyctaY,2874
|
|
173
184
|
castor_extractor/visualization/metabase/client/__init__.py,sha256=KBvaPMofBRV3m_sZAnKNCrJGr-Z88EbpdzEzWPQ_uBk,99
|
|
@@ -390,8 +401,8 @@ castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29dee
|
|
|
390
401
|
castor_extractor/warehouse/salesforce/client.py,sha256=067ZyccmIYoY6VwLTSneefOJqUpobtnoEzxJMY2oSPs,3268
|
|
391
402
|
castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
|
|
392
403
|
castor_extractor/warehouse/salesforce/extract.py,sha256=BUQ1ZxGGSq9wWCJfRbKIzIBBeth_YXg8YSV72lbz2lc,3417
|
|
393
|
-
castor_extractor/warehouse/salesforce/format.py,sha256=
|
|
394
|
-
castor_extractor/warehouse/salesforce/format_test.py,sha256=
|
|
404
|
+
castor_extractor/warehouse/salesforce/format.py,sha256=M5uGA8aURL_Nt27T8R2tDfbU5ZUM3ECG4fGalEkWkYA,3688
|
|
405
|
+
castor_extractor/warehouse/salesforce/format_test.py,sha256=puTL-Co84jE2SQzKFKGLYU9rey4Ja_Ox8xiKy4iOjeo,3780
|
|
395
406
|
castor_extractor/warehouse/salesforce/pagination.py,sha256=m1S9JRNf6Oe-6dDghYUY5wwTzGzKW5H9pE60PCXMha0,920
|
|
396
407
|
castor_extractor/warehouse/salesforce/soql.py,sha256=XB8ohKwHFfC4Xger7Y84DXLW17IJDye_bZ3FL6DCcOI,1188
|
|
397
408
|
castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
|
|
@@ -425,8 +436,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
425
436
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
426
437
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
427
438
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
428
|
-
castor_extractor-0.22.
|
|
429
|
-
castor_extractor-0.22.
|
|
430
|
-
castor_extractor-0.22.
|
|
431
|
-
castor_extractor-0.22.
|
|
432
|
-
castor_extractor-0.22.
|
|
439
|
+
castor_extractor-0.22.1.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
440
|
+
castor_extractor-0.22.1.dist-info/METADATA,sha256=52H1eJe_L62yUSWkBJYLbRanXS6OdauukGW0RfeNiS4,22075
|
|
441
|
+
castor_extractor-0.22.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
442
|
+
castor_extractor-0.22.1.dist-info/entry_points.txt,sha256=7aVSxc-_2dicp28Ow-S4y0p4wGoTm9zGmVptMvfLdw8,1649
|
|
443
|
+
castor_extractor-0.22.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|