castor-extractor 0.21.7__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +8 -0
- castor_extractor/commands/__init__.py +0 -3
- castor_extractor/commands/file_check.py +1 -2
- castor_extractor/file_checker/column.py +5 -5
- castor_extractor/file_checker/file.py +7 -7
- castor_extractor/file_checker/file_test.py +2 -2
- castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
- castor_extractor/knowledge/confluence/client/client.py +2 -1
- castor_extractor/knowledge/confluence/extract.py +3 -2
- castor_extractor/knowledge/notion/client/client.py +3 -2
- castor_extractor/knowledge/notion/extract.py +3 -2
- castor_extractor/quality/soda/client/client.py +2 -1
- castor_extractor/quality/soda/client/pagination.py +1 -3
- castor_extractor/types.py +3 -3
- castor_extractor/uploader/env.py +2 -2
- castor_extractor/uploader/upload.py +4 -3
- castor_extractor/uploader/utils.py +1 -1
- castor_extractor/utils/client/abstract.py +2 -1
- castor_extractor/utils/client/api/auth.py +2 -2
- castor_extractor/utils/client/api/auth_test.py +2 -2
- castor_extractor/utils/client/api/client.py +8 -3
- castor_extractor/utils/client/api/pagination.py +3 -2
- castor_extractor/utils/client/api/safe_request.py +5 -5
- castor_extractor/utils/collection.py +7 -11
- castor_extractor/utils/dbt/client.py +3 -3
- castor_extractor/utils/dbt/client_test.py +2 -2
- castor_extractor/utils/deprecate.py +1 -2
- castor_extractor/utils/files.py +5 -5
- castor_extractor/utils/formatter.py +5 -4
- castor_extractor/utils/json_stream_write.py +2 -1
- castor_extractor/utils/object.py +2 -1
- castor_extractor/utils/pager/pager.py +2 -4
- castor_extractor/utils/pager/pager_on_id.py +2 -1
- castor_extractor/utils/pager/pager_on_id_test.py +5 -5
- castor_extractor/utils/pager/pager_test.py +3 -3
- castor_extractor/utils/retry.py +4 -3
- castor_extractor/utils/retry_test.py +2 -3
- castor_extractor/utils/safe.py +3 -3
- castor_extractor/utils/salesforce/client.py +2 -1
- castor_extractor/utils/salesforce/credentials.py +1 -3
- castor_extractor/utils/store.py +2 -1
- castor_extractor/utils/string.py +2 -2
- castor_extractor/utils/string_test.py +1 -3
- castor_extractor/utils/type.py +3 -2
- castor_extractor/utils/validation.py +4 -4
- castor_extractor/utils/write.py +2 -2
- castor_extractor/visualization/domo/client/client.py +8 -7
- castor_extractor/visualization/domo/client/credentials.py +2 -2
- castor_extractor/visualization/domo/client/endpoints.py +2 -2
- castor_extractor/visualization/domo/extract.py +3 -2
- castor_extractor/visualization/looker/api/client.py +17 -16
- castor_extractor/visualization/looker/api/utils.py +2 -2
- castor_extractor/visualization/looker/assets.py +1 -3
- castor_extractor/visualization/looker/extract.py +4 -3
- castor_extractor/visualization/looker/fields.py +3 -3
- castor_extractor/visualization/looker/multithreading.py +3 -3
- castor_extractor/visualization/metabase/assets.py +1 -3
- castor_extractor/visualization/metabase/client/api/client.py +8 -7
- castor_extractor/visualization/metabase/extract.py +3 -2
- castor_extractor/visualization/metabase/types.py +1 -3
- castor_extractor/visualization/mode/client/client.py +6 -6
- castor_extractor/visualization/mode/extract.py +2 -2
- castor_extractor/visualization/powerbi/assets.py +1 -3
- castor_extractor/visualization/powerbi/client/client.py +12 -11
- castor_extractor/visualization/powerbi/client/credentials.py +3 -3
- castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
- castor_extractor/visualization/powerbi/extract.py +3 -2
- castor_extractor/visualization/qlik/assets.py +1 -3
- castor_extractor/visualization/qlik/client/constants.py +1 -3
- castor_extractor/visualization/qlik/client/engine/error.py +1 -3
- castor_extractor/visualization/qlik/client/master.py +3 -3
- castor_extractor/visualization/qlik/client/rest.py +12 -12
- castor_extractor/visualization/qlik/extract.py +4 -3
- castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
- castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
- castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
- castor_extractor/visualization/sigma/client/client.py +11 -8
- castor_extractor/visualization/sigma/client/credentials.py +1 -3
- castor_extractor/visualization/sigma/client/pagination.py +1 -1
- castor_extractor/visualization/sigma/extract.py +3 -2
- castor_extractor/visualization/tableau/assets.py +1 -2
- castor_extractor/visualization/tableau/client/client.py +1 -2
- castor_extractor/visualization/tableau/client/client_utils.py +3 -2
- castor_extractor/visualization/tableau/client/credentials.py +3 -3
- castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
- castor_extractor/visualization/tableau/extract.py +2 -2
- castor_extractor/visualization/tableau/gql_fields.py +3 -3
- castor_extractor/visualization/tableau/tsc_fields.py +1 -2
- castor_extractor/visualization/tableau/types.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client.py +6 -1
- castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +56 -9
- castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/errors.py +5 -0
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
- castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
- castor_extractor/visualization/tableau_revamp/extract.py +2 -2
- castor_extractor/visualization/thoughtspot/client/client.py +3 -2
- castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
- castor_extractor/visualization/thoughtspot/extract.py +3 -2
- castor_extractor/warehouse/abstract/asset.py +4 -5
- castor_extractor/warehouse/abstract/extract.py +4 -3
- castor_extractor/warehouse/abstract/query.py +4 -4
- castor_extractor/warehouse/bigquery/client.py +8 -8
- castor_extractor/warehouse/bigquery/extract.py +1 -1
- castor_extractor/warehouse/bigquery/query.py +2 -2
- castor_extractor/warehouse/bigquery/types.py +2 -4
- castor_extractor/warehouse/databricks/api_client.py +15 -14
- castor_extractor/warehouse/databricks/client.py +16 -16
- castor_extractor/warehouse/databricks/extract.py +4 -4
- castor_extractor/warehouse/databricks/format.py +12 -12
- castor_extractor/warehouse/databricks/lineage.py +11 -11
- castor_extractor/warehouse/databricks/pagination.py +2 -2
- castor_extractor/warehouse/databricks/types.py +4 -4
- castor_extractor/warehouse/databricks/utils.py +5 -4
- castor_extractor/warehouse/mysql/query.py +2 -2
- castor_extractor/warehouse/postgres/query.py +2 -2
- castor_extractor/warehouse/redshift/client.py +1 -1
- castor_extractor/warehouse/redshift/query.py +2 -2
- castor_extractor/warehouse/salesforce/client.py +8 -8
- castor_extractor/warehouse/salesforce/extract.py +3 -4
- castor_extractor/warehouse/salesforce/format.py +8 -7
- castor_extractor/warehouse/salesforce/format_test.py +2 -4
- castor_extractor/warehouse/snowflake/query.py +5 -5
- castor_extractor/warehouse/sqlserver/client.py +1 -1
- castor_extractor/warehouse/sqlserver/query.py +2 -2
- {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/METADATA +11 -6
- {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/RECORD +131 -131
- {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/LICENCE +0 -0
- {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/WHEEL +0 -0
- {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/entry_points.txt +0 -0
castor_extractor/utils/object.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
|
+
from collections.abc import Iterator, Sequence
|
|
2
3
|
from enum import Enum
|
|
3
4
|
from itertools import chain
|
|
4
5
|
from typing import (
|
|
5
6
|
Callable,
|
|
6
7
|
Generic,
|
|
7
|
-
Iterator,
|
|
8
|
-
List,
|
|
9
8
|
Optional,
|
|
10
|
-
Sequence,
|
|
11
9
|
TypeVar,
|
|
12
10
|
)
|
|
13
11
|
|
|
@@ -32,7 +30,7 @@ class PagerStopStrategy(Enum):
|
|
|
32
30
|
|
|
33
31
|
|
|
34
32
|
class AbstractPager(Generic[T]):
|
|
35
|
-
def all(self, per_page: int = DEFAULT_PER_PAGE) -> List[T]:
|
|
33
|
+
def all(self, per_page: int = DEFAULT_PER_PAGE) -> list[T]:
|
|
36
34
|
"""Returns all data provided by the callback as a list"""
|
|
37
35
|
return list(chain.from_iterable(self.iterator(per_page=per_page)))
|
|
38
36
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Iterator, Sequence
|
|
2
|
+
from typing import Callable, Optional, Protocol, TypeVar
|
|
2
3
|
from uuid import UUID
|
|
3
4
|
|
|
4
5
|
from .pager import DEFAULT_PER_PAGE, AbstractPager, PagerStopStrategy
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Callable
|
|
1
|
+
from typing import Callable
|
|
2
2
|
from uuid import UUID
|
|
3
3
|
|
|
4
4
|
from .pager_on_id import PagerOnId
|
|
@@ -9,11 +9,11 @@ ITEMS_WITH_IDS = [
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def _make_callback_with_ids(
|
|
12
|
-
elements:
|
|
13
|
-
) -> Callable[[UUID, int],
|
|
14
|
-
def _callback(max_id: UUID, per: int) ->
|
|
12
|
+
elements: list[dict[str, str]],
|
|
13
|
+
) -> Callable[[UUID, int], list[dict[str, str]]]:
|
|
14
|
+
def _callback(max_id: UUID, per: int) -> list[dict[str, str]]:
|
|
15
15
|
"""assumes the elements are sorted by id"""
|
|
16
|
-
to_return:
|
|
16
|
+
to_return: list[dict[str, str]] = []
|
|
17
17
|
for element in elements:
|
|
18
18
|
if element["id"] > str(max_id):
|
|
19
19
|
to_return.append(element)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from typing import Callable
|
|
1
|
+
from typing import Callable
|
|
2
2
|
|
|
3
3
|
from .pager import Pager, PagerLogger
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def _make_callback(elements:
|
|
7
|
-
def _callback(page: int, per: int) ->
|
|
6
|
+
def _make_callback(elements: list[int]) -> Callable[[int, int], list[int]]:
|
|
7
|
+
def _callback(page: int, per: int) -> list[int]:
|
|
8
8
|
_start = (page - 1) * per
|
|
9
9
|
_end = _start + per
|
|
10
10
|
return elements[_start:_end]
|
castor_extractor/utils/retry.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import random
|
|
3
3
|
import time
|
|
4
|
+
from collections.abc import Sequence
|
|
4
5
|
from enum import Enum
|
|
5
|
-
from typing import Any, Callable,
|
|
6
|
+
from typing import Any, Callable, Union
|
|
6
7
|
|
|
7
8
|
from pydantic import BaseModel, PositiveInt, PrivateAttr
|
|
8
9
|
from pydantic.fields import Field
|
|
@@ -75,11 +76,11 @@ class Retry(BaseModel):
|
|
|
75
76
|
return True
|
|
76
77
|
|
|
77
78
|
|
|
78
|
-
WrapperReturnType = Union[
|
|
79
|
+
WrapperReturnType = Union[tuple[BaseException, None], tuple[None, Any]]
|
|
79
80
|
|
|
80
81
|
|
|
81
82
|
def retry(
|
|
82
|
-
exceptions: Sequence[
|
|
83
|
+
exceptions: Sequence[type[BaseException]],
|
|
83
84
|
max_retries: int = 1,
|
|
84
85
|
base_ms: int = 0,
|
|
85
86
|
jitter_ms: int = 1,
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from http import HTTPStatus
|
|
2
2
|
from statistics import variance
|
|
3
3
|
from time import time
|
|
4
|
-
from typing import List
|
|
5
4
|
from unittest.mock import patch
|
|
6
5
|
|
|
7
6
|
import pytest
|
|
@@ -43,8 +42,8 @@ def test_retry_strategy__jitter():
|
|
|
43
42
|
assert variance(jitters) > 0
|
|
44
43
|
|
|
45
44
|
|
|
46
|
-
def _iterate_base(retry: Retry, count: int) ->
|
|
47
|
-
bases:
|
|
45
|
+
def _iterate_base(retry: Retry, count: int) -> list[int]:
|
|
46
|
+
bases: list[int] = []
|
|
48
47
|
for _ in range(count):
|
|
49
48
|
retry._retry_attempts += 1
|
|
50
49
|
bases.append(retry.base())
|
castor_extractor/utils/safe.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Callable,
|
|
2
|
+
from typing import Callable, Optional, Union
|
|
3
3
|
|
|
4
4
|
logger = logging.getLogger(__name__)
|
|
5
5
|
|
|
@@ -16,12 +16,12 @@ class SafeMode:
|
|
|
16
16
|
|
|
17
17
|
def __init__(
|
|
18
18
|
self,
|
|
19
|
-
exceptions:
|
|
19
|
+
exceptions: tuple[type[BaseException], ...],
|
|
20
20
|
max_errors: Union[int, float],
|
|
21
21
|
):
|
|
22
22
|
self.exceptions = exceptions
|
|
23
23
|
self.max_errors = max_errors
|
|
24
|
-
self.errors_caught:
|
|
24
|
+
self.errors_caught: list[type[BaseException]] = []
|
|
25
25
|
|
|
26
26
|
@property
|
|
27
27
|
def should_raise(self) -> bool:
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict
|
|
2
|
-
|
|
3
1
|
from pydantic import Field
|
|
4
2
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
3
|
|
|
@@ -29,7 +27,7 @@ class SalesforceCredentials(BaseSettings):
|
|
|
29
27
|
"""Generates the password for authentication"""
|
|
30
28
|
return self.password + self.security_token
|
|
31
29
|
|
|
32
|
-
def token_request_payload(self) ->
|
|
30
|
+
def token_request_payload(self) -> dict[str, str]:
|
|
33
31
|
"""
|
|
34
32
|
Params to post to the API in order to retrieve the authentication token
|
|
35
33
|
"""
|
castor_extractor/utils/store.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
+
from collections.abc import Iterable, Iterator
|
|
3
4
|
from io import StringIO
|
|
4
|
-
from typing import
|
|
5
|
+
from typing import Optional
|
|
5
6
|
|
|
6
7
|
from .formatter import CsvFormatter, Formatter
|
|
7
8
|
from .time import current_timestamp
|
castor_extractor/utils/string.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import ast
|
|
2
2
|
import re
|
|
3
3
|
import string
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import TypeVar, Union
|
|
5
5
|
|
|
6
6
|
_ALPHANUMERIC_REGEX = r"^\w+$"
|
|
7
7
|
_SYMBOLS = "[]{}()"
|
|
@@ -31,7 +31,7 @@ def _clean(input_: str) -> str:
|
|
|
31
31
|
return cleaned
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
def string_to_tuple(input_: str) ->
|
|
34
|
+
def string_to_tuple(input_: str) -> tuple[str, ...]:
|
|
35
35
|
"""
|
|
36
36
|
Parse the given string and returns the corresponding Tuple of strings
|
|
37
37
|
|
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
from typing import Tuple
|
|
2
|
-
|
|
3
1
|
import pytest
|
|
4
2
|
|
|
5
3
|
from .string import decode_when_bytes, string_to_tuple
|
|
6
4
|
|
|
7
5
|
|
|
8
|
-
def _test(symbols: str, input_: str) ->
|
|
6
|
+
def _test(symbols: str, input_: str) -> tuple[str, ...]:
|
|
9
7
|
return string_to_tuple(symbols[0] + input_ + symbols[1])
|
|
10
8
|
|
|
11
9
|
|
castor_extractor/utils/type.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Mapping, Sequence
|
|
2
|
+
from typing import Any, Callable, Union
|
|
2
3
|
|
|
3
|
-
SerializedAsset =
|
|
4
|
+
SerializedAsset = list[dict]
|
|
4
5
|
|
|
5
6
|
# https://stackoverflow.com/questions/51291722/define-a-jsonable-type-using-mypy-pep-526
|
|
6
7
|
JsonType = Union[Sequence, Mapping, str, int, float, bool]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
2
|
from urllib.parse import urlsplit, urlunsplit
|
|
3
3
|
|
|
4
4
|
BASE_URL_SCHEME = "https"
|
|
@@ -18,13 +18,13 @@ def _clean_path(path: str) -> str:
|
|
|
18
18
|
return path.rstrip("/")
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
def _get_hostname_port(netloc: str) -> Tuple[str, str]:
|
|
21
|
+
def _get_hostname_port(netloc: str) -> tuple[str, str]:
|
|
22
22
|
hostname, *rest = netloc.split(":")
|
|
23
23
|
port = ":".join(rest)
|
|
24
24
|
return hostname, port
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def _urlsplit(base_url: str) -> Tuple[str, str, str, str, str, str]:
|
|
27
|
+
def _urlsplit(base_url: str) -> tuple[str, str, str, str, str, str]:
|
|
28
28
|
"""
|
|
29
29
|
Returns URL split parts
|
|
30
30
|
|
|
@@ -51,7 +51,7 @@ def _urlsplit(base_url: str) -> Tuple[str, str, str, str, str, str]:
|
|
|
51
51
|
|
|
52
52
|
def _expect(
|
|
53
53
|
attr: str,
|
|
54
|
-
expected: Optional[
|
|
54
|
+
expected: Optional[list[str]],
|
|
55
55
|
actual: Optional[str],
|
|
56
56
|
) -> None:
|
|
57
57
|
if not expected and not actual:
|
castor_extractor/utils/write.py
CHANGED
|
@@ -3,7 +3,7 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
import sys
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
8
|
import pkg_resources
|
|
9
9
|
|
|
@@ -70,7 +70,7 @@ def get_summary_payload(client_info: dict, dt: datetime) -> dict:
|
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def write_errors_logs(output_directory: str, ts: int, errors:
|
|
73
|
+
def write_errors_logs(output_directory: str, ts: int, errors: list[str]):
|
|
74
74
|
"""
|
|
75
75
|
write a json file logs from code execution
|
|
76
76
|
"""
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
from datetime import datetime, timedelta
|
|
3
4
|
from http import HTTPStatus
|
|
4
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, Optional
|
|
5
6
|
|
|
6
7
|
import requests
|
|
7
8
|
|
|
@@ -137,14 +138,14 @@ class DomoClient:
|
|
|
137
138
|
endpoint: Endpoint,
|
|
138
139
|
params: Optional[dict] = None,
|
|
139
140
|
asset_id: Optional[str] = None,
|
|
140
|
-
) ->
|
|
141
|
+
) -> list[dict]:
|
|
141
142
|
"""Used when the response contains multiple elements"""
|
|
142
143
|
return self._get(endpoint, params, asset_id)
|
|
143
144
|
|
|
144
|
-
def _get_paginated(self, endpoint: Endpoint) ->
|
|
145
|
+
def _get_paginated(self, endpoint: Endpoint) -> list[dict]:
|
|
145
146
|
"""Used when the response is paginated and need iterations"""
|
|
146
147
|
pagination = Pagination()
|
|
147
|
-
all_results:
|
|
148
|
+
all_results: list[dict] = []
|
|
148
149
|
|
|
149
150
|
while pagination.needs_increment:
|
|
150
151
|
params = {**pagination.params, **endpoint.params}
|
|
@@ -155,7 +156,7 @@ class DomoClient:
|
|
|
155
156
|
|
|
156
157
|
return all_results
|
|
157
158
|
|
|
158
|
-
def _datasources(self, card_ids:
|
|
159
|
+
def _datasources(self, card_ids: list[int]) -> RawData:
|
|
159
160
|
"""Yields all distinct datasources associated to the given cards"""
|
|
160
161
|
if not card_ids:
|
|
161
162
|
return empty_iterator()
|
|
@@ -178,7 +179,7 @@ class DomoClient:
|
|
|
178
179
|
|
|
179
180
|
def _process_pages(
|
|
180
181
|
self,
|
|
181
|
-
page_tree:
|
|
182
|
+
page_tree: list[dict],
|
|
182
183
|
parent_path: str = _PARENT_FOLDER,
|
|
183
184
|
) -> Iterator[dict]:
|
|
184
185
|
"""Recursively fetch pages while building the folder architecture"""
|
|
@@ -252,7 +253,7 @@ class DomoClient:
|
|
|
252
253
|
}
|
|
253
254
|
|
|
254
255
|
def _unique_datasets(self) -> RawData:
|
|
255
|
-
ids_encountered:
|
|
256
|
+
ids_encountered: set[str] = set()
|
|
256
257
|
for dataset in self._datasets():
|
|
257
258
|
dataset_id = dataset.get("id")
|
|
258
259
|
if not dataset_id or dataset_id in ids_encountered:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
3
|
from pydantic import Field
|
|
4
4
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
@@ -27,5 +27,5 @@ class DomoCredentials(BaseSettings):
|
|
|
27
27
|
return HTTPBasicAuth(self.client_id, self.api_token)
|
|
28
28
|
|
|
29
29
|
@property
|
|
30
|
-
def private_headers(self) ->
|
|
30
|
+
def private_headers(self) -> dict[str, str]:
|
|
31
31
|
return {"X-DOMO-Developer-Token": self.developer_token}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional
|
|
3
3
|
|
|
4
4
|
_AUTH_URL = (
|
|
5
5
|
"grant_type=client_credentials&scope=data%20dashboard%20audit%20user"
|
|
@@ -79,7 +79,7 @@ class EndpointFactory:
|
|
|
79
79
|
is_private=True,
|
|
80
80
|
)
|
|
81
81
|
|
|
82
|
-
def cards_metadata(self, card_ids:
|
|
82
|
+
def cards_metadata(self, card_ids: list[int]) -> Endpoint:
|
|
83
83
|
urns = ",".join(map(str, card_ids))
|
|
84
84
|
url = f"{self.base_url}/api/content/v1/cards?urns={urns}&parts=datasources"
|
|
85
85
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: DomoClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[DomoAsset, Union[list, Iterator, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Domo"""
|
|
23
24
|
|
|
24
25
|
logger.info("Extracting PAGES from API")
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import logging
|
|
3
|
+
from collections.abc import Iterator, Sequence
|
|
3
4
|
from datetime import date, timedelta
|
|
4
|
-
from typing import Callable,
|
|
5
|
+
from typing import Callable, Optional
|
|
5
6
|
|
|
6
7
|
from dateutil.utils import today
|
|
7
8
|
from looker_sdk import init40
|
|
@@ -98,7 +99,7 @@ class ApiClient:
|
|
|
98
99
|
self.per_page = page_size
|
|
99
100
|
self._safe_mode = safe_mode
|
|
100
101
|
|
|
101
|
-
def folders(self) ->
|
|
102
|
+
def folders(self) -> list[Folder]:
|
|
102
103
|
"""Lists folders of the given Looker account"""
|
|
103
104
|
|
|
104
105
|
def _search(page: int, per_page: int) -> Sequence[Folder]:
|
|
@@ -110,7 +111,7 @@ class ApiClient:
|
|
|
110
111
|
|
|
111
112
|
return Pager(_search, logger=self._logger).all(per_page=self.per_page)
|
|
112
113
|
|
|
113
|
-
def dashboards(self, folder_id: Optional[str] = None) ->
|
|
114
|
+
def dashboards(self, folder_id: Optional[str] = None) -> list[Dashboard]:
|
|
114
115
|
"""
|
|
115
116
|
Lists dashboards of the given Looker account using pagination.
|
|
116
117
|
The optional folder_id allows restricting the search to the given folder.
|
|
@@ -132,7 +133,7 @@ class ApiClient:
|
|
|
132
133
|
|
|
133
134
|
return Pager(_search, logger=self._logger).all(per_page=self.per_page)
|
|
134
135
|
|
|
135
|
-
def looks(self, folder_id: Optional[str] = None) ->
|
|
136
|
+
def looks(self, folder_id: Optional[str] = None) -> list[Look]:
|
|
136
137
|
"""
|
|
137
138
|
Fetch looks via `search_looks` using pagination. The optional folder_id
|
|
138
139
|
allows restricting the search to the given folder.
|
|
@@ -155,7 +156,7 @@ class ApiClient:
|
|
|
155
156
|
|
|
156
157
|
return Pager(_search, logger=self._logger).all(per_page=self.per_page)
|
|
157
158
|
|
|
158
|
-
def _all_looks(self) ->
|
|
159
|
+
def _all_looks(self) -> list[Look]:
|
|
159
160
|
"""
|
|
160
161
|
fetch looks via `all_looks`
|
|
161
162
|
https://castor.cloud.looker.com/extensions/marketplace_extension_api_explorer::api-explorer/4.0/methods/Look/all_looks
|
|
@@ -163,7 +164,7 @@ class ApiClient:
|
|
|
163
164
|
# No pagination : see https://community.looker.com/looker-api-77/api-paging-limits-14598
|
|
164
165
|
return list(self._sdk.all_looks(fields=format_fields(LOOK_FIELDS)))
|
|
165
166
|
|
|
166
|
-
def users(self) ->
|
|
167
|
+
def users(self) -> list[User]:
|
|
167
168
|
"""Lists users of the given Looker account"""
|
|
168
169
|
|
|
169
170
|
def _search(page: int, per_page: int) -> Sequence[User]:
|
|
@@ -179,7 +180,7 @@ class ApiClient:
|
|
|
179
180
|
|
|
180
181
|
return Pager(_search, logger=self._logger).all(per_page=self.per_page)
|
|
181
182
|
|
|
182
|
-
def lookml_models(self) ->
|
|
183
|
+
def lookml_models(self) -> list[LookmlModel]:
|
|
183
184
|
"""Iterates LookML models of the given Looker account"""
|
|
184
185
|
|
|
185
186
|
models = self._sdk.all_lookml_models(
|
|
@@ -197,7 +198,7 @@ class ApiClient:
|
|
|
197
198
|
|
|
198
199
|
def explores(
|
|
199
200
|
self,
|
|
200
|
-
explore_names=Iterator[
|
|
201
|
+
explore_names=Iterator[tuple[str, str]],
|
|
201
202
|
) -> Iterator[LookmlModelExplore]:
|
|
202
203
|
"""Iterates explores of the given Looker account for the provided model/explore names"""
|
|
203
204
|
|
|
@@ -214,7 +215,7 @@ class ApiClient:
|
|
|
214
215
|
if explore_ is not None:
|
|
215
216
|
yield explore_
|
|
216
217
|
|
|
217
|
-
def connections(self) ->
|
|
218
|
+
def connections(self) -> list[DBConnection]:
|
|
218
219
|
"""Lists databases connections of the given Looker account"""
|
|
219
220
|
|
|
220
221
|
connections = self._sdk.all_connections(
|
|
@@ -226,7 +227,7 @@ class ApiClient:
|
|
|
226
227
|
|
|
227
228
|
return list(connections)
|
|
228
229
|
|
|
229
|
-
def projects(self) ->
|
|
230
|
+
def projects(self) -> list[Project]:
|
|
230
231
|
"""Lists projects of the given Looker account"""
|
|
231
232
|
|
|
232
233
|
projects = self._sdk.all_projects(fields=format_fields(PROJECT_FIELDS))
|
|
@@ -236,7 +237,7 @@ class ApiClient:
|
|
|
236
237
|
|
|
237
238
|
return list(projects)
|
|
238
239
|
|
|
239
|
-
def groups_hierarchy(self) ->
|
|
240
|
+
def groups_hierarchy(self) -> list[GroupHierarchy]:
|
|
240
241
|
"""Lists groups with hierarchy of the given Looker account"""
|
|
241
242
|
groups_hierarchy = self._sdk.search_groups_with_hierarchy(
|
|
242
243
|
fields=format_fields(GROUPS_HIERARCHY_FIELDS),
|
|
@@ -244,7 +245,7 @@ class ApiClient:
|
|
|
244
245
|
logger.info("All looker groups_hierarchy fetched")
|
|
245
246
|
return list(groups_hierarchy)
|
|
246
247
|
|
|
247
|
-
def groups_roles(self) ->
|
|
248
|
+
def groups_roles(self) -> list[GroupSearch]:
|
|
248
249
|
"""Lists groups with roles of the given Looker account"""
|
|
249
250
|
groups_roles = self._sdk.search_groups_with_roles(
|
|
250
251
|
fields=format_fields(GROUPS_ROLES_FIELDS),
|
|
@@ -252,12 +253,12 @@ class ApiClient:
|
|
|
252
253
|
logger.info("All looker groups_roles fetched")
|
|
253
254
|
return list(groups_roles)
|
|
254
255
|
|
|
255
|
-
def content_views(self) ->
|
|
256
|
+
def content_views(self) -> list[ContentView]:
|
|
256
257
|
"""
|
|
257
258
|
List the number of views per {user x week x dashboard|look}
|
|
258
259
|
https://cloud.google.com/looker/docs/reference/looker-api/latest/types/ContentView
|
|
259
260
|
"""
|
|
260
|
-
content_views:
|
|
261
|
+
content_views: list[ContentView] = []
|
|
261
262
|
|
|
262
263
|
for day in _mondays(history_depth_in_days=CONTENT_VIEWS_HISTORY_DAYS):
|
|
263
264
|
formatted_day = day.strftime("%Y-%m-%d")
|
|
@@ -279,7 +280,7 @@ class ApiClient:
|
|
|
279
280
|
)
|
|
280
281
|
return content_views
|
|
281
282
|
|
|
282
|
-
def users_attributes(self) ->
|
|
283
|
+
def users_attributes(self) -> list[UserAttribute]:
|
|
283
284
|
"""Lists user attributes of the given Looker account"""
|
|
284
285
|
user_attributes = list(
|
|
285
286
|
self._sdk.all_user_attributes(
|
|
@@ -298,7 +299,7 @@ class ApiClient:
|
|
|
298
299
|
asset: LookerAsset,
|
|
299
300
|
*,
|
|
300
301
|
folder_id: Optional[str] = None,
|
|
301
|
-
explore_names: Optional[Iterator[
|
|
302
|
+
explore_names: Optional[Iterator[tuple[str, str]]] = None,
|
|
302
303
|
) -> list:
|
|
303
304
|
if asset == LookerAsset.USERS:
|
|
304
305
|
return self.users()
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
|
|
3
3
|
from looker_sdk.sdk.api40.models import LookmlModel
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def lookml_explore_names(
|
|
7
7
|
lookmls: Iterable[LookmlModel],
|
|
8
|
-
) ->
|
|
8
|
+
) -> set[tuple[str, str]]:
|
|
9
9
|
"""
|
|
10
10
|
Explores from the lookml models
|
|
11
11
|
Only valid explores are yielded: with all infos
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Set
|
|
2
|
-
|
|
3
1
|
from ...types import ExternalAsset, classproperty
|
|
4
2
|
|
|
5
3
|
|
|
@@ -20,7 +18,7 @@ class LookerAsset(ExternalAsset):
|
|
|
20
18
|
USERS_ATTRIBUTES = "users_attributes"
|
|
21
19
|
|
|
22
20
|
@classproperty
|
|
23
|
-
def optional(cls) ->
|
|
21
|
+
def optional(cls) -> set["LookerAsset"]:
|
|
24
22
|
return {
|
|
25
23
|
LookerAsset.CONNECTIONS,
|
|
26
24
|
LookerAsset.CONTENT_VIEWS,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import Optional, Union
|
|
3
4
|
|
|
4
5
|
from looker_sdk.sdk.api40.models import LookmlModel
|
|
5
6
|
|
|
@@ -58,7 +59,7 @@ def iterate_all_data(
|
|
|
58
59
|
search_per_folder: bool,
|
|
59
60
|
thread_pool_size: int,
|
|
60
61
|
log_to_stdout: bool,
|
|
61
|
-
) -> Iterable[Union[StreamableList,
|
|
62
|
+
) -> Iterable[Union[StreamableList, tuple[LookerAsset, list]]]:
|
|
62
63
|
"""Iterate over the extracted Data From looker"""
|
|
63
64
|
|
|
64
65
|
logger.info("Extracting users from Looker API")
|
|
@@ -67,7 +68,7 @@ def iterate_all_data(
|
|
|
67
68
|
|
|
68
69
|
logger.info("Extracting folders from Looker API")
|
|
69
70
|
folders = client.folders()
|
|
70
|
-
folder_ids:
|
|
71
|
+
folder_ids: set[str] = {folder.id for folder in folders if folder.id}
|
|
71
72
|
yield LookerAsset.FOLDERS, deep_serialize(folders)
|
|
72
73
|
|
|
73
74
|
logger.info("Extracting looks from Looker API")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from typing import Any,
|
|
1
|
+
from typing import Any, Union, cast
|
|
2
2
|
|
|
3
|
-
Field = Union[str,
|
|
4
|
-
Fields = Union[Field,
|
|
3
|
+
Field = Union[str, dict[str, Any]]
|
|
4
|
+
Fields = Union[Field, tuple[Field, ...]]
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def format_fields(fields: Fields) -> str:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import sys
|
|
3
|
+
from collections.abc import Iterable
|
|
3
4
|
from concurrent.futures import ThreadPoolExecutor
|
|
4
5
|
from functools import partial
|
|
5
|
-
from typing import Iterable, List, Set
|
|
6
6
|
|
|
7
7
|
from looker_sdk.error import SDKError
|
|
8
8
|
from tqdm import tqdm # type: ignore
|
|
@@ -31,7 +31,7 @@ def _make_api_request(
|
|
|
31
31
|
client: ApiClient,
|
|
32
32
|
asset: LookerAsset,
|
|
33
33
|
folder_id: str,
|
|
34
|
-
) ->
|
|
34
|
+
) -> list:
|
|
35
35
|
"""
|
|
36
36
|
Calls the appropriate Looker API endpoint to retrieve either Looks or
|
|
37
37
|
Dashboards withered by the given folder ID.
|
|
@@ -44,7 +44,7 @@ def _make_api_request(
|
|
|
44
44
|
class MultithreadingFetcher:
|
|
45
45
|
def __init__(
|
|
46
46
|
self,
|
|
47
|
-
folder_ids:
|
|
47
|
+
folder_ids: set[str],
|
|
48
48
|
client: ApiClient,
|
|
49
49
|
thread_pool_size: int,
|
|
50
50
|
log_to_stdout: bool,
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Dict, Tuple
|
|
2
|
-
|
|
3
1
|
from ...types import ExternalAsset
|
|
4
2
|
|
|
5
3
|
|
|
@@ -15,7 +13,7 @@ class MetabaseAsset(ExternalAsset):
|
|
|
15
13
|
DASHBOARD_CARDS = "dashboard_cards"
|
|
16
14
|
|
|
17
15
|
|
|
18
|
-
EXPORTED_FIELDS:
|
|
16
|
+
EXPORTED_FIELDS: dict[MetabaseAsset, tuple[str, ...]] = {
|
|
19
17
|
MetabaseAsset.COLLECTION: (
|
|
20
18
|
"id",
|
|
21
19
|
"name",
|