castor-extractor 0.21.9__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +4 -0
- castor_extractor/commands/__init__.py +0 -3
- castor_extractor/commands/file_check.py +1 -2
- castor_extractor/file_checker/column.py +5 -5
- castor_extractor/file_checker/file.py +7 -7
- castor_extractor/file_checker/file_test.py +2 -2
- castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
- castor_extractor/knowledge/confluence/client/client.py +2 -1
- castor_extractor/knowledge/confluence/extract.py +3 -2
- castor_extractor/knowledge/notion/client/client.py +3 -2
- castor_extractor/knowledge/notion/extract.py +3 -2
- castor_extractor/quality/soda/client/client.py +2 -1
- castor_extractor/quality/soda/client/pagination.py +1 -3
- castor_extractor/types.py +3 -3
- castor_extractor/uploader/env.py +2 -2
- castor_extractor/uploader/upload.py +4 -3
- castor_extractor/uploader/utils.py +1 -1
- castor_extractor/utils/client/abstract.py +2 -1
- castor_extractor/utils/client/api/auth.py +2 -2
- castor_extractor/utils/client/api/auth_test.py +2 -2
- castor_extractor/utils/client/api/client.py +3 -3
- castor_extractor/utils/client/api/pagination.py +3 -2
- castor_extractor/utils/client/api/safe_request.py +5 -5
- castor_extractor/utils/collection.py +7 -11
- castor_extractor/utils/dbt/client.py +3 -3
- castor_extractor/utils/dbt/client_test.py +2 -2
- castor_extractor/utils/deprecate.py +1 -2
- castor_extractor/utils/files.py +5 -5
- castor_extractor/utils/formatter.py +5 -4
- castor_extractor/utils/json_stream_write.py +2 -1
- castor_extractor/utils/object.py +2 -1
- castor_extractor/utils/pager/pager.py +2 -4
- castor_extractor/utils/pager/pager_on_id.py +2 -1
- castor_extractor/utils/pager/pager_on_id_test.py +5 -5
- castor_extractor/utils/pager/pager_test.py +3 -3
- castor_extractor/utils/retry.py +4 -3
- castor_extractor/utils/retry_test.py +2 -3
- castor_extractor/utils/safe.py +3 -3
- castor_extractor/utils/salesforce/client.py +2 -1
- castor_extractor/utils/salesforce/credentials.py +1 -3
- castor_extractor/utils/store.py +2 -1
- castor_extractor/utils/string.py +2 -2
- castor_extractor/utils/string_test.py +1 -3
- castor_extractor/utils/type.py +3 -2
- castor_extractor/utils/validation.py +4 -4
- castor_extractor/utils/write.py +2 -2
- castor_extractor/visualization/domo/client/client.py +8 -7
- castor_extractor/visualization/domo/client/credentials.py +2 -2
- castor_extractor/visualization/domo/client/endpoints.py +2 -2
- castor_extractor/visualization/domo/extract.py +3 -2
- castor_extractor/visualization/looker/api/client.py +17 -16
- castor_extractor/visualization/looker/api/utils.py +2 -2
- castor_extractor/visualization/looker/assets.py +1 -3
- castor_extractor/visualization/looker/extract.py +4 -3
- castor_extractor/visualization/looker/fields.py +3 -3
- castor_extractor/visualization/looker/multithreading.py +3 -3
- castor_extractor/visualization/metabase/assets.py +1 -3
- castor_extractor/visualization/metabase/client/api/client.py +8 -7
- castor_extractor/visualization/metabase/extract.py +3 -2
- castor_extractor/visualization/metabase/types.py +1 -3
- castor_extractor/visualization/mode/client/client.py +6 -6
- castor_extractor/visualization/mode/extract.py +2 -2
- castor_extractor/visualization/powerbi/assets.py +1 -3
- castor_extractor/visualization/powerbi/client/client.py +12 -11
- castor_extractor/visualization/powerbi/client/credentials.py +3 -3
- castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
- castor_extractor/visualization/powerbi/extract.py +3 -2
- castor_extractor/visualization/qlik/assets.py +1 -3
- castor_extractor/visualization/qlik/client/constants.py +1 -3
- castor_extractor/visualization/qlik/client/engine/error.py +1 -3
- castor_extractor/visualization/qlik/client/master.py +3 -3
- castor_extractor/visualization/qlik/client/rest.py +12 -12
- castor_extractor/visualization/qlik/extract.py +4 -3
- castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
- castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
- castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
- castor_extractor/visualization/sigma/client/client.py +9 -8
- castor_extractor/visualization/sigma/client/credentials.py +1 -3
- castor_extractor/visualization/sigma/extract.py +3 -2
- castor_extractor/visualization/tableau/assets.py +1 -2
- castor_extractor/visualization/tableau/client/client.py +1 -2
- castor_extractor/visualization/tableau/client/client_utils.py +3 -2
- castor_extractor/visualization/tableau/client/credentials.py +3 -3
- castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
- castor_extractor/visualization/tableau/extract.py +2 -2
- castor_extractor/visualization/tableau/gql_fields.py +3 -3
- castor_extractor/visualization/tableau/tsc_fields.py +1 -2
- castor_extractor/visualization/tableau/types.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
- castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
- castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
- castor_extractor/visualization/tableau_revamp/extract.py +2 -2
- castor_extractor/visualization/thoughtspot/client/client.py +3 -2
- castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
- castor_extractor/visualization/thoughtspot/extract.py +3 -2
- castor_extractor/warehouse/abstract/asset.py +4 -5
- castor_extractor/warehouse/abstract/extract.py +4 -3
- castor_extractor/warehouse/abstract/query.py +4 -4
- castor_extractor/warehouse/bigquery/client.py +8 -8
- castor_extractor/warehouse/bigquery/extract.py +1 -1
- castor_extractor/warehouse/bigquery/query.py +2 -2
- castor_extractor/warehouse/bigquery/types.py +2 -4
- castor_extractor/warehouse/databricks/api_client.py +15 -14
- castor_extractor/warehouse/databricks/client.py +16 -16
- castor_extractor/warehouse/databricks/extract.py +4 -4
- castor_extractor/warehouse/databricks/format.py +12 -12
- castor_extractor/warehouse/databricks/lineage.py +11 -11
- castor_extractor/warehouse/databricks/pagination.py +2 -2
- castor_extractor/warehouse/databricks/types.py +4 -4
- castor_extractor/warehouse/databricks/utils.py +5 -4
- castor_extractor/warehouse/mysql/query.py +2 -2
- castor_extractor/warehouse/postgres/query.py +2 -2
- castor_extractor/warehouse/redshift/client.py +1 -1
- castor_extractor/warehouse/redshift/query.py +2 -2
- castor_extractor/warehouse/salesforce/client.py +8 -8
- castor_extractor/warehouse/salesforce/extract.py +3 -4
- castor_extractor/warehouse/salesforce/format.py +8 -7
- castor_extractor/warehouse/salesforce/format_test.py +2 -4
- castor_extractor/warehouse/snowflake/query.py +5 -5
- castor_extractor/warehouse/sqlserver/client.py +1 -1
- castor_extractor/warehouse/sqlserver/query.py +2 -2
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/METADATA +7 -6
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/RECORD +128 -128
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/LICENCE +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/WHEEL +0 -0
- {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
from argparse import ArgumentParser
|
|
4
|
-
from typing import Set
|
|
5
4
|
|
|
6
5
|
from castor_extractor import file_checker # type: ignore
|
|
7
6
|
from castor_extractor.utils import ( # type: ignore
|
|
@@ -15,7 +14,7 @@ logger = logging.getLogger(__name__)
|
|
|
15
14
|
|
|
16
15
|
WarehouseTemplate = file_checker.GenericWarehouseFileTemplate
|
|
17
16
|
|
|
18
|
-
Ids =
|
|
17
|
+
Ids = set[str]
|
|
19
18
|
_ID_KEY = "id"
|
|
20
19
|
|
|
21
20
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from typing import Callable,
|
|
1
|
+
from typing import Callable, Optional
|
|
2
2
|
|
|
3
3
|
from dateutil.parser import parse
|
|
4
4
|
|
|
5
5
|
from ..utils import string_to_tuple
|
|
6
6
|
from .enums import DataType, Issue
|
|
7
7
|
|
|
8
|
-
_CONVERTERS:
|
|
8
|
+
_CONVERTERS: dict[DataType, Callable] = {
|
|
9
9
|
DataType.DATETIME: parse,
|
|
10
10
|
DataType.FLOAT: float,
|
|
11
11
|
DataType.INTEGER: int,
|
|
@@ -29,13 +29,13 @@ class ColumnChecker:
|
|
|
29
29
|
data_type: DataType = DataType.STRING,
|
|
30
30
|
is_mandatory: bool = True,
|
|
31
31
|
is_unique: bool = False,
|
|
32
|
-
foreign: Optional[
|
|
33
|
-
enum_values: Optional[
|
|
32
|
+
foreign: Optional[set[str]] = None,
|
|
33
|
+
enum_values: Optional[set[str]] = None,
|
|
34
34
|
):
|
|
35
35
|
self.data_type = data_type
|
|
36
36
|
self.is_mandatory = is_mandatory
|
|
37
37
|
self.is_unique = is_unique
|
|
38
|
-
self.occurrences:
|
|
38
|
+
self.occurrences: set[str] = set()
|
|
39
39
|
self.foreign = foreign
|
|
40
40
|
self.enum_values = enum_values
|
|
41
41
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
3
3
|
|
|
4
4
|
from .column import ColumnChecker
|
|
5
5
|
from .enums import Issue
|
|
@@ -8,8 +8,8 @@ logger = logging.getLogger(__name__)
|
|
|
8
8
|
|
|
9
9
|
_SEPARATOR = f"{30 * '-'}\n"
|
|
10
10
|
|
|
11
|
-
FileTemplate =
|
|
12
|
-
IssueCounter =
|
|
11
|
+
FileTemplate = dict[str, ColumnChecker] # column_name, column_checker
|
|
12
|
+
IssueCounter = dict[Issue, int] # occurrences per type of issue
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class FileCheckerResults:
|
|
@@ -22,7 +22,7 @@ class FileCheckerResults:
|
|
|
22
22
|
self.total_rows: int = 0
|
|
23
23
|
self.valid_rows: int = 0
|
|
24
24
|
self.counter: IssueCounter = {issue: 0 for issue in Issue}
|
|
25
|
-
self.indices:
|
|
25
|
+
self.indices: set[int] = set()
|
|
26
26
|
|
|
27
27
|
def summary(self) -> str:
|
|
28
28
|
"""
|
|
@@ -67,7 +67,7 @@ class FileCheckerRun:
|
|
|
67
67
|
|
|
68
68
|
def __init__(
|
|
69
69
|
self,
|
|
70
|
-
content: Iterable[
|
|
70
|
+
content: Iterable[dict],
|
|
71
71
|
template: FileTemplate,
|
|
72
72
|
file_name: str,
|
|
73
73
|
verbose: bool = False,
|
|
@@ -128,7 +128,7 @@ class FileCheckerRun:
|
|
|
128
128
|
header += f"{str(k):<20} {str(v):<100}\n"
|
|
129
129
|
self.logger.info(header + _SEPARATOR + issue_log + _SEPARATOR)
|
|
130
130
|
|
|
131
|
-
def occurrences(self, name: str) ->
|
|
131
|
+
def occurrences(self, name: str) -> set[str]:
|
|
132
132
|
"""
|
|
133
133
|
Return values of the given column, provided:
|
|
134
134
|
- the column exists in the template
|
|
@@ -158,7 +158,7 @@ class FileCheckerRun:
|
|
|
158
158
|
for _ in self.valid_rows():
|
|
159
159
|
pass
|
|
160
160
|
|
|
161
|
-
def valid_rows(self) -> Iterator[
|
|
161
|
+
def valid_rows(self) -> Iterator[dict]:
|
|
162
162
|
"""
|
|
163
163
|
Reads the file content and yields only valid rows.
|
|
164
164
|
- Invalid rows are ignored
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import csv
|
|
2
2
|
import os
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Iterator
|
|
4
4
|
|
|
5
5
|
from .column import ColumnChecker
|
|
6
6
|
from .enums import DataType, Issue
|
|
@@ -10,7 +10,7 @@ _TEST_FILE = "file_test_users.csv"
|
|
|
10
10
|
_TEST_FILE_VALID = "file_test_users_valid.csv"
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
def _content(path: str) -> Iterator[
|
|
13
|
+
def _content(path: str) -> Iterator[dict]:
|
|
14
14
|
absolute_path = os.path.join(os.path.dirname(__file__), path)
|
|
15
15
|
with open(absolute_path) as csvfile:
|
|
16
16
|
yield from csv.DictReader(csvfile)
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from typing import Set
|
|
2
|
-
|
|
3
1
|
from ..column import ColumnChecker
|
|
4
2
|
from ..constants import TABLE_TYPES
|
|
5
3
|
from ..enums import DataType
|
|
@@ -23,7 +21,7 @@ class GenericWarehouseFileTemplate:
|
|
|
23
21
|
}
|
|
24
22
|
|
|
25
23
|
@staticmethod
|
|
26
|
-
def schema(database_ids:
|
|
24
|
+
def schema(database_ids: set[str]) -> FileTemplate:
|
|
27
25
|
return {
|
|
28
26
|
"id": ColumnChecker(is_unique=True),
|
|
29
27
|
"database_id": ColumnChecker(foreign=database_ids),
|
|
@@ -33,7 +31,7 @@ class GenericWarehouseFileTemplate:
|
|
|
33
31
|
}
|
|
34
32
|
|
|
35
33
|
@staticmethod
|
|
36
|
-
def table(schema_ids:
|
|
34
|
+
def table(schema_ids: set[str]) -> FileTemplate:
|
|
37
35
|
return {
|
|
38
36
|
"id": ColumnChecker(is_unique=True),
|
|
39
37
|
"schema_id": ColumnChecker(foreign=schema_ids),
|
|
@@ -44,7 +42,7 @@ class GenericWarehouseFileTemplate:
|
|
|
44
42
|
}
|
|
45
43
|
|
|
46
44
|
@staticmethod
|
|
47
|
-
def column(table_ids:
|
|
45
|
+
def column(table_ids: set[str]) -> FileTemplate:
|
|
48
46
|
return {
|
|
49
47
|
"id": ColumnChecker(is_unique=True),
|
|
50
48
|
"table_id": ColumnChecker(foreign=table_ids),
|
|
@@ -58,7 +56,7 @@ class GenericWarehouseFileTemplate:
|
|
|
58
56
|
}
|
|
59
57
|
|
|
60
58
|
@staticmethod
|
|
61
|
-
def query(database_ids:
|
|
59
|
+
def query(database_ids: set[str], user_ids: set[str]) -> FileTemplate:
|
|
62
60
|
return {
|
|
63
61
|
"database_id": ColumnChecker(foreign=database_ids),
|
|
64
62
|
"query_text": ColumnChecker(),
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: ConfluenceClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[ConfluenceAsset, Union[list, Iterator, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Confluence"""
|
|
23
24
|
|
|
24
25
|
logger.info("Extracting USERS from API")
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from collections.abc import Iterator
|
|
1
2
|
from functools import partial
|
|
2
3
|
from http import HTTPStatus
|
|
3
|
-
from typing import
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
from ....utils import APIClient, BearerAuth, RequestSafeMode, fetch_all_pages
|
|
6
7
|
from ..assets import NotionAsset
|
|
@@ -23,7 +24,7 @@ NOTION_BASE_HEADERS = {
|
|
|
23
24
|
NOTION_DEFAULT_TIMEOUT_S = 180
|
|
24
25
|
|
|
25
26
|
|
|
26
|
-
def _search_filter(asset: str) ->
|
|
27
|
+
def _search_filter(asset: str) -> dict[str, dict[str, str]]:
|
|
27
28
|
return {"filter": {"value": asset, "property": "object"}}
|
|
28
29
|
|
|
29
30
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
2
|
+
from collections.abc import Iterable, Iterator
|
|
3
|
+
from typing import Union
|
|
3
4
|
|
|
4
5
|
from ...utils import (
|
|
5
6
|
OUTPUT_DIR,
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
def iterate_all_data(
|
|
20
21
|
client: NotionClient,
|
|
21
|
-
) -> Iterable[
|
|
22
|
+
) -> Iterable[tuple[NotionAsset, Union[list, Iterator, dict]]]:
|
|
22
23
|
"""Iterate over the extracted data from Notion"""
|
|
23
24
|
|
|
24
25
|
logger.info("Extracting USERS from API")
|
castor_extractor/types.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from abc import ABC
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import Literal,
|
|
3
|
+
from typing import Literal, TypedDict
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class CsvOptions(TypedDict):
|
|
@@ -34,7 +34,7 @@ class ExternalAsset(Enum):
|
|
|
34
34
|
__metaclass__ = ABC
|
|
35
35
|
|
|
36
36
|
@classproperty
|
|
37
|
-
def optional(cls) ->
|
|
37
|
+
def optional(cls) -> set["ExternalAsset"]:
|
|
38
38
|
"""
|
|
39
39
|
Returns the assets that are not necessarily extracted/pushed.
|
|
40
40
|
Example:
|
|
@@ -46,7 +46,7 @@ class ExternalAsset(Enum):
|
|
|
46
46
|
return set()
|
|
47
47
|
|
|
48
48
|
@classproperty
|
|
49
|
-
def mandatory(cls) ->
|
|
49
|
+
def mandatory(cls) -> set["ExternalAsset"]:
|
|
50
50
|
"""
|
|
51
51
|
Returns the assets that must always be provided.
|
|
52
52
|
"""
|
castor_extractor/uploader/env.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Optional
|
|
2
|
+
from typing import Optional
|
|
3
3
|
|
|
4
4
|
from .constant import (
|
|
5
5
|
DEFAULT_RETRY,
|
|
@@ -27,7 +27,7 @@ def _parse_int(value: Optional[str], default: int) -> int:
|
|
|
27
27
|
return default
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
def get_blob_env() ->
|
|
30
|
+
def get_blob_env() -> tuple[float, int]:
|
|
31
31
|
"""
|
|
32
32
|
Retrieve timeout and retries values. It look for environment variables
|
|
33
33
|
first and return default value otherwise
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
import logging
|
|
3
3
|
import ntpath
|
|
4
|
+
from collections.abc import Iterable
|
|
4
5
|
from datetime import datetime
|
|
5
|
-
from typing import
|
|
6
|
+
from typing import Optional
|
|
6
7
|
from uuid import UUID
|
|
7
8
|
|
|
8
9
|
import requests
|
|
@@ -32,7 +33,7 @@ def _path_and_url(
|
|
|
32
33
|
source_id: UUID,
|
|
33
34
|
file_type: FileType,
|
|
34
35
|
file_path: str,
|
|
35
|
-
) ->
|
|
36
|
+
) -> tuple[str, str]:
|
|
36
37
|
now = datetime.utcnow()
|
|
37
38
|
timestamp = int(now.timestamp())
|
|
38
39
|
filename = ntpath.basename(file_path)
|
|
@@ -48,7 +49,7 @@ def _path_and_url(
|
|
|
48
49
|
return path, url
|
|
49
50
|
|
|
50
51
|
|
|
51
|
-
def _headers(token: str) ->
|
|
52
|
+
def _headers(token: str) -> dict:
|
|
52
53
|
return {
|
|
53
54
|
"Authorization": f"Token {token}",
|
|
54
55
|
"Accept": "text/csv, application/json",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Optional, Union
|
|
4
4
|
|
|
5
5
|
from requests.auth import AuthBase, HTTPBasicAuth
|
|
6
6
|
|
|
@@ -33,7 +33,7 @@ class CustomAuth(AuthBase, ABC):
|
|
|
33
33
|
pass
|
|
34
34
|
|
|
35
35
|
@abstractmethod
|
|
36
|
-
def _authentication_header(self) ->
|
|
36
|
+
def _authentication_header(self) -> dict[str, str]:
|
|
37
37
|
pass
|
|
38
38
|
|
|
39
39
|
def __call__(self, r):
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
3
|
from .auth import BasicAuth, BearerAuth, CustomAuth
|
|
4
4
|
|
|
@@ -9,7 +9,7 @@ class _MockRequest:
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class _CustomAuth(CustomAuth):
|
|
12
|
-
def _authentication_header(self) ->
|
|
12
|
+
def _authentication_header(self) -> dict[str, str]:
|
|
13
13
|
return {"custom-token": "token"}
|
|
14
14
|
|
|
15
15
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from http import HTTPStatus
|
|
3
|
-
from typing import Callable,
|
|
3
|
+
from typing import Callable, Literal, Optional
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
6
|
from requests import Response
|
|
@@ -12,7 +12,7 @@ from .utils import build_url
|
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
|
-
Headers = Optional[
|
|
15
|
+
Headers = Optional[dict[str, str]]
|
|
16
16
|
|
|
17
17
|
# https://requests.readthedocs.io/en/latest/api/#requests.request
|
|
18
18
|
HttpMethod = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
|
|
@@ -27,7 +27,7 @@ def _generate_payloads(
|
|
|
27
27
|
params: Optional[dict],
|
|
28
28
|
data: Optional[dict],
|
|
29
29
|
pagination_params: Optional[dict],
|
|
30
|
-
) ->
|
|
30
|
+
) -> tuple[Optional[dict], Optional[dict]]:
|
|
31
31
|
_pagination_params = pagination_params or {}
|
|
32
32
|
|
|
33
33
|
if method == "GET":
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from abc import abstractmethod
|
|
3
|
+
from collections.abc import Iterator
|
|
3
4
|
from enum import Enum
|
|
4
5
|
from functools import partial
|
|
5
6
|
from time import sleep
|
|
6
|
-
from typing import Callable,
|
|
7
|
+
from typing import Callable, Optional, Union
|
|
7
8
|
|
|
8
9
|
from pydantic import BaseModel
|
|
9
10
|
|
|
@@ -56,7 +57,7 @@ class PaginationModel(BaseModel):
|
|
|
56
57
|
|
|
57
58
|
def fetch_all_pages(
|
|
58
59
|
request: Callable,
|
|
59
|
-
pagination_model:
|
|
60
|
+
pagination_model: type[PaginationModel],
|
|
60
61
|
rate_limit: Optional[int] = None,
|
|
61
62
|
) -> Iterator:
|
|
62
63
|
"""
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any, Callable,
|
|
2
|
+
from typing import Any, Callable, Optional, Union
|
|
3
3
|
|
|
4
4
|
from requests import HTTPError, Response
|
|
5
5
|
|
|
6
6
|
logger = logging.getLogger(__name__)
|
|
7
7
|
|
|
8
|
-
ResponseJson = Union[dict,
|
|
8
|
+
ResponseJson = Union[dict, list[dict]]
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class RequestSafeMode:
|
|
@@ -21,11 +21,11 @@ class RequestSafeMode:
|
|
|
21
21
|
def __init__(
|
|
22
22
|
self,
|
|
23
23
|
max_errors: Union[int, float] = 0,
|
|
24
|
-
status_codes:
|
|
24
|
+
status_codes: tuple[int, ...] = (),
|
|
25
25
|
):
|
|
26
26
|
self.max_errors = max_errors
|
|
27
|
-
self.status_codes:
|
|
28
|
-
self.status_codes_caught:
|
|
27
|
+
self.status_codes: list[int] = list(status_codes)
|
|
28
|
+
self.status_codes_caught: list[int] = []
|
|
29
29
|
|
|
30
30
|
def catch_response(self, exception: HTTPError, status_code: int):
|
|
31
31
|
if int(status_code) not in self.status_codes:
|
|
@@ -1,11 +1,7 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
|
+
from collections.abc import Iterable, Sequence
|
|
2
3
|
from typing import (
|
|
3
4
|
Any,
|
|
4
|
-
Dict,
|
|
5
|
-
Iterable,
|
|
6
|
-
List,
|
|
7
|
-
Sequence,
|
|
8
|
-
Set,
|
|
9
5
|
TypeVar,
|
|
10
6
|
)
|
|
11
7
|
|
|
@@ -15,9 +11,9 @@ from .type import Getter
|
|
|
15
11
|
T = TypeVar("T")
|
|
16
12
|
|
|
17
13
|
|
|
18
|
-
def group_by(identifier: Getter, elements: Sequence) ->
|
|
14
|
+
def group_by(identifier: Getter, elements: Sequence) -> dict[Any, list]:
|
|
19
15
|
"""Groups the elements by the given key"""
|
|
20
|
-
groups:
|
|
16
|
+
groups: dict[Any, list] = defaultdict(list)
|
|
21
17
|
for element in elements:
|
|
22
18
|
key = getproperty(element, identifier)
|
|
23
19
|
groups[key].append(element)
|
|
@@ -25,7 +21,7 @@ def group_by(identifier: Getter, elements: Sequence) -> Dict[Any, List]:
|
|
|
25
21
|
return groups
|
|
26
22
|
|
|
27
23
|
|
|
28
|
-
def mapping_from_rows(rows:
|
|
24
|
+
def mapping_from_rows(rows: list[dict], key: Any, value: Any) -> dict:
|
|
29
25
|
"""
|
|
30
26
|
Create a dictionary mapping from a list of dictionaries using specified keys for mapping.
|
|
31
27
|
|
|
@@ -68,13 +64,13 @@ def empty_iterator():
|
|
|
68
64
|
def deduplicate(
|
|
69
65
|
identifier: Getter,
|
|
70
66
|
elements: Iterable[T],
|
|
71
|
-
) ->
|
|
67
|
+
) -> list[T]:
|
|
72
68
|
"""
|
|
73
69
|
Remove duplicates in the given elements, using the specified identifier
|
|
74
70
|
Only the first occurrence is kept.
|
|
75
71
|
"""
|
|
76
|
-
deduplicated:
|
|
77
|
-
processed:
|
|
72
|
+
deduplicated: list[T] = []
|
|
73
|
+
processed: set[Any] = set()
|
|
78
74
|
|
|
79
75
|
for element in elements:
|
|
80
76
|
key = getproperty(element, identifier)
|
|
@@ -3,7 +3,7 @@ import logging
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from enum import Enum
|
|
6
|
-
from typing import Literal, Optional
|
|
6
|
+
from typing import Literal, Optional
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
9
|
from dateutil.parser import parse
|
|
@@ -92,7 +92,7 @@ class DbtClient:
|
|
|
92
92
|
result = self._call(url=self._account_url)
|
|
93
93
|
return result[0]["id"]
|
|
94
94
|
|
|
95
|
-
def list_job_identifiers(self) ->
|
|
95
|
+
def list_job_identifiers(self) -> set[int]:
|
|
96
96
|
"""
|
|
97
97
|
Return the IDs of all non-deleted jobs for this account
|
|
98
98
|
https://docs.getdbt.com/dbt-cloud/api-v2-legacy#tag/Jobs/operation/listJobsForAccount
|
|
@@ -104,7 +104,7 @@ class DbtClient:
|
|
|
104
104
|
def last_run(
|
|
105
105
|
self,
|
|
106
106
|
job_id: Optional[int] = None,
|
|
107
|
-
finished_at_range: Optional[
|
|
107
|
+
finished_at_range: Optional[tuple[datetime, datetime]] = None,
|
|
108
108
|
) -> Optional[DbtRun]:
|
|
109
109
|
"""
|
|
110
110
|
Extract the last successful run id, optionally filtered on a given datetime range
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from datetime import datetime, timedelta
|
|
3
|
-
from typing import Optional,
|
|
3
|
+
from typing import Optional, Union
|
|
4
4
|
from unittest.mock import MagicMock, patch
|
|
5
5
|
|
|
6
6
|
import pytest
|
|
@@ -21,7 +21,7 @@ _RECENT_DATE_STR = "2023-10-06 05:09:31.731991+00:00"
|
|
|
21
21
|
def _assert_called_with(
|
|
22
22
|
mocked_call: MagicMock,
|
|
23
23
|
job_id: Union[int, str],
|
|
24
|
-
date_range: Optional[
|
|
24
|
+
date_range: Optional[tuple[datetime, datetime]] = None,
|
|
25
25
|
) -> None:
|
|
26
26
|
url = "https://cloud.getdbt.com/api/v2/accounts/40/runs/"
|
|
27
27
|
params = {
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import sys
|
|
3
3
|
import warnings
|
|
4
|
-
from typing import Tuple
|
|
5
4
|
|
|
6
5
|
|
|
7
|
-
def deprecate_python(min_version_supported:
|
|
6
|
+
def deprecate_python(min_version_supported: tuple[int, ...]):
|
|
8
7
|
"""raises a warning if python version < min_version_supported"""
|
|
9
8
|
|
|
10
9
|
python_version = (
|
castor_extractor/utils/files.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import glob
|
|
2
2
|
import os
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def explode(path: str) ->
|
|
6
|
+
def explode(path: str) -> tuple[str, str, str]:
|
|
7
7
|
"""
|
|
8
8
|
Split a file path into 3 parts:
|
|
9
9
|
- Head (directory)
|
|
@@ -20,9 +20,9 @@ def search_files(
|
|
|
20
20
|
directory: str,
|
|
21
21
|
*,
|
|
22
22
|
filter_endswith: Optional[str] = None,
|
|
23
|
-
filter_extensions: Optional[
|
|
24
|
-
does_not_contain: Optional[
|
|
25
|
-
) ->
|
|
23
|
+
filter_extensions: Optional[set[str]] = None,
|
|
24
|
+
does_not_contain: Optional[set[str]] = None,
|
|
25
|
+
) -> list[str]:
|
|
26
26
|
"""Retrieve files in a directory, matching given criteria"""
|
|
27
27
|
|
|
28
28
|
def _does_not_contain(path: str) -> bool:
|
|
@@ -6,9 +6,10 @@ import logging
|
|
|
6
6
|
import re
|
|
7
7
|
import sys
|
|
8
8
|
from abc import ABC, abstractmethod
|
|
9
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
9
10
|
from datetime import date, datetime
|
|
10
11
|
from enum import Enum
|
|
11
|
-
from typing import IO, Any,
|
|
12
|
+
from typing import IO, Any, Union
|
|
12
13
|
from uuid import UUID
|
|
13
14
|
|
|
14
15
|
from ..types import CsvOptions
|
|
@@ -49,7 +50,7 @@ def _scalar(value: Any) -> ScalarValue:
|
|
|
49
50
|
return str(value)
|
|
50
51
|
|
|
51
52
|
|
|
52
|
-
def _row(header: Sequence[str], row: dict) ->
|
|
53
|
+
def _row(header: Sequence[str], row: dict) -> list[ScalarValue]:
|
|
53
54
|
return [_scalar(row.get(h)) for h in header]
|
|
54
55
|
|
|
55
56
|
|
|
@@ -60,7 +61,7 @@ def remove_unsupported_byte(element: ScalarValue) -> ScalarValue:
|
|
|
60
61
|
return re.sub("\x00", "", element)
|
|
61
62
|
|
|
62
63
|
|
|
63
|
-
def to_string_array(arr_json: str) ->
|
|
64
|
+
def to_string_array(arr_json: str) -> list[str]:
|
|
64
65
|
"""
|
|
65
66
|
Converts a JSON-serialized string array value as a string to a list
|
|
66
67
|
Ex: '["items","count"]' to ["items", "order"]
|
|
@@ -100,7 +101,7 @@ def from_csv(buffer: IO[str]) -> Iterator[dict]:
|
|
|
100
101
|
"""convert data as from a CSV string to list of dict"""
|
|
101
102
|
try:
|
|
102
103
|
reader = csv.reader(buffer, **CSV_OPTIONS)
|
|
103
|
-
header:
|
|
104
|
+
header: list[str] = []
|
|
104
105
|
for row in reader:
|
|
105
106
|
if not header:
|
|
106
107
|
header = list(row)
|
castor_extractor/utils/object.py
CHANGED