castor-extractor 0.21.7__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (131)
  1. CHANGELOG.md +8 -0
  2. castor_extractor/commands/__init__.py +0 -3
  3. castor_extractor/commands/file_check.py +1 -2
  4. castor_extractor/file_checker/column.py +5 -5
  5. castor_extractor/file_checker/file.py +7 -7
  6. castor_extractor/file_checker/file_test.py +2 -2
  7. castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
  8. castor_extractor/knowledge/confluence/client/client.py +2 -1
  9. castor_extractor/knowledge/confluence/extract.py +3 -2
  10. castor_extractor/knowledge/notion/client/client.py +3 -2
  11. castor_extractor/knowledge/notion/extract.py +3 -2
  12. castor_extractor/quality/soda/client/client.py +2 -1
  13. castor_extractor/quality/soda/client/pagination.py +1 -3
  14. castor_extractor/types.py +3 -3
  15. castor_extractor/uploader/env.py +2 -2
  16. castor_extractor/uploader/upload.py +4 -3
  17. castor_extractor/uploader/utils.py +1 -1
  18. castor_extractor/utils/client/abstract.py +2 -1
  19. castor_extractor/utils/client/api/auth.py +2 -2
  20. castor_extractor/utils/client/api/auth_test.py +2 -2
  21. castor_extractor/utils/client/api/client.py +8 -3
  22. castor_extractor/utils/client/api/pagination.py +3 -2
  23. castor_extractor/utils/client/api/safe_request.py +5 -5
  24. castor_extractor/utils/collection.py +7 -11
  25. castor_extractor/utils/dbt/client.py +3 -3
  26. castor_extractor/utils/dbt/client_test.py +2 -2
  27. castor_extractor/utils/deprecate.py +1 -2
  28. castor_extractor/utils/files.py +5 -5
  29. castor_extractor/utils/formatter.py +5 -4
  30. castor_extractor/utils/json_stream_write.py +2 -1
  31. castor_extractor/utils/object.py +2 -1
  32. castor_extractor/utils/pager/pager.py +2 -4
  33. castor_extractor/utils/pager/pager_on_id.py +2 -1
  34. castor_extractor/utils/pager/pager_on_id_test.py +5 -5
  35. castor_extractor/utils/pager/pager_test.py +3 -3
  36. castor_extractor/utils/retry.py +4 -3
  37. castor_extractor/utils/retry_test.py +2 -3
  38. castor_extractor/utils/safe.py +3 -3
  39. castor_extractor/utils/salesforce/client.py +2 -1
  40. castor_extractor/utils/salesforce/credentials.py +1 -3
  41. castor_extractor/utils/store.py +2 -1
  42. castor_extractor/utils/string.py +2 -2
  43. castor_extractor/utils/string_test.py +1 -3
  44. castor_extractor/utils/type.py +3 -2
  45. castor_extractor/utils/validation.py +4 -4
  46. castor_extractor/utils/write.py +2 -2
  47. castor_extractor/visualization/domo/client/client.py +8 -7
  48. castor_extractor/visualization/domo/client/credentials.py +2 -2
  49. castor_extractor/visualization/domo/client/endpoints.py +2 -2
  50. castor_extractor/visualization/domo/extract.py +3 -2
  51. castor_extractor/visualization/looker/api/client.py +17 -16
  52. castor_extractor/visualization/looker/api/utils.py +2 -2
  53. castor_extractor/visualization/looker/assets.py +1 -3
  54. castor_extractor/visualization/looker/extract.py +4 -3
  55. castor_extractor/visualization/looker/fields.py +3 -3
  56. castor_extractor/visualization/looker/multithreading.py +3 -3
  57. castor_extractor/visualization/metabase/assets.py +1 -3
  58. castor_extractor/visualization/metabase/client/api/client.py +8 -7
  59. castor_extractor/visualization/metabase/extract.py +3 -2
  60. castor_extractor/visualization/metabase/types.py +1 -3
  61. castor_extractor/visualization/mode/client/client.py +6 -6
  62. castor_extractor/visualization/mode/extract.py +2 -2
  63. castor_extractor/visualization/powerbi/assets.py +1 -3
  64. castor_extractor/visualization/powerbi/client/client.py +12 -11
  65. castor_extractor/visualization/powerbi/client/credentials.py +3 -3
  66. castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
  67. castor_extractor/visualization/powerbi/extract.py +3 -2
  68. castor_extractor/visualization/qlik/assets.py +1 -3
  69. castor_extractor/visualization/qlik/client/constants.py +1 -3
  70. castor_extractor/visualization/qlik/client/engine/error.py +1 -3
  71. castor_extractor/visualization/qlik/client/master.py +3 -3
  72. castor_extractor/visualization/qlik/client/rest.py +12 -12
  73. castor_extractor/visualization/qlik/extract.py +4 -3
  74. castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
  75. castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
  76. castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
  77. castor_extractor/visualization/sigma/client/client.py +11 -8
  78. castor_extractor/visualization/sigma/client/credentials.py +1 -3
  79. castor_extractor/visualization/sigma/client/pagination.py +1 -1
  80. castor_extractor/visualization/sigma/extract.py +3 -2
  81. castor_extractor/visualization/tableau/assets.py +1 -2
  82. castor_extractor/visualization/tableau/client/client.py +1 -2
  83. castor_extractor/visualization/tableau/client/client_utils.py +3 -2
  84. castor_extractor/visualization/tableau/client/credentials.py +3 -3
  85. castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
  86. castor_extractor/visualization/tableau/extract.py +2 -2
  87. castor_extractor/visualization/tableau/gql_fields.py +3 -3
  88. castor_extractor/visualization/tableau/tsc_fields.py +1 -2
  89. castor_extractor/visualization/tableau/types.py +3 -3
  90. castor_extractor/visualization/tableau_revamp/client/client.py +6 -1
  91. castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +56 -9
  92. castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
  93. castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
  94. castor_extractor/visualization/tableau_revamp/client/errors.py +5 -0
  95. castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
  96. castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
  97. castor_extractor/visualization/tableau_revamp/extract.py +2 -2
  98. castor_extractor/visualization/thoughtspot/client/client.py +3 -2
  99. castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
  100. castor_extractor/visualization/thoughtspot/extract.py +3 -2
  101. castor_extractor/warehouse/abstract/asset.py +4 -5
  102. castor_extractor/warehouse/abstract/extract.py +4 -3
  103. castor_extractor/warehouse/abstract/query.py +4 -4
  104. castor_extractor/warehouse/bigquery/client.py +8 -8
  105. castor_extractor/warehouse/bigquery/extract.py +1 -1
  106. castor_extractor/warehouse/bigquery/query.py +2 -2
  107. castor_extractor/warehouse/bigquery/types.py +2 -4
  108. castor_extractor/warehouse/databricks/api_client.py +15 -14
  109. castor_extractor/warehouse/databricks/client.py +16 -16
  110. castor_extractor/warehouse/databricks/extract.py +4 -4
  111. castor_extractor/warehouse/databricks/format.py +12 -12
  112. castor_extractor/warehouse/databricks/lineage.py +11 -11
  113. castor_extractor/warehouse/databricks/pagination.py +2 -2
  114. castor_extractor/warehouse/databricks/types.py +4 -4
  115. castor_extractor/warehouse/databricks/utils.py +5 -4
  116. castor_extractor/warehouse/mysql/query.py +2 -2
  117. castor_extractor/warehouse/postgres/query.py +2 -2
  118. castor_extractor/warehouse/redshift/client.py +1 -1
  119. castor_extractor/warehouse/redshift/query.py +2 -2
  120. castor_extractor/warehouse/salesforce/client.py +8 -8
  121. castor_extractor/warehouse/salesforce/extract.py +3 -4
  122. castor_extractor/warehouse/salesforce/format.py +8 -7
  123. castor_extractor/warehouse/salesforce/format_test.py +2 -4
  124. castor_extractor/warehouse/snowflake/query.py +5 -5
  125. castor_extractor/warehouse/sqlserver/client.py +1 -1
  126. castor_extractor/warehouse/sqlserver/query.py +2 -2
  127. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/METADATA +11 -6
  128. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/RECORD +131 -131
  129. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/LICENCE +0 -0
  130. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/WHEEL +0 -0
  131. {castor_extractor-0.21.7.dist-info → castor_extractor-0.22.0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,4 @@
1
1
  from enum import Enum
2
- from typing import Set
3
2
 
4
3
  from ...types import ExternalAsset, classproperty
5
4
 
@@ -24,7 +23,7 @@ class TableauAsset(ExternalAsset):
24
23
  WORKBOOK_TO_DATASOURCE = "workbooks_to_datasource"
25
24
 
26
25
  @classproperty
27
- def optional(cls) -> Set["TableauAsset"]:
26
+ def optional(cls) -> set["TableauAsset"]:
28
27
  return {
29
28
  TableauAsset.DASHBOARD,
30
29
  TableauAsset.DASHBOARD_SHEET,
@@ -1,5 +1,4 @@
1
1
  import logging
2
- from typing import List
3
2
 
4
3
  import tableauserverclient as TSC # type: ignore
5
4
 
@@ -43,7 +42,7 @@ class ApiClient:
43
42
  self._page_size = PAGE_SIZE
44
43
  self._server.version = TABLEAU_SERVER_VERSION
45
44
  self._safe_mode = bool(kwargs.get("safe_mode"))
46
- self.errors: List[str] = []
45
+ self.errors: list[str] = []
47
46
 
48
47
  @staticmethod
49
48
  def name() -> str:
@@ -1,4 +1,5 @@
1
- from typing import Dict, Iterator, Optional
1
+ from collections.abc import Iterator
2
+ from typing import Optional
2
3
 
3
4
  from ....utils import SerializedAsset
4
5
  from ..assets import TableauAsset
@@ -69,6 +70,6 @@ def query_scroll(
69
70
  break
70
71
 
71
72
 
72
- def extract_asset(asset: Dict, asset_type: TableauAsset) -> Dict:
73
+ def extract_asset(asset: dict, asset_type: TableauAsset) -> dict:
73
74
  """Agnostic function extracting dedicated attributes with define asset"""
74
75
  return {key: getattr(asset, key) for key in TSC_FIELDS[asset_type]}
@@ -1,5 +1,5 @@
1
1
  from enum import Enum
2
- from typing import Dict, Optional
2
+ from typing import Optional
3
3
 
4
4
  from ....utils import from_env
5
5
 
@@ -20,7 +20,7 @@ class CredentialsKey(Enum):
20
20
  TABLEAU_SERVER_URL = "server_url"
21
21
 
22
22
 
23
- CREDENTIALS_ENV: Dict[CredentialsKey, str] = {
23
+ CREDENTIALS_ENV: dict[CredentialsKey, str] = {
24
24
  CredentialsKey.TABLEAU_USER: "CASTOR_TABLEAU_USER",
25
25
  CredentialsKey.TABLEAU_PASSWORD: "CASTOR_TABLEAU_PASSWORD",
26
26
  CredentialsKey.TABLEAU_TOKEN_NAME: "CASTOR_TABLEAU_TOKEN_NAME",
@@ -89,7 +89,7 @@ class CredentialsApi:
89
89
  CredentialsKey.TABLEAU_TOKEN: token,
90
90
  }
91
91
 
92
- def to_dict(self, hide: bool = False) -> Dict[str, str]:
92
+ def to_dict(self, hide: bool = False) -> dict[str, str]:
93
93
  safe = (
94
94
  CredentialsKey.TABLEAU_USER,
95
95
  CredentialsKey.TABLEAU_SITE_ID,
@@ -1,5 +1,4 @@
1
1
  import logging
2
- from typing import Dict, List
3
2
 
4
3
  import tableauserverclient as TSC # type: ignore
5
4
 
@@ -48,7 +47,7 @@ def safe_mode_fetch_usage(client) -> SerializedAsset:
48
47
  Returns computed usages when page number is not found
49
48
  Log errors if ServerResponseError is return
50
49
  """
51
- list_usages: List[Dict] = []
50
+ list_usages: list[dict] = []
52
51
  page_number: int = 0
53
52
 
54
53
  while True:
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Iterable, Tuple
2
+ from collections.abc import Iterable
3
3
 
4
4
  from ...utils import (
5
5
  OUTPUT_DIR,
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)
19
19
 
20
20
  def iterate_all_data(
21
21
  client: Client,
22
- ) -> Iterable[Tuple[TableauAsset, list]]:
22
+ ) -> Iterable[tuple[TableauAsset, list]]:
23
23
  """Iterate over the extracted Data from Tableau"""
24
24
 
25
25
  logger.info("Extracting USER from Tableau API")
@@ -1,6 +1,6 @@
1
1
  # Fields which will be use for Tableau GraphQL API
2
2
  from enum import Enum
3
- from typing import Dict, List, Union
3
+ from typing import Union
4
4
 
5
5
  from .assets import TableauAsset, TableauGraphqlAsset
6
6
 
@@ -189,9 +189,9 @@ class GQLQueryFields(Enum):
189
189
  """
190
190
 
191
191
 
192
- QueryInfo = List[Dict[str, Union[GQLQueryFields, TableauGraphqlAsset]]]
192
+ QueryInfo = list[dict[str, Union[GQLQueryFields, TableauGraphqlAsset]]]
193
193
 
194
- QUERY_FIELDS: Dict[TableauAsset, QueryInfo] = {
194
+ QUERY_FIELDS: dict[TableauAsset, QueryInfo] = {
195
195
  TableauAsset.CUSTOM_SQL_TABLE: [
196
196
  {
197
197
  FIELDS: GQLQueryFields.CUSTOM_SQL_TABLE,
@@ -1,10 +1,9 @@
1
1
  # TSC for TableauServerClient: basic REST API to extracting core objects
2
- from typing import Dict, Set
3
2
 
4
3
  from .assets import TableauAsset
5
4
 
6
5
  # TSC fields extracted per assets
7
- TSC_FIELDS: Dict[TableauAsset, Set[str]] = {
6
+ TSC_FIELDS: dict[TableauAsset, set[str]] = {
8
7
  TableauAsset.PROJECT: {
9
8
  "id",
10
9
  "name",
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Tuple, Union
1
+ from typing import Union
2
2
 
3
3
  from tableauserverclient import ServerResponseError # type: ignore
4
4
  from typing_extensions import Literal
@@ -6,6 +6,6 @@ from typing_extensions import Literal
6
6
  from .errors import TableauErrorCode
7
7
 
8
8
  PageReturn = Union[
9
- Tuple[List[Dict], Literal[None]],
10
- Tuple[Literal[None], Union[TableauErrorCode, ServerResponseError]],
9
+ tuple[list[dict], Literal[None]],
10
+ tuple[Literal[None], Union[TableauErrorCode, ServerResponseError]],
11
11
  ]
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from typing import Optional
2
3
 
3
4
  import tableauserverclient as TSC # type: ignore
4
5
 
@@ -121,12 +122,16 @@ class TableauRevampClient:
121
122
  credentials: TableauRevampCredentials,
122
123
  timeout_sec: int = DEFAULT_TIMEOUT_SECONDS,
123
124
  with_pulse: bool = False,
125
+ override_page_size: Optional[int] = None,
124
126
  ):
125
127
  self._credentials = credentials
126
128
  self._server = _server(credentials.server_url, timeout_sec)
127
129
  self._with_pulse = with_pulse
128
130
 
129
- self._client_metadata = TableauClientMetadataApi(server=self._server)
131
+ self._client_metadata = TableauClientMetadataApi(
132
+ server=self._server,
133
+ override_page_size=override_page_size,
134
+ )
130
135
  self._client_rest = TableauClientRestApi(server=self._server)
131
136
  self._client_tsc = TableauClientTSC(server=self._server)
132
137
 
@@ -1,16 +1,17 @@
1
- from typing import Dict, Iterator, Optional
1
+ from collections.abc import Iterator
2
+ from typing import Optional
2
3
 
3
4
  import tableauserverclient as TSC # type: ignore
4
5
 
5
- from ....utils import SerializedAsset
6
+ from ....utils import SerializedAsset, retry
6
7
  from ..assets import TableauRevampAsset
7
8
  from ..constants import DEFAULT_PAGE_SIZE
8
- from .errors import TableauApiError
9
+ from .errors import TableauApiError, TableauApiTimeout
9
10
  from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
10
11
 
11
12
  # increase the value when extraction is too slow
12
13
  # decrease the value when timeouts arise
13
- _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
14
+ _CUSTOM_PAGE_SIZE: dict[TableauRevampAsset, int] = {
14
15
  # for some clients, extraction of columns tend to hit the node limit
15
16
  # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
16
17
  # the workaround is to reduce pagination
@@ -20,21 +21,58 @@ _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
20
21
  TableauRevampAsset.TABLE: 50,
21
22
  }
22
23
 
24
+ _TIMEOUT_MESSAGE = (
25
+ "Execution canceled because timeout of 30000 millis was reached"
26
+ )
27
+
28
+ _RETRY_BASE_MS = 10_000
29
+ _RETRY_COUNT = 4
30
+
31
+
32
+ def _check_errors(answer: dict) -> None:
33
+ """
34
+ handle errors in graphql response:
35
+ - return None when there's no errors in the answer
36
+ - TableauApiTimeout if any of the errors is a timeout
37
+ - TableauApiError (generic) otherwise
38
+ """
39
+ if "errors" not in answer:
40
+ return
41
+
42
+ errors = answer["errors"]
43
+
44
+ for error in errors:
45
+ if error.get("message") == _TIMEOUT_MESSAGE:
46
+ # we need specific handling for timeout issues (retry strategy)
47
+ raise TableauApiTimeout(errors)
48
+
49
+ raise TableauApiError(answer["errors"])
50
+
23
51
 
24
52
  def gql_query_scroll(
25
53
  server,
26
54
  query: str,
27
55
  resource: str,
28
56
  ) -> Iterator[SerializedAsset]:
29
- """Iterate over GQL query results, handling pagination and cursor"""
57
+ """
58
+ Iterate over GQL query results, handling pagination and cursor
30
59
 
60
+ We have a retry strategy when timeout issues arise.
61
+ It's a known issue on Tableau side, still waiting for their fix:
62
+ https://issues.salesforce.com/issue/a028c00000zKahoAAC/undefined
63
+ """
64
+
65
+ @retry(
66
+ exceptions=(TableauApiTimeout,),
67
+ max_retries=_RETRY_COUNT,
68
+ base_ms=_RETRY_BASE_MS,
69
+ )
31
70
  def _call(cursor: Optional[str]) -> dict:
32
71
  # If cursor is defined it must be quoted else use null token
33
72
  token = "null" if cursor is None else f'"{cursor}"'
34
73
  query_ = query.replace("AFTER_TOKEN_SIGNAL", token)
35
74
  answer = server.metadata.query(query_)
36
- if "errors" in answer:
37
- raise TableauApiError(answer["errors"])
75
+ _check_errors(answer)
38
76
  return answer["data"][f"{resource}Connection"]
39
77
 
40
78
  cursor = None
@@ -58,8 +96,10 @@ class TableauClientMetadataApi:
58
96
  def __init__(
59
97
  self,
60
98
  server: TSC.Server,
99
+ override_page_size: Optional[int] = None,
61
100
  ):
62
101
  self._server = server
102
+ self._forced_page_size = override_page_size
63
103
 
64
104
  def _call(
65
105
  self,
@@ -75,9 +115,16 @@ class TableauClientMetadataApi:
75
115
  result_pages = gql_query_scroll(self._server, query, resource)
76
116
  return [asset for page in result_pages for asset in page]
77
117
 
118
+ def _page_size(self, asset: TableauRevampAsset) -> int:
119
+ return (
120
+ self._forced_page_size
121
+ or _CUSTOM_PAGE_SIZE.get(asset)
122
+ or DEFAULT_PAGE_SIZE
123
+ )
124
+
78
125
  def _fetch_fields(self) -> SerializedAsset:
79
126
  result: SerializedAsset = []
80
- page_size = _CUSTOM_PAGE_SIZE[TableauRevampAsset.FIELD]
127
+ page_size = self._page_size(TableauRevampAsset.FIELD)
81
128
  for resource, fields in FIELDS_QUERIES:
82
129
  current = self._call(resource, fields, page_size)
83
130
  result.extend(current)
@@ -90,6 +137,6 @@ class TableauClientMetadataApi:
90
137
  if asset == TableauRevampAsset.FIELD:
91
138
  return self._fetch_fields()
92
139
 
93
- page_size = _CUSTOM_PAGE_SIZE.get(asset) or DEFAULT_PAGE_SIZE
140
+ page_size = self._page_size(asset)
94
141
  resource, fields = GQL_QUERIES[asset]
95
142
  return self._call(resource, fields, page_size)
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Dict, Optional
2
+ from typing import Optional
3
3
 
4
4
  import requests
5
5
  import tableauserverclient as TSC # type: ignore
@@ -40,7 +40,7 @@ class TableauClientRestApi:
40
40
  return self._server.http_options["timeout"]
41
41
 
42
42
  @property
43
- def headers(self) -> Dict[str, str]:
43
+ def headers(self) -> dict[str, str]:
44
44
  return {"x-tableau-auth": self._server.auth_token}
45
45
 
46
46
  def _get_site_name(self) -> str:
@@ -52,7 +52,7 @@ class TableauClientRestApi:
52
52
  self,
53
53
  url: str,
54
54
  page_token: Optional[str] = None,
55
- ) -> Dict:
55
+ ) -> dict:
56
56
  if page_token:
57
57
  url += f"?page_token={page_token}"
58
58
 
@@ -1,4 +1,5 @@
1
- from typing import Any, Dict, Iterable, Iterator
1
+ from collections.abc import Iterable, Iterator
2
+ from typing import Any
2
3
 
3
4
  import tableauserverclient as TSC # type: ignore
4
5
 
@@ -30,7 +31,7 @@ class TableauClientTSC:
30
31
  self,
31
32
  data: Iterable,
32
33
  asset: TableauRevampAsset,
33
- ) -> Iterator[Dict]:
34
+ ) -> Iterator[dict]:
34
35
  keys = REST_FIELDS[asset]
35
36
 
36
37
  for row in data:
@@ -1,3 +1,8 @@
1
1
  class TableauApiError(ValueError):
2
2
  def __init__(self, error: str):
3
3
  super().__init__(f"Tableau API returned the following error: {error}")
4
+
5
+
6
+ class TableauApiTimeout(ValueError):
7
+ def __init__(self, error: str):
8
+ super().__init__(f"Tableau API returned a timeout error: {error}")
@@ -1,5 +1,3 @@
1
- from typing import Dict, Tuple
2
-
3
1
  from ..assets import TableauRevampAsset
4
2
 
5
3
  QUERY_TEMPLATE = """
@@ -130,7 +128,7 @@ workbook { id }
130
128
  """
131
129
 
132
130
 
133
- GQL_QUERIES: Dict[TableauRevampAsset, Tuple[str, str]] = {
131
+ GQL_QUERIES: dict[TableauRevampAsset, tuple[str, str]] = {
134
132
  TableauRevampAsset.COLUMN: ("columns", _COLUMNS_QUERY),
135
133
  TableauRevampAsset.DASHBOARD: ("dashboards", _DASHBOARDS_QUERY),
136
134
  TableauRevampAsset.DATASOURCE: ("datasources", _DATASOURCES_QUERY),
@@ -1,9 +1,7 @@
1
- from typing import Dict, Set
2
-
3
1
  from ..assets import TableauRevampAsset
4
2
 
5
3
  # list of fields to pick in REST API or TSC responses
6
- REST_FIELDS: Dict[TableauRevampAsset, Set[str]] = {
4
+ REST_FIELDS: dict[TableauRevampAsset, set[str]] = {
7
5
  TableauRevampAsset.DATASOURCE: {
8
6
  "id",
9
7
  "project_id",
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import Iterable, Tuple
2
+ from collections.abc import Iterable
3
3
 
4
4
  from ...utils import (
5
5
  OUTPUT_DIR,
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
18
18
 
19
19
  def iterate_all_data(
20
20
  client: TableauRevampClient,
21
- ) -> Iterable[Tuple[TableauRevampAsset, list]]:
21
+ ) -> Iterable[tuple[TableauRevampAsset, list]]:
22
22
  """Iterate over the extracted Data from Tableau"""
23
23
 
24
24
  for asset in TableauRevampAsset:
@@ -1,4 +1,5 @@
1
- from typing import Dict, Iterator, Optional
1
+ from collections.abc import Iterator
2
+ from typing import Optional
2
3
 
3
4
  import requests
4
5
 
@@ -35,7 +36,7 @@ THOUGHTSPOT_SAFE_MODE = RequestSafeMode()
35
36
 
36
37
 
37
38
  class ThoughtspotBearerAuth(BearerAuth):
38
- def __init__(self, host: str, token_payload: Dict[str, str]):
39
+ def __init__(self, host: str, token_payload: dict[str, str]):
39
40
  auth_endpoint = ThoughtspotEndpointFactory.authentication()
40
41
  self.authentication_url = build_url(host, auth_endpoint)
41
42
  self.token_payload = token_payload
@@ -1,6 +1,6 @@
1
1
  import csv
2
+ from collections.abc import Iterator
2
3
  from io import StringIO
3
- from typing import Iterator
4
4
 
5
5
 
6
6
  def usage_liveboard_reader(usage_liveboard_csv: str) -> Iterator[dict]:
@@ -1,5 +1,6 @@
1
1
  import logging
2
- from typing import Iterable, Iterator, Tuple, Union
2
+ from collections.abc import Iterable, Iterator
3
+ from typing import Union
3
4
 
4
5
  from ...utils import (
5
6
  OUTPUT_DIR,
@@ -21,7 +22,7 @@ logger = logging.getLogger(__name__)
21
22
 
22
23
  def iterate_all_data(
23
24
  client: ThoughtspotClient,
24
- ) -> Iterable[Tuple[ThoughtspotAsset, Union[list, Iterator, dict]]]:
25
+ ) -> Iterable[tuple[ThoughtspotAsset, Union[list, Iterator, dict]]]:
25
26
  """Iterate over the extracted data from Thoughtspot"""
26
27
 
27
28
  for asset in ThoughtspotAsset:
@@ -1,5 +1,4 @@
1
1
  from enum import Enum
2
- from typing import Dict, List, Set, Tuple
3
2
 
4
3
  from ...types import ExternalAsset, classproperty
5
4
 
@@ -26,7 +25,7 @@ class WarehouseAsset(ExternalAsset):
26
25
  VIEW_DDL = "view_ddl"
27
26
 
28
27
  @classproperty
29
- def optional(cls) -> Set["WarehouseAsset"]:
28
+ def optional(cls) -> set["WarehouseAsset"]:
30
29
  return {
31
30
  WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
32
31
  WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
@@ -50,7 +49,7 @@ class WarehouseAssetGroup(Enum):
50
49
 
51
50
 
52
51
  # tuple of supported assets for each group (depends on the technology)
53
- SupportedAssets = Dict[WarehouseAssetGroup, Tuple[WarehouseAsset, ...]]
52
+ SupportedAssets = dict[WarehouseAssetGroup, tuple[WarehouseAsset, ...]]
54
53
 
55
54
  # shared by all technologies
56
55
  CATALOG_ASSETS = (
@@ -80,13 +79,13 @@ NON_EXTRACTABLE_ASSETS = {WarehouseAssetGroup.EXTERNAL_LINEAGE}
80
79
 
81
80
  def extractable_asset_groups(
82
81
  supported_assets: SupportedAssets,
83
- ) -> List[Tuple[WarehouseAsset, ...]]:
82
+ ) -> list[tuple[WarehouseAsset, ...]]:
84
83
  """
85
84
  helper function to differentiate
86
85
  extractable assets vs supported (ingest-able) assets
87
86
  """
88
87
  groups = set(supported_assets).difference(NON_EXTRACTABLE_ASSETS)
89
- extractable: Set[Tuple[WarehouseAsset, ...]] = {
88
+ extractable: set[tuple[WarehouseAsset, ...]] = {
90
89
  supported_assets[group] for group in groups
91
90
  }
92
91
  return list(extractable)
@@ -1,6 +1,7 @@
1
1
  import logging
2
+ from collections.abc import Iterator
2
3
  from itertools import chain
3
- from typing import Callable, Iterator, List, Optional, Tuple
4
+ from typing import Callable, Optional
4
5
 
5
6
  from ...utils import (
6
7
  OUTPUT_DIR,
@@ -16,7 +17,7 @@ from .query import AbstractQueryBuilder, ExtractionQuery
16
17
  logger = logging.getLogger(__name__)
17
18
 
18
19
 
19
- def common_args(kwargs: dict) -> Tuple[str, bool]:
20
+ def common_args(kwargs: dict) -> tuple[str, bool]:
20
21
  """Args used by all technologies"""
21
22
  output_directory = kwargs.get("output_directory") or from_env(OUTPUT_DIR)
22
23
  skip_existing = kwargs.get("skip_existing") or False
@@ -39,7 +40,7 @@ class SQLExtractionProcessor:
39
40
  self._safe_mode = safe_mode
40
41
 
41
42
  @staticmethod
42
- def _unique(data: Iterator[dict]) -> List[dict]:
43
+ def _unique(data: Iterator[dict]) -> list[dict]:
43
44
  """
44
45
  Remove duplicate in the given data.
45
46
  Remark: this method implies loading all data in memory: it breaks the streaming pipeline !
@@ -1,7 +1,7 @@
1
1
  import inspect
2
2
  import os
3
3
  from abc import ABC, abstractmethod
4
- from typing import List, Optional, Tuple
4
+ from typing import Optional
5
5
 
6
6
  from .asset import WarehouseAsset
7
7
  from .time_filter import TimeFilter
@@ -37,7 +37,7 @@ class AbstractQueryBuilder(ABC):
37
37
  def __init__(
38
38
  self,
39
39
  time_filter: Optional[TimeFilter],
40
- duplicated: Optional[Tuple[WarehouseAsset, ...]] = None,
40
+ duplicated: Optional[tuple[WarehouseAsset, ...]] = None,
41
41
  ):
42
42
  self._time_filter = time_filter or TimeFilter.default()
43
43
  self._duplicated = duplicated
@@ -55,7 +55,7 @@ class AbstractQueryBuilder(ABC):
55
55
  """read from a file located in queries directory"""
56
56
  root = os.path.dirname(inspect.getfile(self.__class__))
57
57
  path = os.path.join(root, QUERIES_DIR, filename)
58
- with open(path, "r") as f:
58
+ with open(path) as f:
59
59
  return f.read()
60
60
 
61
61
  def load_statement(self, asset: WarehouseAsset) -> str:
@@ -75,7 +75,7 @@ class AbstractQueryBuilder(ABC):
75
75
  return ExtractionQuery(statement, params)
76
76
 
77
77
  @abstractmethod
78
- def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
78
+ def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
79
79
  """
80
80
  Build the Query allowing extraction of the given asset
81
81
  - Most of the time, returns a single query
@@ -1,6 +1,6 @@
1
1
  import itertools
2
2
  import logging
3
- from typing import List, Optional, Set
3
+ from typing import Optional
4
4
 
5
5
  from google.api_core.exceptions import Forbidden # type: ignore
6
6
  from google.cloud.bigquery import Client as GoogleCloudClient # type: ignore
@@ -27,9 +27,9 @@ class BigQueryClient(SqlalchemyClient):
27
27
  def __init__(
28
28
  self,
29
29
  credentials: dict,
30
- db_allowed: Optional[Set[str]] = None,
31
- db_blocked: Optional[Set[str]] = None,
32
- dataset_blocked: Optional[Set[str]] = None,
30
+ db_allowed: Optional[set[str]] = None,
31
+ db_blocked: Optional[set[str]] = None,
32
+ dataset_blocked: Optional[set[str]] = None,
33
33
  ):
34
34
  super().__init__(credentials)
35
35
  self._db_allowed = db_allowed
@@ -37,8 +37,8 @@ class BigQueryClient(SqlalchemyClient):
37
37
  self._dataset_blocked = dataset_blocked
38
38
  self.credentials = self._credentials()
39
39
  self.client = self._client()
40
- self._projects: List[str] | None = None
41
- self._datasets: List[Dataset] | None = None
40
+ self._projects: list[str] | None = None
41
+ self._datasets: list[Dataset] | None = None
42
42
 
43
43
  @staticmethod
44
44
  def name() -> str:
@@ -78,7 +78,7 @@ class BigQueryClient(SqlalchemyClient):
78
78
  credentials=self.credentials,
79
79
  )
80
80
 
81
- def _list_datasets(self) -> List[Dataset]:
81
+ def _list_datasets(self) -> list[Dataset]:
82
82
  """
83
83
  Returns datasets available for the given GCP client
84
84
  Cache the result in self._datasets to reduce number of API calls
@@ -98,7 +98,7 @@ class BigQueryClient(SqlalchemyClient):
98
98
  base_ms=_RETRY_BASE_MS,
99
99
  log_exc_info=True,
100
100
  )
101
- def get_projects(self) -> List[str]:
101
+ def get_projects(self) -> list[str]:
102
102
  """
103
103
  Returns distinct project_id available for the given GCP client
104
104
  Cache the result in self._projects to reduce number of API calls.
@@ -38,7 +38,7 @@ def _credentials(params: dict) -> dict:
38
38
  """extract GCP credentials"""
39
39
  path = params.get("credentials") or from_env(BIGQUERY_CREDENTIALS)
40
40
  logger.info(f"Credentials fetched from {path}")
41
- with open(path, "r") as file:
41
+ with open(path) as file:
42
42
  return cast(dict, json.load(file))
43
43
 
44
44
 
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import List, Optional
2
+ from typing import Optional
3
3
 
4
4
  from ..abstract import (
5
5
  AbstractQueryBuilder,
@@ -109,7 +109,7 @@ class BigQueryQueryBuilder(AbstractQueryBuilder):
109
109
  else self._regions
110
110
  )
111
111
 
112
- def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
112
+ def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
113
113
  """
114
114
  It would be easier to stitch data directly in the query statement (UNION ALL).
115
115
  Unfortunately, querying INFORMATION_SCHEMA on multiple regions
@@ -1,4 +1,2 @@
1
- from typing import Set, Tuple
2
-
3
- SetString = Set[str]
4
- SetTwoString = Set[Tuple[str, str]]
1
+ SetString = set[str]
2
+ SetTwoString = set[tuple[str, str]]