castor-extractor 0.21.9__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of castor-extractor might be problematic.
Files changed (128)
  1. CHANGELOG.md +4 -0
  2. castor_extractor/commands/__init__.py +0 -3
  3. castor_extractor/commands/file_check.py +1 -2
  4. castor_extractor/file_checker/column.py +5 -5
  5. castor_extractor/file_checker/file.py +7 -7
  6. castor_extractor/file_checker/file_test.py +2 -2
  7. castor_extractor/file_checker/templates/generic_warehouse.py +4 -6
  8. castor_extractor/knowledge/confluence/client/client.py +2 -1
  9. castor_extractor/knowledge/confluence/extract.py +3 -2
  10. castor_extractor/knowledge/notion/client/client.py +3 -2
  11. castor_extractor/knowledge/notion/extract.py +3 -2
  12. castor_extractor/quality/soda/client/client.py +2 -1
  13. castor_extractor/quality/soda/client/pagination.py +1 -3
  14. castor_extractor/types.py +3 -3
  15. castor_extractor/uploader/env.py +2 -2
  16. castor_extractor/uploader/upload.py +4 -3
  17. castor_extractor/uploader/utils.py +1 -1
  18. castor_extractor/utils/client/abstract.py +2 -1
  19. castor_extractor/utils/client/api/auth.py +2 -2
  20. castor_extractor/utils/client/api/auth_test.py +2 -2
  21. castor_extractor/utils/client/api/client.py +3 -3
  22. castor_extractor/utils/client/api/pagination.py +3 -2
  23. castor_extractor/utils/client/api/safe_request.py +5 -5
  24. castor_extractor/utils/collection.py +7 -11
  25. castor_extractor/utils/dbt/client.py +3 -3
  26. castor_extractor/utils/dbt/client_test.py +2 -2
  27. castor_extractor/utils/deprecate.py +1 -2
  28. castor_extractor/utils/files.py +5 -5
  29. castor_extractor/utils/formatter.py +5 -4
  30. castor_extractor/utils/json_stream_write.py +2 -1
  31. castor_extractor/utils/object.py +2 -1
  32. castor_extractor/utils/pager/pager.py +2 -4
  33. castor_extractor/utils/pager/pager_on_id.py +2 -1
  34. castor_extractor/utils/pager/pager_on_id_test.py +5 -5
  35. castor_extractor/utils/pager/pager_test.py +3 -3
  36. castor_extractor/utils/retry.py +4 -3
  37. castor_extractor/utils/retry_test.py +2 -3
  38. castor_extractor/utils/safe.py +3 -3
  39. castor_extractor/utils/salesforce/client.py +2 -1
  40. castor_extractor/utils/salesforce/credentials.py +1 -3
  41. castor_extractor/utils/store.py +2 -1
  42. castor_extractor/utils/string.py +2 -2
  43. castor_extractor/utils/string_test.py +1 -3
  44. castor_extractor/utils/type.py +3 -2
  45. castor_extractor/utils/validation.py +4 -4
  46. castor_extractor/utils/write.py +2 -2
  47. castor_extractor/visualization/domo/client/client.py +8 -7
  48. castor_extractor/visualization/domo/client/credentials.py +2 -2
  49. castor_extractor/visualization/domo/client/endpoints.py +2 -2
  50. castor_extractor/visualization/domo/extract.py +3 -2
  51. castor_extractor/visualization/looker/api/client.py +17 -16
  52. castor_extractor/visualization/looker/api/utils.py +2 -2
  53. castor_extractor/visualization/looker/assets.py +1 -3
  54. castor_extractor/visualization/looker/extract.py +4 -3
  55. castor_extractor/visualization/looker/fields.py +3 -3
  56. castor_extractor/visualization/looker/multithreading.py +3 -3
  57. castor_extractor/visualization/metabase/assets.py +1 -3
  58. castor_extractor/visualization/metabase/client/api/client.py +8 -7
  59. castor_extractor/visualization/metabase/extract.py +3 -2
  60. castor_extractor/visualization/metabase/types.py +1 -3
  61. castor_extractor/visualization/mode/client/client.py +6 -6
  62. castor_extractor/visualization/mode/extract.py +2 -2
  63. castor_extractor/visualization/powerbi/assets.py +1 -3
  64. castor_extractor/visualization/powerbi/client/client.py +12 -11
  65. castor_extractor/visualization/powerbi/client/credentials.py +3 -3
  66. castor_extractor/visualization/powerbi/client/endpoints.py +2 -2
  67. castor_extractor/visualization/powerbi/extract.py +3 -2
  68. castor_extractor/visualization/qlik/assets.py +1 -3
  69. castor_extractor/visualization/qlik/client/constants.py +1 -3
  70. castor_extractor/visualization/qlik/client/engine/error.py +1 -3
  71. castor_extractor/visualization/qlik/client/master.py +3 -3
  72. castor_extractor/visualization/qlik/client/rest.py +12 -12
  73. castor_extractor/visualization/qlik/extract.py +4 -3
  74. castor_extractor/visualization/salesforce_reporting/client/rest.py +3 -2
  75. castor_extractor/visualization/salesforce_reporting/client/soql.py +1 -3
  76. castor_extractor/visualization/salesforce_reporting/extract.py +3 -2
  77. castor_extractor/visualization/sigma/client/client.py +9 -8
  78. castor_extractor/visualization/sigma/client/credentials.py +1 -3
  79. castor_extractor/visualization/sigma/extract.py +3 -2
  80. castor_extractor/visualization/tableau/assets.py +1 -2
  81. castor_extractor/visualization/tableau/client/client.py +1 -2
  82. castor_extractor/visualization/tableau/client/client_utils.py +3 -2
  83. castor_extractor/visualization/tableau/client/credentials.py +3 -3
  84. castor_extractor/visualization/tableau/client/safe_mode.py +1 -2
  85. castor_extractor/visualization/tableau/extract.py +2 -2
  86. castor_extractor/visualization/tableau/gql_fields.py +3 -3
  87. castor_extractor/visualization/tableau/tsc_fields.py +1 -2
  88. castor_extractor/visualization/tableau/types.py +3 -3
  89. castor_extractor/visualization/tableau_revamp/client/client_metadata_api.py +3 -2
  90. castor_extractor/visualization/tableau_revamp/client/client_rest_api.py +3 -3
  91. castor_extractor/visualization/tableau_revamp/client/client_tsc.py +3 -2
  92. castor_extractor/visualization/tableau_revamp/client/gql_queries.py +1 -3
  93. castor_extractor/visualization/tableau_revamp/client/rest_fields.py +1 -3
  94. castor_extractor/visualization/tableau_revamp/extract.py +2 -2
  95. castor_extractor/visualization/thoughtspot/client/client.py +3 -2
  96. castor_extractor/visualization/thoughtspot/client/utils.py +1 -1
  97. castor_extractor/visualization/thoughtspot/extract.py +3 -2
  98. castor_extractor/warehouse/abstract/asset.py +4 -5
  99. castor_extractor/warehouse/abstract/extract.py +4 -3
  100. castor_extractor/warehouse/abstract/query.py +4 -4
  101. castor_extractor/warehouse/bigquery/client.py +8 -8
  102. castor_extractor/warehouse/bigquery/extract.py +1 -1
  103. castor_extractor/warehouse/bigquery/query.py +2 -2
  104. castor_extractor/warehouse/bigquery/types.py +2 -4
  105. castor_extractor/warehouse/databricks/api_client.py +15 -14
  106. castor_extractor/warehouse/databricks/client.py +16 -16
  107. castor_extractor/warehouse/databricks/extract.py +4 -4
  108. castor_extractor/warehouse/databricks/format.py +12 -12
  109. castor_extractor/warehouse/databricks/lineage.py +11 -11
  110. castor_extractor/warehouse/databricks/pagination.py +2 -2
  111. castor_extractor/warehouse/databricks/types.py +4 -4
  112. castor_extractor/warehouse/databricks/utils.py +5 -4
  113. castor_extractor/warehouse/mysql/query.py +2 -2
  114. castor_extractor/warehouse/postgres/query.py +2 -2
  115. castor_extractor/warehouse/redshift/client.py +1 -1
  116. castor_extractor/warehouse/redshift/query.py +2 -2
  117. castor_extractor/warehouse/salesforce/client.py +8 -8
  118. castor_extractor/warehouse/salesforce/extract.py +3 -4
  119. castor_extractor/warehouse/salesforce/format.py +8 -7
  120. castor_extractor/warehouse/salesforce/format_test.py +2 -4
  121. castor_extractor/warehouse/snowflake/query.py +5 -5
  122. castor_extractor/warehouse/sqlserver/client.py +1 -1
  123. castor_extractor/warehouse/sqlserver/query.py +2 -2
  124. {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/METADATA +7 -6
  125. {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/RECORD +128 -128
  126. {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/LICENCE +0 -0
  127. {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/WHEEL +0 -0
  128. {castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/entry_points.txt +0 -0
castor_extractor/warehouse/databricks/extract.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, Optional
+from typing import Optional
 
 from ...utils import AbstractStorage, LocalStorage, write_summary
 from ..abstract import (
@@ -29,7 +29,7 @@ DATABRICKS_ASSETS: SupportedAssets = {
 logger = logging.getLogger(__name__)
 
 OTimeFilter = Optional[TimeFilter]
-Paths = Dict[str, str]
+Paths = dict[str, str]
 
 
 class DatabricksExtractionProcessor:
@@ -71,7 +71,7 @@ class DatabricksExtractionProcessor:
         if self._should_not_reextract(WarehouseAssetGroup.CATALOG):
             return self._existing_group_paths(WarehouseAssetGroup.CATALOG)
 
-        catalog_locations: Dict[str, str] = dict()
+        catalog_locations: dict[str, str] = dict()
         databases = self._client.databases()
         location = self._storage.put(WarehouseAsset.DATABASE.value, databases)
         catalog_locations[WarehouseAsset.DATABASE.value] = location
@@ -101,7 +101,7 @@ class DatabricksExtractionProcessor:
             return self._existing_group_paths(
                 WarehouseAssetGroup.ADDITIONAL_LINEAGE
             )
-        lineage_locations: Dict[str, str] = dict()
+        lineage_locations: dict[str, str] = dict()
 
         # extract catalog
         databases = self._client.databases()
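Nearly every hunk in this release follows the pattern visible above: with Python 3.8 support dropped, `typing.Dict`, `typing.List`, `typing.Tuple` and `typing.Set` give way to the builtin generics standardized by PEP 585 (usable as annotations since Python 3.9), while `Iterator` and `Iterable` move to `collections.abc`. A minimal sketch of the before/after style (the `Paths` alias mirrors the diff above; `locations` and `iter_assets` are hypothetical names used only for illustration):

    # Requires Python 3.9+ (PEP 585): builtin types are subscriptable directly.
    from collections.abc import Iterator  # replaces typing.Iterator
    from typing import Optional           # Optional still comes from typing

    # Before (3.8-compatible):
    #   from typing import Dict, List
    #   Paths = Dict[str, str]

    Paths = dict[str, str]

    def locations(assets: list[str]) -> Paths:
        """Map each asset name to a hypothetical storage path."""
        return {asset: f"/tmp/{asset}.csv" for asset in assets}

    def iter_assets(paths: Paths, limit: Optional[int] = None) -> Iterator[tuple[str, str]]:
        """Yield (asset, path) pairs; tuple[...] likewise needs 3.9+."""
        for i, item in enumerate(paths.items()):
            if limit is not None and i >= limit:
                return
            yield item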
castor_extractor/warehouse/databricks/format.py
@@ -1,6 +1,6 @@
 import logging
 from datetime import datetime
-from typing import Dict, List, Optional
+from typing import Optional
 
 from .types import TablesColumns
 from .utils import build_path
@@ -12,7 +12,7 @@ EXCLUDED_SCHEMAS = {"information_schema", "default"}
 
 TABLE_URL_TPL = "{host}explore/data/{catalog_name}/{schema_name}/{table_name}?o={workspace_id}"
 
-TagMapping = Dict[str, List[str]]
+TagMapping = dict[str, list[str]]
 
 
 def _to_datetime_or_none(time_ms: Optional[int]) -> Optional[datetime]:
@@ -87,7 +87,7 @@ class DatabricksFormatter:
     """
 
     @staticmethod
-    def format_database(raw_databases: List[dict]) -> List[dict]:
+    def format_database(raw_databases: list[dict]) -> list[dict]:
         databases = []
         for catalog in raw_databases:
             name = catalog["name"]
@@ -101,7 +101,7 @@ class DatabricksFormatter:
         return databases
 
     @staticmethod
-    def format_schema(raw_schemas: List[dict], database: dict) -> List[dict]:
+    def format_schema(raw_schemas: list[dict], database: dict) -> list[dict]:
         schemas = []
         for schema in raw_schemas:
             if schema["name"] in EXCLUDED_SCHEMAS:
@@ -118,7 +118,7 @@ class DatabricksFormatter:
 
     @staticmethod
     def format_table_column(
-        raw_tables: List[dict],
+        raw_tables: list[dict],
         schema: dict,
         host: str,
         workspace_id: str,
@@ -141,8 +141,8 @@ class DatabricksFormatter:
         return tables, columns
 
     @staticmethod
-    def format_lineage(timestamps: dict) -> List[dict]:
-        lineage: List[dict] = []
+    def format_lineage(timestamps: dict) -> list[dict]:
+        lineage: list[dict] = []
         for link, timestamp in timestamps.items():
             parent_path, child_path = link
             link_ = {
@@ -154,7 +154,7 @@ class DatabricksFormatter:
         return lineage
 
     @staticmethod
-    def format_query(raw_queries: List[dict]) -> List[dict]:
+    def format_query(raw_queries: list[dict]) -> list[dict]:
         queries = []
         for q in raw_queries:
             if not q["query_text"]:
@@ -176,7 +176,7 @@ class DatabricksFormatter:
         return queries
 
     @staticmethod
-    def _primary(emails: List[dict]) -> Optional[str]:
+    def _primary(emails: list[dict]) -> Optional[str]:
        """helper function to select a unique email"""
        if not emails:
            return None
@@ -189,7 +189,7 @@ class DatabricksFormatter:
         emails = user.get("emails")
         return self._primary(emails) if emails else None
 
-    def format_user(self, raw_users: List[dict]) -> List[dict]:
+    def format_user(self, raw_users: list[dict]) -> list[dict]:
         users = []
         for user in raw_users:
             users.append(
@@ -204,8 +204,8 @@ class DatabricksFormatter:
         return users
 
     @staticmethod
-    def format_view_ddl(tables: List[dict], schema: dict) -> List[dict]:
-        view_ddl: List[dict] = []
+    def format_view_ddl(tables: list[dict], schema: dict) -> list[dict]:
+        view_ddl: list[dict] = []
         if not tables:
             return view_ddl
         for table in tables:
castor_extractor/warehouse/databricks/lineage.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Set, Tuple, cast
+from typing import cast
 
 from .types import Link, Ostr, OTimestampedLink, TimestampedLink
 
@@ -9,7 +9,7 @@ class LineageLinks:
     """
 
     def __init__(self):
-        self.lineage: Dict[Link, Ostr] = dict()
+        self.lineage: dict[Link, Ostr] = dict()
 
     def add(self, timestamped_link: TimestampedLink) -> None:
         """
@@ -52,7 +52,7 @@ def _link(path_from: Ostr, path_to: Ostr, timestamp: Ostr) -> OTimestampedLink:
 
 def single_table_lineage_links(
     table_path: str, single_table_lineage: dict
-) -> List[TimestampedLink]:
+) -> list[TimestampedLink]:
     """
     process databricks lineage API response for a given table
     returns a list of (parent, child, timestamp)
@@ -60,7 +60,7 @@ def single_table_lineage_links(
     Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
     we could also have `notebookInfos` or `fileInfo`
     """
-    links: List[OTimestampedLink] = []
+    links: list[OTimestampedLink] = []
     # add parent:
     for link in single_table_lineage.get("upstreams", []):
         parent = link.get("tableInfo", {})
@@ -80,7 +80,7 @@ def single_table_lineage_links(
 
 def single_column_lineage_links(
     column_path: str, single_column_lineage: dict
-) -> List[TimestampedLink]:
+) -> list[TimestampedLink]:
     """
     process databricks lineage API response for a given table
     returns a list of (parent, child, timestamp)
@@ -88,7 +88,7 @@ def single_column_lineage_links(
     Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
     we could also have `notebookInfos` or `fileInfo`
     """
-    links: List[OTimestampedLink] = []
+    links: list[OTimestampedLink] = []
     # add parent:
     for link in single_column_lineage.get("upstream_cols", []):
         parent_path = _to_column_path(link)
@@ -105,8 +105,8 @@ def single_column_lineage_links(
 
 
 def paths_for_column_lineage(
-    tables: List[dict], columns: List[dict], table_lineage: List[dict]
-) -> List[Tuple[str, str]]:
+    tables: list[dict], columns: list[dict], table_lineage: list[dict]
+) -> list[tuple[str, str]]:
     """
     helper providing a list of candidate columns to look lineage for:
    we only look for column lineage where there is table lineage
@@ -118,12 +118,12 @@ def paths_for_column_lineage(
        for table in tables
    }
 
-    tables_with_lineage: Set[str] = set()
+    tables_with_lineage: set[str] = set()
    for t in table_lineage:
        tables_with_lineage.add(t["parent_path"])
        tables_with_lineage.add(t["child_path"])
 
-    paths_to_return: List[Tuple[str, str]] = []
+    paths_to_return: list[tuple[str, str]] = []
    for column in columns:
        table_path = mapping[column["table_id"]]
        if table_path not in tables_with_lineage:
@@ -134,7 +134,7 @@ def paths_for_column_lineage(
    return paths_to_return
 
 
-def deduplicate_lineage(lineages: List[TimestampedLink]) -> dict:
+def deduplicate_lineage(lineages: list[TimestampedLink]) -> dict:
    deduplicated_lineage = LineageLinks()
    for timestamped_link in lineages:
        deduplicated_lineage.add(timestamped_link)
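`deduplicate_lineage` funnels every timestamped link through `LineageLinks`, whose `add` method is not part of this diff. A sketch of what such keyed deduplication typically looks like, assuming a repeated (parent, child) link keeps the more recent timestamp and that `None` never overwrites a known one; the actual rule inside `LineageLinks.add` may differ:

    from typing import Optional

    Link = tuple[str, str]  # (parent_path, child_path), as in types.py
    TimestampedLink = tuple[str, str, Optional[str]]

    def deduplicate(links: list[TimestampedLink]) -> dict[Link, Optional[str]]:
        """Keep one timestamp per (parent, child) pair (sketch semantics)."""
        lineage: dict[Link, Optional[str]] = {}
        for parent, child, timestamp in links:
            key = (parent, child)
            current = lineage.get(key)
            newer = timestamp is not None and (current is None or timestamp > current)
            if key not in lineage or newer:
                lineage[key] = timestamp
        return lineage

    # The duplicated ("a", "b") link collapses to its latest timestamp.
    assert deduplicate([("a", "b", "1"), ("a", "b", "2"), ("b", "c", None)]) == {
        ("a", "b"): "2",
        ("b", "c"): None,
    }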
castor_extractor/warehouse/databricks/pagination.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 
 from pydantic import Field
 
@@ -10,7 +10,7 @@ DATABRICKS_PAGE_SIZE = 100
 class DatabricksPagination(PaginationModel):
     next_page_token: Optional[str] = None
     has_next_page: bool = False
-    res: List[dict] = Field(default_factory=list)
+    res: list[dict] = Field(default_factory=list)
 
     def is_last(self) -> bool:
         return not (self.has_next_page and self.next_page_token)
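`DatabricksPagination` models one page of API results: `is_last()` reports the final page as soon as the response stops advertising a next page or stops returning a token. A rough sketch of the fetch-all loop such a model supports; the `Page` dataclass and `fetch_page` callable are stand-ins invented for this sketch, and the package's own `fetch_all_pages` helper (used elsewhere in this diff) is not shown here:

    from collections.abc import Callable, Iterator
    from dataclasses import dataclass, field
    from typing import Optional

    @dataclass
    class Page:
        """Stand-in for a PaginationModel-like page (hypothetical)."""
        next_page_token: Optional[str] = None
        has_next_page: bool = False
        res: list[dict] = field(default_factory=list)

        def is_last(self) -> bool:
            # Same predicate as DatabricksPagination above.
            return not (self.has_next_page and self.next_page_token)

    def fetch_all(fetch_page: Callable[[Optional[str]], Page]) -> Iterator[dict]:
        """Drive a paginated endpoint until is_last() says stop."""
        token: Optional[str] = None
        while True:
            page = fetch_page(token)
            yield from page.res
            if page.is_last():
                return
            token = page.next_page_token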
castor_extractor/warehouse/databricks/types.py
@@ -1,8 +1,8 @@
-from typing import List, Optional, Tuple
+from typing import Optional
 
-Link = Tuple[str, str]
-TablesColumns = Tuple[List[dict], List[dict]]
+Link = tuple[str, str]
+TablesColumns = tuple[list[dict], list[dict]]
 Ostr = Optional[str]
-TimestampedLink = Tuple[str, str, Ostr]
+TimestampedLink = tuple[str, str, Ostr]
 
 OTimestampedLink = Optional[TimestampedLink]
castor_extractor/warehouse/databricks/utils.py
@@ -1,5 +1,6 @@
+from collections.abc import Iterable
 from datetime import date
-from typing import Dict, Iterable, List, Optional
+from typing import Optional
 
 from ...utils import at_midnight
 from ..abstract import TimeFilter
@@ -14,8 +15,8 @@ def _day_hour_to_epoch_ms(day: date, hour: int) -> int:
 
 
 def build_path(
-    row: Dict,
-    keys: List[str],
+    row: dict,
+    keys: list[str],
 ) -> str:
     """
     format an asset's path:
@@ -26,7 +27,7 @@ def build_path(
     return ".".join(key_values)
 
 
-def tag_label(row: Dict) -> str:
+def tag_label(row: dict) -> str:
     """
     format the tag's label:
     - {key:value} when the value is not empty
castor_extractor/warehouse/mysql/query.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 
 from ..abstract import (
     AbstractQueryBuilder,
@@ -19,6 +19,6 @@ class MySQLQueryBuilder(AbstractQueryBuilder):
     ):
         super().__init__(time_filter=time_filter)
 
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
         return [query]
castor_extractor/warehouse/postgres/query.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 
 from ..abstract import (
     AbstractQueryBuilder,
@@ -19,6 +19,6 @@ class PostgresQueryBuilder(AbstractQueryBuilder):
     ):
         super().__init__(time_filter=time_filter)
 
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
         return [query]
castor_extractor/warehouse/redshift/client.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Iterator
+from collections.abc import Iterator
 
 from psycopg2 import extensions  # type: ignore
 from sqlalchemy.engine import Connection, ResultProxy
castor_extractor/warehouse/redshift/query.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 
 from ..abstract import (
     AbstractQueryBuilder,
@@ -27,7 +27,7 @@ class RedshiftQueryBuilder(AbstractQueryBuilder):
         params = self._time_filter.to_dict()
         return ExtractionQuery(statement, params)
 
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         if asset == WarehouseAsset.QUERY and self.is_serverless:
             query = self.build_query_serverless()
         else:
castor_extractor/warehouse/salesforce/client.py
@@ -1,6 +1,6 @@
 import logging
 from functools import partial
-from typing import Dict, List, Optional, Tuple
+from typing import Optional
 
 from tqdm import tqdm  # type: ignore
 
@@ -29,7 +29,7 @@ class SalesforceClient(SalesforceBaseClient):
     def name() -> str:
         return "Salesforce"
 
-    def fetch_sobjects(self) -> List[dict]:
+    def fetch_sobjects(self) -> list[dict]:
         """Fetch all sobjects"""
         logger.info("Extracting sobjects")
         query = format_sobject_query()
@@ -39,7 +39,7 @@ class SalesforceClient(SalesforceBaseClient):
         results = fetch_all_pages(request_, SalesforceSQLPagination)
         return list(results)
 
-    def fetch_fields(self, sobject_name: str) -> List[dict]:
+    def fetch_fields(self, sobject_name: str) -> list[dict]:
         """Fetches fields of a given sobject"""
         query = SOBJECT_FIELDS_QUERY_TPL.format(
             entity_definition_id=sobject_name
@@ -55,7 +55,7 @@ class SalesforceClient(SalesforceBaseClient):
             return None
         return response["records"][0]["Description"]
 
-    def add_table_descriptions(self, sobjects: List[dict]) -> List[dict]:
+    def add_table_descriptions(self, sobjects: list[dict]) -> list[dict]:
         """
         Add table descriptions.
         We use the tooling API which does not handle well the LIMIT in SOQL
@@ -67,7 +67,7 @@ class SalesforceClient(SalesforceBaseClient):
             described_sobjects.append({**sobject, "Description": description})
         return described_sobjects
 
-    def tables(self) -> List[dict]:
+    def tables(self) -> list[dict]:
         """
         Get Salesforce sobjects as tables
         """
@@ -77,13 +77,13 @@ class SalesforceClient(SalesforceBaseClient):
         return list(self.formatter.tables(described_sobjects))
 
     def columns(
-        self, sobject_names: List[Tuple[str, str]], show_progress: bool = True
-    ) -> List[dict]:
+        self, sobject_names: list[tuple[str, str]], show_progress: bool = True
+    ) -> list[dict]:
         """
         Get salesforce sobject fields as columns
         show_progress: optionally deactivate the tqdm progress bar
         """
-        sobject_fields: Dict[str, List[dict]] = dict()
+        sobject_fields: dict[str, list[dict]] = dict()
         for api_name, table_name in tqdm(
             sobject_names, disable=not show_progress
         ):
castor_extractor/warehouse/salesforce/extract.py
@@ -1,5 +1,4 @@
 import logging
-from typing import Dict, List, Tuple
 
 from ...utils import AbstractStorage, LocalStorage, write_summary
 from ...utils.salesforce import SalesforceCredentials
@@ -14,9 +13,9 @@ from .client import SalesforceClient
 logger = logging.getLogger(__name__)
 
 
-Paths = Dict[str, str]
+Paths = dict[str, str]
 
-SALESFORCE_CATALOG_ASSETS: Tuple[WarehouseAsset, ...] = (
+SALESFORCE_CATALOG_ASSETS: tuple[WarehouseAsset, ...] = (
     WarehouseAsset.TABLE,
     WarehouseAsset.COLUMN,
 )
@@ -81,7 +80,7 @@ class SalesforceExtractionProcessor:
 
     def extract_role(self) -> Paths:
         """extract no users and return the empty file location"""
-        users: List[dict] = []
+        users: list[dict] = []
         location = self._storage.put(WarehouseAsset.USER.value, users)
         logger.info(f"Extracted {len(users)} users to {location}")
         return {WarehouseAsset.USER.value: location}
castor_extractor/warehouse/salesforce/format.py
@@ -1,4 +1,5 @@
-from typing import Any, Dict, Iterator, List
+from collections.abc import Iterator
+from typing import Any
 
 from ...utils import group_by
 from .constants import SCHEMA_NAME
@@ -25,10 +26,10 @@ def _name(sobject: dict) -> str:
     return f"{label} ({api_name})"
 
 
-def _field_description(field: Dict[str, Any]) -> str:
-    context: Dict[str, str] = {}
+def _field_description(field: dict[str, Any]) -> str:
+    context: dict[str, str] = {}
 
-    field_definition: Dict[str, str] = field.get("FieldDefinition") or {}
+    field_definition: dict[str, str] = field.get("FieldDefinition") or {}
     if description := field_definition.get("Description"):
         context["Description"] = _clean(description)
     if help_text := field.get("InlineHelpText"):
@@ -69,7 +70,7 @@ def _to_table_payload(sobject: dict) -> dict:
     }
 
 
-def _detect_duplicates(sobjects: List[dict]) -> List[dict]:
+def _detect_duplicates(sobjects: list[dict]) -> list[dict]:
     """
     enrich the given data with "has_duplicate" flag:
     - True when another asset has the same Label in the list
@@ -89,7 +90,7 @@ class SalesforceFormatter:
     """
 
     @staticmethod
-    def tables(sobjects: List[dict]) -> Iterator[dict]:
+    def tables(sobjects: list[dict]) -> Iterator[dict]:
         """
         formats the raw list of sobjects to tables
         """
@@ -98,7 +99,7 @@ class SalesforceFormatter:
             yield _to_table_payload(sobject)
 
     @staticmethod
-    def columns(sobject_fields: Dict[str, List[dict]]) -> Iterator[dict]:
+    def columns(sobject_fields: dict[str, list[dict]]) -> Iterator[dict]:
         """formats the raw list of sobject fields to columns"""
         for table_name, fields in sobject_fields.items():
             fields = _detect_duplicates(fields)
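The `_detect_duplicates` docstring above describes flagging assets whose `Label` appears more than once in the list. A sketch of that logic, assuming `_HAS_DUPLICATE_KEY` is simply the dictionary key written onto each row; the real helper in format.py may differ in detail:

    from collections import Counter

    _HAS_DUPLICATE_KEY = "has_duplicate"  # assumed value of the constant

    def detect_duplicates(sobjects: list[dict]) -> list[dict]:
        """Flag each sobject whose Label is shared with another in the list."""
        label_counts = Counter(s["Label"] for s in sobjects)
        return [
            {**s, _HAS_DUPLICATE_KEY: label_counts[s["Label"]] > 1}
            for s in sobjects
        ]

    # Echoes the format_test fixture below: two sobjects share label "a".
    rows = detect_duplicates([
        {"Label": "a", "QualifiedApiName": "a_one"},
        {"Label": "b", "QualifiedApiName": "b"},
        {"Label": "a", "QualifiedApiName": "a_prime"},
    ])
    assert [r[_HAS_DUPLICATE_KEY] for r in rows] == [True, False, True]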
castor_extractor/warehouse/salesforce/format_test.py
@@ -1,5 +1,3 @@
-from typing import Dict, List, Tuple
-
 from .format import (
     _HAS_DUPLICATE_KEY,
     SalesforceFormatter,
@@ -9,7 +7,7 @@ from .format import (
 )
 
 
-def _tables_sobjects() -> Tuple[Dict[str, str], ...]:
+def _tables_sobjects() -> tuple[dict[str, str], ...]:
     """Returns 4 sobjects with 2 sharing the same label"""
     a = {"Label": "a", "QualifiedApiName": "a_one"}
     b = {"Label": "b", "QualifiedApiName": "b"}
@@ -18,7 +16,7 @@ def _tables_sobjects() -> Tuple[Dict[str, str], ...]:
     return a, b, c, a_prime
 
 
-def _columns_sobjects() -> Dict[str, List[dict]]:
+def _columns_sobjects() -> dict[str, list[dict]]:
     a = {"Label": "First Name", "QualifiedApiName": "owner_name"}
     b = {"Label": "First Name", "QualifiedApiName": "editor_name"}
     c = {"Label": "Foo Bar", "QualifiedApiName": "foo_bar"}
castor_extractor/warehouse/snowflake/query.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 
 from ..abstract import (
     CATALOG_ASSETS,
@@ -14,7 +14,7 @@ DB_FILTERED_ASSETS = (
 )
 
 
-def _database_filter(db_list: Optional[List[str]], allow: bool) -> str:
+def _database_filter(db_list: Optional[list[str]], allow: bool) -> str:
     if not db_list:
         return ""
     keyword = "IN" if allow else "NOT IN"
@@ -34,8 +34,8 @@ class SnowflakeQueryBuilder(AbstractQueryBuilder):
     def __init__(
         self,
         time_filter: Optional[TimeFilter] = None,
-        db_allowed: Optional[List[str]] = None,
-        db_blocked: Optional[List[str]] = None,
+        db_allowed: Optional[list[str]] = None,
+        db_blocked: Optional[list[str]] = None,
         fetch_transient: Optional[bool] = False,
     ):
         super().__init__(time_filter=time_filter)
@@ -52,7 +52,7 @@ class SnowflakeQueryBuilder(AbstractQueryBuilder):
 
         return statement
 
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
 
         if asset in DB_FILTERED_ASSETS:
castor_extractor/warehouse/sqlserver/client.py
@@ -1,4 +1,4 @@
-from typing import Iterator
+from collections.abc import Iterator
 
 from sqlalchemy import text
 
castor_extractor/warehouse/sqlserver/query.py
@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 
 from ..abstract import (
     AbstractQueryBuilder,
@@ -19,6 +19,6 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
     ):
         super().__init__(time_filter=time_filter)
 
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
         return [query]
{castor_extractor-0.21.9.dist-info → castor_extractor-0.22.0.dist-info}/METADATA
@@ -1,16 +1,15 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.21.9
+Version: 0.22.0
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
 Author: Castor
 Author-email: support@castordoc.com
-Requires-Python: >=3.8,<3.13
+Requires-Python: >=3.9,<3.13
 Classifier: License :: Other/Proprietary License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -39,10 +38,8 @@ Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
 Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
 Requires-Dist: looker-sdk (>=24.16.0,<24.17.0) ; extra == "looker" or extra == "all"
 Requires-Dist: msal (>=1.20.0,<2.0.0) ; extra == "powerbi" or extra == "all"
-Requires-Dist: numpy (<1.25) ; (python_version >= "3.8" and python_version < "3.9") and (extra == "bigquery" or extra == "databricks" or extra == "all")
 Requires-Dist: numpy (<2) ; extra == "bigquery" or extra == "databricks" or extra == "all"
 Requires-Dist: numpy (>=1.26) ; (python_version >= "3.12" and python_version < "3.13") and (extra == "bigquery" or extra == "databricks" or extra == "all")
-Requires-Dist: pandas (<2.1) ; (python_version >= "3.8" and python_version < "3.9") and (extra == "databricks" or extra == "all")
 Requires-Dist: pandas (>=2.1) ; (python_version >= "3.12" and python_version < "3.13") and (extra == "databricks" or extra == "all")
 Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0) ; extra == "metabase" or extra == "postgres" or extra == "redshift" or extra == "all"
 Requires-Dist: pycryptodome (>=3.0.0,<4.0.0) ; extra == "metabase" or extra == "all"
@@ -52,7 +49,7 @@ Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all"
 Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
 Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
-Requires-Dist: setuptools (>=75.3.0,<75.4.0)
+Requires-Dist: setuptools (>=75.6)
 Requires-Dist: snowflake-connector-python (>=3.4.0,<4.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: snowflake-sqlalchemy (!=1.2.5,<2.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: sqlalchemy (>=1.4,<1.5)
@@ -208,6 +205,10 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:support@castordoc.com)
 
 # Changelog
 
+## 0.22.0 - 2024-12-04
+
+* Stop supporting python3.8
+
 ## 0.21.9 - 2024-12-04
 
 * Tableau: fix handling of timeout retry