PyPI - castor-extractor - Versions diffs - 0.21.7__py3-none-any.whl → 0.22.0__py3-none-any.whl - Mend

castor-extractor 0.21.7__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of castor-extractor might be problematic. Click here for more details.

Files changed (131)

castor_extractor/warehouse/databricks/api_client.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import logging
+from collections.abc import Iterator
 from functools import partial
 from http import HTTPStatus
-from typing import Iterator, List, Optional, Set, Tuple
+from typing import Optional
 import requests
@@ -55,8 +56,8 @@ class DatabricksAPIClient(APIClient):
     def __init__(
         self,
         credentials: DatabricksCredentials,
-        db_allowed: Optional[Set[str]] = None,
-        db_blocked: Optional[Set[str]] = None,
+        db_allowed: Optional[set[str]] = None,
+        db_blocked: Optional[set[str]] = None,
     ):
         auth = DatabricksAuth(credentials)
         super().__init__(
@@ -81,18 +82,18 @@ class DatabricksAPIClient(APIClient):
             return False
         return True
-    def databases(self) -> List[dict]:
+    def databases(self) -> list[dict]:
         content = self._get(DatabricksEndpointFactory.databases())
         _databases = self.formatter.format_database(content.get("catalogs", []))
         return [d for d in _databases if self._keep_catalog(d["database_name"])]
-    def _schemas_of_database(self, database: dict) -> List[dict]:
+    def _schemas_of_database(self, database: dict) -> list[dict]:
         payload = {"catalog_name": database["database_name"]}
         content = self._get(DatabricksEndpointFactory.schemas(), params=payload)
         schemas = content.get("schemas", [])
         return self.formatter.format_schema(schemas, database)
-    def schemas(self, databases: List[dict]) -> List[dict]:
+    def schemas(self, databases: list[dict]) -> list[dict]:
         """
         Get the databricks schemas (also sometimes called databases)
         (which correspond to the schemas in Castor)
@@ -143,8 +144,8 @@ class DatabricksAPIClient(APIClient):
     )
     def get_single_column_lineage(
         self,
-        names: Tuple[str, str],
-    ) -> List[TimestampedLink]:
+        names: tuple[str, str],
+    ) -> list[TimestampedLink]:
         """
         Helper function used in get_lineage_links.
         Call data lineage API and return the content of the result
@@ -172,7 +173,7 @@ class DatabricksAPIClient(APIClient):
     )
     def get_single_table_lineage(
         self, table_path: str
-    ) -> List[TimestampedLink]:
+    ) -> list[TimestampedLink]:
         """
         Helper function used in get_lineage_links.
         Call data lineage API and return the content of the result
@@ -210,7 +211,7 @@ class DatabricksAPIClient(APIClient):
         queries = fetch_all_pages(request, DatabricksPagination)
         return queries
-    def queries(self, time_filter: Optional[TimeFilter] = None) -> List[dict]:
+    def queries(self, time_filter: Optional[TimeFilter] = None) -> list[dict]:
         """get all queries, hour per hour"""
         time_range_filters = hourly_time_filters(time_filter)
         raw_queries = []
@@ -220,14 +221,14 @@ class DatabricksAPIClient(APIClient):
             raw_queries.extend(hourly)
         return self.formatter.format_query(raw_queries)
-    def users(self) -> List[dict]:
+    def users(self) -> list[dict]:
         """
         retrieve user from api
         """
         content = self._get(DatabricksEndpointFactory.users())
         return self.formatter.format_user(content.get("Resources", []))
-    def _view_ddl_per_schema(self, schema: dict) -> List[dict]:
+    def _view_ddl_per_schema(self, schema: dict) -> list[dict]:
         payload = {
             "catalog_name": schema["database_id"],
             "schema_name": schema["schema_name"],
@@ -236,9 +237,9 @@ class DatabricksAPIClient(APIClient):
         content = self._get(DatabricksEndpointFactory.tables(), params=payload)
         return self.formatter.format_view_ddl(content.get("tables", []), schema)
-    def view_ddl(self, schemas: List[dict]) -> List[dict]:
+    def view_ddl(self, schemas: list[dict]) -> list[dict]:
         """retrieve view ddl"""
-        view_ddl: List[dict] = []
+        view_ddl: list[dict] = []
         for schema in schemas:
             v_to_add = self._view_ddl_per_schema(schema)
             view_ddl.extend(v_to_add)

castor_extractor/warehouse/databricks/client.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Optional, Set
+from typing import Optional
 from ...utils import (
     mapping_from_rows,
@@ -25,8 +25,8 @@ class DatabricksClient:
     def __init__(
         self,
         credentials: DatabricksCredentials,
-        db_allowed: Optional[Set[str]] = None,
-        db_blocked: Optional[Set[str]] = None,
+        db_allowed: Optional[set[str]] = None,
+        db_blocked: Optional[set[str]] = None,
         has_table_tags: bool = False,
         has_column_tags: bool = False,
     ):
@@ -58,26 +58,26 @@ class DatabricksClient:
         return {**table, "owner_external_id": owner_external_id}
     @staticmethod
-    def _get_user_mapping(users: List[dict]) -> dict:
+    def _get_user_mapping(users: list[dict]) -> dict:
         return {
             **mapping_from_rows(users, "email", "id"),
             **mapping_from_rows(users, "user_name", "id"),
         }
-    def schemas(self, databases: List[dict]) -> List[dict]:
+    def schemas(self, databases: list[dict]) -> list[dict]:
         return self.api_client.schemas(databases)
-    def databases(self) -> List[dict]:
+    def databases(self) -> list[dict]:
         return self.api_client.databases()
     def tables_and_columns(
-        self, schemas: List[dict], users: List[dict]
+        self, schemas: list[dict], users: list[dict]
     ) -> TablesColumns:
         """
         Get the databricks tables & columns leveraging the unity catalog API
         """
-        tables: List[dict] = []
-        columns: List[dict] = []
+        tables: list[dict] = []
+        columns: list[dict] = []
         user_mapping = self._get_user_mapping(users)
         table_tags = self.sql_client.get_tags_mapping(TagEntity.TABLE)
         column_tags = self.sql_client.get_tags_mapping(TagEntity.COLUMN)
@@ -95,7 +95,7 @@ class DatabricksClient:
             columns.extend(c_to_add)
         return tables, columns
-    def table_lineage(self, tables: List[dict]) -> List[dict]:
+    def table_lineage(self, tables: list[dict]) -> list[dict]:
         """
         Wrapper function that retrieves all table lineage
         """
@@ -113,8 +113,8 @@ class DatabricksClient:
         return self.formatter.format_lineage(deduplicated)
     def column_lineage(
-        self, tables: List[dict], columns: List[dict], table_lineage: List[dict]
-    ) -> List[dict]:
+        self, tables: list[dict], columns: list[dict], table_lineage: list[dict]
+    ) -> list[dict]:
         """
         Wrapper function that retrieves all column lineage
         we only try to retrieve column lineage if we found table lineage
@@ -129,17 +129,17 @@ class DatabricksClient:
             results = executor.map(
                 self.api_client.get_single_column_lineage, candidate_paths
             )
-        lineages: List[TimestampedLink] = [
+        lineages: list[TimestampedLink] = [
             link for links in results for link in links
         ]
         deduplicated = deduplicate_lineage(lineages)
         return self.formatter.format_lineage(deduplicated)
-    def queries(self, time_filter: Optional[TimeFilter] = None) -> List[dict]:
+    def queries(self, time_filter: Optional[TimeFilter] = None) -> list[dict]:
         return self.api_client.queries(time_filter)
-    def users(self) -> List[dict]:
+    def users(self) -> list[dict]:
         return self.api_client.users()
-    def view_ddl(self, schemas: List[dict]) -> List[dict]:
+    def view_ddl(self, schemas: list[dict]) -> list[dict]:
         return self.api_client.view_ddl(schemas)

castor_extractor/warehouse/databricks/extract.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, Optional
+from typing import Optional
 from ...utils import AbstractStorage, LocalStorage, write_summary
 from ..abstract import (
@@ -29,7 +29,7 @@ DATABRICKS_ASSETS: SupportedAssets = {
 logger = logging.getLogger(__name__)
 OTimeFilter = Optional[TimeFilter]
-Paths = Dict[str, str]
+Paths = dict[str, str]
 class DatabricksExtractionProcessor:
@@ -71,7 +71,7 @@ class DatabricksExtractionProcessor:
         if self._should_not_reextract(WarehouseAssetGroup.CATALOG):
             return self._existing_group_paths(WarehouseAssetGroup.CATALOG)
-        catalog_locations: Dict[str, str] = dict()
+        catalog_locations: dict[str, str] = dict()
         databases = self._client.databases()
         location = self._storage.put(WarehouseAsset.DATABASE.value, databases)
         catalog_locations[WarehouseAsset.DATABASE.value] = location
@@ -101,7 +101,7 @@ class DatabricksExtractionProcessor:
             return self._existing_group_paths(
                 WarehouseAssetGroup.ADDITIONAL_LINEAGE
             )
-        lineage_locations: Dict[str, str] = dict()
+        lineage_locations: dict[str, str] = dict()
         # extract catalog
         databases = self._client.databases()

castor_extractor/warehouse/databricks/format.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 from datetime import datetime
-from typing import Dict, List, Optional
+from typing import Optional
 from .types import TablesColumns
 from .utils import build_path
@@ -12,7 +12,7 @@ EXCLUDED_SCHEMAS = {"information_schema", "default"}
 TABLE_URL_TPL = "{host}explore/data/{catalog_name}/{schema_name}/{table_name}?o={workspace_id}"
-TagMapping = Dict[str, List[str]]
+TagMapping = dict[str, list[str]]
 def _to_datetime_or_none(time_ms: Optional[int]) -> Optional[datetime]:
@@ -87,7 +87,7 @@ class DatabricksFormatter:
     """
     @staticmethod
-    def format_database(raw_databases: List[dict]) -> List[dict]:
+    def format_database(raw_databases: list[dict]) -> list[dict]:
         databases = []
         for catalog in raw_databases:
             name = catalog["name"]
@@ -101,7 +101,7 @@ class DatabricksFormatter:
         return databases
     @staticmethod
-    def format_schema(raw_schemas: List[dict], database: dict) -> List[dict]:
+    def format_schema(raw_schemas: list[dict], database: dict) -> list[dict]:
         schemas = []
         for schema in raw_schemas:
             if schema["name"] in EXCLUDED_SCHEMAS:
@@ -118,7 +118,7 @@ class DatabricksFormatter:
     @staticmethod
     def format_table_column(
-        raw_tables: List[dict],
+        raw_tables: list[dict],
         schema: dict,
         host: str,
         workspace_id: str,
@@ -141,8 +141,8 @@ class DatabricksFormatter:
         return tables, columns
     @staticmethod
-    def format_lineage(timestamps: dict) -> List[dict]:
-        lineage: List[dict] = []
+    def format_lineage(timestamps: dict) -> list[dict]:
+        lineage: list[dict] = []
         for link, timestamp in timestamps.items():
             parent_path, child_path = link
             link_ = {
@@ -154,7 +154,7 @@ class DatabricksFormatter:
         return lineage
     @staticmethod
-    def format_query(raw_queries: List[dict]) -> List[dict]:
+    def format_query(raw_queries: list[dict]) -> list[dict]:
         queries = []
         for q in raw_queries:
             if not q["query_text"]:
@@ -176,7 +176,7 @@ class DatabricksFormatter:
         return queries
     @staticmethod
-    def _primary(emails: List[dict]) -> Optional[str]:
+    def _primary(emails: list[dict]) -> Optional[str]:
         """helper function to select a unique email"""
         if not emails:
             return None
@@ -189,7 +189,7 @@ class DatabricksFormatter:
         emails = user.get("emails")
         return self._primary(emails) if emails else None
-    def format_user(self, raw_users: List[dict]) -> List[dict]:
+    def format_user(self, raw_users: list[dict]) -> list[dict]:
         users = []
         for user in raw_users:
             users.append(
@@ -204,8 +204,8 @@ class DatabricksFormatter:
         return users
     @staticmethod
-    def format_view_ddl(tables: List[dict], schema: dict) -> List[dict]:
-        view_ddl: List[dict] = []
+    def format_view_ddl(tables: list[dict], schema: dict) -> list[dict]:
+        view_ddl: list[dict] = []
         if not tables:
             return view_ddl
         for table in tables:

castor_extractor/warehouse/databricks/lineage.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Dict, List, Set, Tuple, cast
+from typing import cast
 from .types import Link, Ostr, OTimestampedLink, TimestampedLink
@@ -9,7 +9,7 @@ class LineageLinks:
     """
     def __init__(self):
-        self.lineage: Dict[Link, Ostr] = dict()
+        self.lineage: dict[Link, Ostr] = dict()
     def add(self, timestamped_link: TimestampedLink) -> None:
         """
@@ -52,7 +52,7 @@ def _link(path_from: Ostr, path_to: Ostr, timestamp: Ostr) -> OTimestampedLink:
 def single_table_lineage_links(
     table_path: str, single_table_lineage: dict
-) -> List[TimestampedLink]:
+) -> list[TimestampedLink]:
     """
     process databricks lineage API response for a given table
     returns a list of (parent, child, timestamp)
@@ -60,7 +60,7 @@ def single_table_lineage_links(
     Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
     we could also have `notebookInfos` or `fileInfo`
     """
-    links: List[OTimestampedLink] = []
+    links: list[OTimestampedLink] = []
     # add parent:
     for link in single_table_lineage.get("upstreams", []):
         parent = link.get("tableInfo", {})
@@ -80,7 +80,7 @@ def single_table_lineage_links(
 def single_column_lineage_links(
     column_path: str, single_column_lineage: dict
-) -> List[TimestampedLink]:
+) -> list[TimestampedLink]:
     """
     process databricks lineage API response for a given table
     returns a list of (parent, child, timestamp)
@@ -88,7 +88,7 @@ def single_column_lineage_links(
     Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
     we could also have `notebookInfos` or `fileInfo`
     """
-    links: List[OTimestampedLink] = []
+    links: list[OTimestampedLink] = []
     # add parent:
     for link in single_column_lineage.get("upstream_cols", []):
         parent_path = _to_column_path(link)
@@ -105,8 +105,8 @@ def single_column_lineage_links(
 def paths_for_column_lineage(
-    tables: List[dict], columns: List[dict], table_lineage: List[dict]
-) -> List[Tuple[str, str]]:
+    tables: list[dict], columns: list[dict], table_lineage: list[dict]
+) -> list[tuple[str, str]]:
     """
     helper providing a list of candidate columns to look lineage for:
     we only look for column lineage where there is table lineage
@@ -118,12 +118,12 @@ def paths_for_column_lineage(
         for table in tables
     }
-    tables_with_lineage: Set[str] = set()
+    tables_with_lineage: set[str] = set()
     for t in table_lineage:
         tables_with_lineage.add(t["parent_path"])
         tables_with_lineage.add(t["child_path"])
-    paths_to_return: List[Tuple[str, str]] = []
+    paths_to_return: list[tuple[str, str]] = []
     for column in columns:
         table_path = mapping[column["table_id"]]
         if table_path not in tables_with_lineage:
@@ -134,7 +134,7 @@ def paths_for_column_lineage(
     return paths_to_return
-def deduplicate_lineage(lineages: List[TimestampedLink]) -> dict:
+def deduplicate_lineage(lineages: list[TimestampedLink]) -> dict:
     deduplicated_lineage = LineageLinks()
     for timestamped_link in lineages:
         deduplicated_lineage.add(timestamped_link)

castor_extractor/warehouse/databricks/pagination.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 from pydantic import Field
@@ -10,7 +10,7 @@ DATABRICKS_PAGE_SIZE = 100
 class DatabricksPagination(PaginationModel):
     next_page_token: Optional[str] = None
     has_next_page: bool = False
-    res: List[dict] = Field(default_factory=list)
+    res: list[dict] = Field(default_factory=list)
     def is_last(self) -> bool:
         return not (self.has_next_page and self.next_page_token)

castor_extractor/warehouse/databricks/types.py CHANGED Viewed

@@ -1,8 +1,8 @@
-from typing import List, Optional, Tuple
+from typing import Optional
-Link = Tuple[str, str]
-TablesColumns = Tuple[List[dict], List[dict]]
+Link = tuple[str, str]
+TablesColumns = tuple[list[dict], list[dict]]
 Ostr = Optional[str]
-TimestampedLink = Tuple[str, str, Ostr]
+TimestampedLink = tuple[str, str, Ostr]
 OTimestampedLink = Optional[TimestampedLink]

castor_extractor/warehouse/databricks/utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
+from collections.abc import Iterable
 from datetime import date
-from typing import Dict, Iterable, List, Optional
+from typing import Optional
 from ...utils import at_midnight
 from ..abstract import TimeFilter
@@ -14,8 +15,8 @@ def _day_hour_to_epoch_ms(day: date, hour: int) -> int:
 def build_path(
-    row: Dict,
-    keys: List[str],
+    row: dict,
+    keys: list[str],
 ) -> str:
     """
     format an asset's path:
@@ -26,7 +27,7 @@ def build_path(
     return ".".join(key_values)
-def tag_label(row: Dict) -> str:
+def tag_label(row: dict) -> str:
     """
     format the tag's label:
     - {key:value} when the value is not empty

castor_extractor/warehouse/mysql/query.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 from ..abstract import (
     AbstractQueryBuilder,
@@ -19,6 +19,6 @@ class MySQLQueryBuilder(AbstractQueryBuilder):
     ):
         super().__init__(time_filter=time_filter)
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
         return [query]

castor_extractor/warehouse/postgres/query.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 from ..abstract import (
     AbstractQueryBuilder,
@@ -19,6 +19,6 @@ class PostgresQueryBuilder(AbstractQueryBuilder):
     ):
         super().__init__(time_filter=time_filter)
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
         return [query]

castor_extractor/warehouse/redshift/client.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import logging
-from typing import Iterator
+from collections.abc import Iterator
 from psycopg2 import extensions  # type: ignore
 from sqlalchemy.engine import Connection, ResultProxy

castor_extractor/warehouse/redshift/query.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Optional
 from ..abstract import (
     AbstractQueryBuilder,
@@ -27,7 +27,7 @@ class RedshiftQueryBuilder(AbstractQueryBuilder):
         params = self._time_filter.to_dict()
         return ExtractionQuery(statement, params)
-    def build(self, asset: WarehouseAsset) -> List[ExtractionQuery]:
+    def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         if asset == WarehouseAsset.QUERY and self.is_serverless:
             query = self.build_query_serverless()
         else:

castor_extractor/warehouse/salesforce/client.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 from functools import partial
-from typing import Dict, List, Optional, Tuple
+from typing import Optional
 from tqdm import tqdm  # type: ignore
@@ -29,7 +29,7 @@ class SalesforceClient(SalesforceBaseClient):
     def name() -> str:
         return "Salesforce"
-    def fetch_sobjects(self) -> List[dict]:
+    def fetch_sobjects(self) -> list[dict]:
         """Fetch all sobjects"""
         logger.info("Extracting sobjects")
         query = format_sobject_query()
@@ -39,7 +39,7 @@ class SalesforceClient(SalesforceBaseClient):
         results = fetch_all_pages(request_, SalesforceSQLPagination)
         return list(results)
-    def fetch_fields(self, sobject_name: str) -> List[dict]:
+    def fetch_fields(self, sobject_name: str) -> list[dict]:
         """Fetches fields of a given sobject"""
         query = SOBJECT_FIELDS_QUERY_TPL.format(
             entity_definition_id=sobject_name
@@ -55,7 +55,7 @@ class SalesforceClient(SalesforceBaseClient):
             return None
         return response["records"][0]["Description"]
-    def add_table_descriptions(self, sobjects: List[dict]) -> List[dict]:
+    def add_table_descriptions(self, sobjects: list[dict]) -> list[dict]:
         """
         Add table descriptions.
         We use the tooling API which does not handle well the LIMIT in SOQL
@@ -67,7 +67,7 @@ class SalesforceClient(SalesforceBaseClient):
             described_sobjects.append({**sobject, "Description": description})
         return described_sobjects
-    def tables(self) -> List[dict]:
+    def tables(self) -> list[dict]:
         """
         Get Salesforce sobjects as tables
         """
@@ -77,13 +77,13 @@ class SalesforceClient(SalesforceBaseClient):
         return list(self.formatter.tables(described_sobjects))
     def columns(
-        self, sobject_names: List[Tuple[str, str]], show_progress: bool = True
-    ) -> List[dict]:
+        self, sobject_names: list[tuple[str, str]], show_progress: bool = True
+    ) -> list[dict]:
         """
         Get salesforce sobject fields as columns
         show_progress: optionally deactivate the tqdm progress bar
         """
-        sobject_fields: Dict[str, List[dict]] = dict()
+        sobject_fields: dict[str, list[dict]] = dict()
         for api_name, table_name in tqdm(
             sobject_names, disable=not show_progress
         ):

castor_extractor/warehouse/salesforce/extract.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import logging
-from typing import Dict, List, Tuple
 from ...utils import AbstractStorage, LocalStorage, write_summary
 from ...utils.salesforce import SalesforceCredentials
@@ -14,9 +13,9 @@ from .client import SalesforceClient
 logger = logging.getLogger(__name__)
-Paths = Dict[str, str]
+Paths = dict[str, str]
-SALESFORCE_CATALOG_ASSETS: Tuple[WarehouseAsset, ...] = (
+SALESFORCE_CATALOG_ASSETS: tuple[WarehouseAsset, ...] = (
     WarehouseAsset.TABLE,
     WarehouseAsset.COLUMN,
 )
@@ -81,7 +80,7 @@ class SalesforceExtractionProcessor:
     def extract_role(self) -> Paths:
         """extract no users and return the empty file location"""
-        users: List[dict] = []
+        users: list[dict] = []
         location = self._storage.put(WarehouseAsset.USER.value, users)
         logger.info(f"Extracted {len(users)} users to {location}")
         return {WarehouseAsset.USER.value: location}

castor_extractor/warehouse/salesforce/format.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Dict, Iterator, List
+from collections.abc import Iterator
+from typing import Any
 from ...utils import group_by
 from .constants import SCHEMA_NAME
@@ -25,10 +26,10 @@ def _name(sobject: dict) -> str:
     return f"{label} ({api_name})"
-def _field_description(field: Dict[str, Any]) -> str:
-    context: Dict[str, str] = {}
+def _field_description(field: dict[str, Any]) -> str:
+    context: dict[str, str] = {}
-    field_definition: Dict[str, str] = field.get("FieldDefinition") or {}
+    field_definition: dict[str, str] = field.get("FieldDefinition") or {}
     if description := field_definition.get("Description"):
         context["Description"] = _clean(description)
     if help_text := field.get("InlineHelpText"):
@@ -69,7 +70,7 @@ def _to_table_payload(sobject: dict) -> dict:
     }
-def _detect_duplicates(sobjects: List[dict]) -> List[dict]:
+def _detect_duplicates(sobjects: list[dict]) -> list[dict]:
     """
     enrich the given data with "has_duplicate" flag:
     - True when another asset has the same Label in the list
@@ -89,7 +90,7 @@ class SalesforceFormatter:
     """
     @staticmethod
-    def tables(sobjects: List[dict]) -> Iterator[dict]:
+    def tables(sobjects: list[dict]) -> Iterator[dict]:
         """
         formats the raw list of sobjects to tables
         """
@@ -98,7 +99,7 @@ class SalesforceFormatter:
             yield _to_table_payload(sobject)
     @staticmethod
-    def columns(sobject_fields: Dict[str, List[dict]]) -> Iterator[dict]:
+    def columns(sobject_fields: dict[str, list[dict]]) -> Iterator[dict]:
         """formats the raw list of sobject fields to columns"""
         for table_name, fields in sobject_fields.items():
             fields = _detect_duplicates(fields)

castor_extractor/warehouse/salesforce/format_test.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import Dict, List, Tuple
 from .format import (
     _HAS_DUPLICATE_KEY,
     SalesforceFormatter,
@@ -9,7 +7,7 @@ from .format import (
 )
-def _tables_sobjects() -> Tuple[Dict[str, str], ...]:
+def _tables_sobjects() -> tuple[dict[str, str], ...]:
     """Returns 4 sobjects with 2 sharing the same label"""
     a = {"Label": "a", "QualifiedApiName": "a_one"}
     b = {"Label": "b", "QualifiedApiName": "b"}
@@ -18,7 +16,7 @@ def _tables_sobjects() -> Tuple[Dict[str, str], ...]:
     return a, b, c, a_prime
-def _columns_sobjects() -> Dict[str, List[dict]]:
+def _columns_sobjects() -> dict[str, list[dict]]:
     a = {"Label": "First Name", "QualifiedApiName": "owner_name"}
     b = {"Label": "First Name", "QualifiedApiName": "editor_name"}
     c = {"Label": "Foo Bar", "QualifiedApiName": "foo_bar"}

castor-extractor 0.21.7__py3-none-any.whl → 0.22.0__py3-none-any.whl

Potentially problematic release.

castor-extractor 0.21.7__py3-none-any.whl → 0.22.0__py3-none-any.whl