castor-extractor 0.24.4__py3-none-any.whl → 0.24.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of castor-extractor might be problematic.

Files changed (24):
  1. CHANGELOG.md +20 -0
  2. castor_extractor/transformation/__init__.py +0 -0
  3. castor_extractor/transformation/coalesce/__init__.py +2 -0
  4. castor_extractor/transformation/coalesce/assets.py +18 -0
  5. castor_extractor/transformation/coalesce/client/__init__.py +2 -0
  6. castor_extractor/transformation/coalesce/client/client.py +180 -0
  7. castor_extractor/transformation/coalesce/client/credentials.py +23 -0
  8. castor_extractor/transformation/coalesce/client/endpoint.py +42 -0
  9. castor_extractor/transformation/coalesce/client/type.py +1 -0
  10. castor_extractor/transformation/coalesce/client/utils.py +52 -0
  11. castor_extractor/transformation/coalesce/client/utils_test.py +54 -0
  12. castor_extractor/utils/__init__.py +1 -0
  13. castor_extractor/utils/batch.py +16 -0
  14. castor_extractor/utils/batch_test.py +27 -0
  15. castor_extractor/visualization/domo/client/client.py +10 -4
  16. castor_extractor/visualization/tableau/client/client_metadata_api.py +52 -19
  17. castor_extractor/visualization/tableau/client/client_metadata_api_test.py +31 -0
  18. castor_extractor/visualization/tableau/client/gql_queries.py +1 -1
  19. castor_extractor/warehouse/databricks/format.py +1 -1
  20. {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/METADATA +23 -3
  21. {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/RECORD +24 -11
  22. {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/LICENCE +0 -0
  23. {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/WHEEL +0 -0
  24. {castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
 # Changelog
 
+## 0.24.9 - 2025-04-16
+
+* Introduce API client for **Coalesce**
+
+## 0.24.8 - 2025-04-16
+
+* Tableau - remove duplicates introduced by `offset` pagination
+
+## 0.24.7 - 2025-04-07
+
+* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
+
+## 0.24.6 - 2025-04-03
+
+* Domo - extract cards metadata by batch to prevent hitting the URL max length
+
+## 0.24.5 - 2025-04-02
+
+* Bump dependencies: google-cloud-storage
+
 ## 0.24.4 - 2025-03-19
 
 * Snowflake:

castor_extractor/transformation/__init__.py ADDED
File without changes

castor_extractor/transformation/coalesce/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .assets import CoalesceAsset, CoalesceQualityAsset
+from .client import CoalesceClient, CoalesceCredentials

castor_extractor/transformation/coalesce/assets.py ADDED
@@ -0,0 +1,18 @@
+from ...types import ExternalAsset
+
+
+class CoalesceAsset(ExternalAsset):
+    """Coalesce assets"""
+
+    NODES = "nodes"
+
+
+class CoalesceQualityAsset(ExternalAsset):
+    """
+    Coalesce Quality Assets
+    Remark: having a dedicated Enum for Quality simplifies the process of
+    searching pushed files
+    """
+
+    NODES = "nodes"
+    RUN_RESULTS = "run_results"

castor_extractor/transformation/coalesce/client/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .client import CoalesceClient
+from .credentials import CoalesceCredentials

castor_extractor/transformation/coalesce/client/client.py ADDED
@@ -0,0 +1,180 @@
+from http import HTTPStatus
+from typing import Iterator, Optional
+
+from ....utils import APIClient, BearerAuth, RequestSafeMode, SerializedAsset
+from ..assets import CoalesceAsset, CoalesceQualityAsset
+from .credentials import CoalesceCredentials
+from .endpoint import (
+    CoalesceEndpointFactory,
+)
+from .type import NodeIDToNamesMapping
+from .utils import column_names_per_node, is_test, test_names_per_node
+
+_LIMIT_MAX = 1_000
+_MAX_ERRORS = 50
+
+
+def _run_result_payload(result: dict, query_result: dict) -> dict:
+    return {
+        "node_id": result["nodeID"],
+        "node_name": result["name"],
+        "test_name": query_result["name"],
+        "start_time": query_result["startTime"],
+        "end_time": query_result["endTime"],
+        "status": query_result["status"],
+        "success": query_result["success"],
+        "isRunning": query_result["isRunning"],
+    }
+
+
+COALESCE_SAFE_MODE = RequestSafeMode(
+    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
+    max_errors=_MAX_ERRORS,
+)
+COALESCE_TIMEOUT_SECONDS = 90
+
+
+class CoalesceBearerAuth(BearerAuth):
+    """Bearer Authentication for Coalesce"""
+
+    def fetch_token(self) -> Optional[str]:
+        pass
+
+    def __init__(self, token: str):
+        self._token = token
+
+
+class CoalesceClient(APIClient):
+    """REST API client to extract data from Coalesce"""
+
+    def __init__(
+        self,
+        credentials: CoalesceCredentials,
+    ):
+        auth = CoalesceBearerAuth(token=credentials.token)
+        super().__init__(
+            host=credentials.host,
+            auth=auth,
+            safe_mode=COALESCE_SAFE_MODE,
+            timeout=COALESCE_TIMEOUT_SECONDS,
+        )
+
+    def _fetch_environments(self) -> Iterator[dict]:
+        endpoint = CoalesceEndpointFactory.environments()
+        result = self._get(endpoint=endpoint)
+        return result["data"]
+
+    def _node_details(self, environment_id: int, node_id: str) -> dict:
+        endpoint = CoalesceEndpointFactory.nodes(
+            environment_id=environment_id, node_id=node_id
+        )
+        return self._get(endpoint=endpoint)
+
+    def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
+        result = self._get(endpoint=endpoint)
+        nodes: list[dict] = []
+        for node in result["data"]:
+            details = self._node_details(environment_id, node["id"])
+            nodes.append({**node, **details})
+        return nodes
+
+    def _fetch_all_nodes(self) -> SerializedAsset:
+        nodes: list[dict] = []
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            nodes.extend(self._fetch_env_nodes(environment_id))
+        return nodes
+
+    def _fetch_runs(self, starting_from: str) -> SerializedAsset:
+        """
+        fetch runs, per environment;
+        we break per environment to lower the chance of exceeding the 1k limit
+        """
+        runs: list[dict] = []
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            runs.extend(
+                self._fetch_recent_runs_per_env(environment_id, starting_from)
+            )
+        return runs
+
+    def _fetch_recent_runs_per_env(
+        self, environment_id: int, starting_from: str
+    ) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.runs()
+        params = {
+            "environmentID": environment_id,
+            "limit": _LIMIT_MAX,
+            "orderBy": "runEndTime",
+            "orderByDirection": "asc",
+            "startingFrom": starting_from,
+        }
+        result = self._get(endpoint=endpoint, params=params)
+        return result["data"]
+
+    def _fetch_run_results(self, run_id: str) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.run_results(run_id)
+        result = self._get(endpoint=endpoint)
+        return result["data"]
+
+    def _run_results_by_run(
+        self,
+        run_id: str,
+        test_names: NodeIDToNamesMapping,
+        column_names: NodeIDToNamesMapping,
+    ) -> SerializedAsset:
+        run_results: list[dict] = []
+        for result in self._fetch_run_results(run_id):
+            node_id = result["nodeID"]
+            for query_result in result["queryResults"]:
+                _is_test = is_test(
+                    query_result,
+                    node_id,
+                    test_names,
+                    column_names,
+                )
+                if not _is_test:
+                    continue
+                run_result = _run_result_payload(result, query_result)
+                run_results.append(run_result)
+        return run_results
+
+    def _run_results_by_env(
+        self, environment_id: int, starting_from: str
+    ) -> SerializedAsset:
+        run_results: list[dict] = []
+        nodes = self._fetch_env_nodes(environment_id)
+        test_names = test_names_per_node(nodes)
+        column_names = column_names_per_node(nodes)
+        runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
+
+        for run in runs:
+            run_id = run["id"]
+            _results = self._run_results_by_run(
+                run_id, test_names, column_names
+            )
+            run_results.extend(_results)
+        return run_results
+
+    def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
+        run_results: list[dict] = []
+
+        for environment in self._fetch_environments():
+            environment_id = environment["id"]
+            _results = self._run_results_by_env(environment_id, starting_from)
+            run_results.extend(_results)
+
+        return run_results
+
+    def fetch(
+        self, asset: CoalesceAsset, starting_from=None
+    ) -> SerializedAsset:
+        """Extract the given Coalesce Asset"""
+        if asset in (CoalesceAsset.NODES, CoalesceQualityAsset.NODES):
+            return self._fetch_all_nodes()
+        elif asset == CoalesceQualityAsset.RUN_RESULTS:
+            return self._fetch_all_run_results(starting_from=starting_from)
+        raise AssertionError(
+            f"Asset {asset} is not supported by CoalesceClient"
+        )
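
Taken together, a minimal usage sketch of the new client (the imports follow the package exports shown above; the host URL, the token placeholder, and the starting_from date format are assumptions, not documented values):

# Hedged sketch: driving the Coalesce client introduced in 0.24.9.
from castor_extractor.transformation.coalesce import (
    CoalesceAsset,
    CoalesceClient,
    CoalesceCredentials,
    CoalesceQualityAsset,
)

credentials = CoalesceCredentials(
    host="https://app.coalescesoftware.io",  # assumed host value
    token="<api-token>",  # placeholder
)
client = CoalesceClient(credentials)

# Extract all nodes across environments
nodes = client.fetch(CoalesceAsset.NODES)

# Extract recent test run results; ISO date format is an assumption
run_results = client.fetch(
    CoalesceQualityAsset.RUN_RESULTS,
    starting_from="2025-04-01",
)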

castor_extractor/transformation/coalesce/client/credentials.py ADDED
@@ -0,0 +1,23 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+CASTOR_ENV_PREFIX = "CASTOR_COALESCE_"
+
+
+class CoalesceCredentials(BaseSettings):
+    """Class to handle Coalesce rest API permissions"""
+
+    model_config = SettingsConfigDict(
+        env_prefix=CASTOR_ENV_PREFIX,
+        extra="ignore",
+        populate_by_name=True,
+    )
+
+    host: str
+    token: str = Field(repr=False)
+
+    @property
+    def token_payload(self) -> dict[str, str]:
+        return {
+            "client_secret": self.token,
+        }
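
Because CoalesceCredentials extends pydantic's BaseSettings with env_prefix=CASTOR_ENV_PREFIX, the fields can presumably also be resolved from the environment instead of constructor arguments; a sketch with placeholder values:

# Sketch: resolving CoalesceCredentials from environment variables.
import os

from castor_extractor.transformation.coalesce import CoalesceCredentials

os.environ["CASTOR_COALESCE_HOST"] = "https://app.coalescesoftware.io"  # placeholder
os.environ["CASTOR_COALESCE_TOKEN"] = "<api-token>"  # placeholder

credentials = CoalesceCredentials()  # host and token read from the env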

castor_extractor/transformation/coalesce/client/endpoint.py ADDED
@@ -0,0 +1,42 @@
+from typing import Optional
+
+
+class CoalesceEndpointFactory:
+    """Provide endpoints to hit Coalesce API"""
+
+    @classmethod
+    def environments(cls, environment_id: Optional[int] = None) -> str:
+        """
+        When specified, concatenate environment_id at the end to fetch details.
+        Otherwise, list existing environments.
+        """
+        base = "api/v1/environments"
+        if environment_id:
+            return base + f"/{environment_id}"
+        return base
+
+    @classmethod
+    def nodes(cls, environment_id: int, node_id: Optional[str] = None) -> str:
+        """
+        When specified, concatenate node_id at the end to fetch details.
+        Otherwise, list existing nodes in the given environment.
+        """
+        base = f"api/v1/environments/{environment_id}/nodes"
+        if node_id:
+            return base + f"/{node_id}"
+        return base
+
+    @classmethod
+    def runs(cls) -> str:
+        """
+        Get runs (additional filtering can be done in the body)
+        """
+        base = "api/v1/runs"
+        return base
+
+    @classmethod
+    def run_results(cls, run_id: str) -> str:
+        """
+        get run results (including success/fail for tests), given a run id
+        """
+        return f"api/v1/runs/{run_id}/results"
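
For reference, the factory resolves to paths like these (the IDs are made up):

# Paths produced by CoalesceEndpointFactory, with made-up IDs
from castor_extractor.transformation.coalesce.client.endpoint import (
    CoalesceEndpointFactory,
)

assert CoalesceEndpointFactory.environments() == "api/v1/environments"
assert CoalesceEndpointFactory.environments(7) == "api/v1/environments/7"
assert CoalesceEndpointFactory.nodes(7) == "api/v1/environments/7/nodes"
assert CoalesceEndpointFactory.nodes(7, "node-uuid") == "api/v1/environments/7/nodes/node-uuid"
assert CoalesceEndpointFactory.run_results("run-1") == "api/v1/runs/run-1/results"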

castor_extractor/transformation/coalesce/client/type.py ADDED
@@ -0,0 +1 @@
+NodeIDToNamesMapping = dict[str, set[str]]

castor_extractor/transformation/coalesce/client/utils.py ADDED
@@ -0,0 +1,52 @@
+from ....utils import SerializedAsset
+from .type import NodeIDToNamesMapping
+
+_NULL_SUFFIX = ": Null"
+_UNIQUE_SUFFIX = ": Unique"
+
+
+def is_test(
+    query_result: dict,
+    node_id: str,
+    test_names: NodeIDToNamesMapping,
+    column_names: NodeIDToNamesMapping,
+) -> bool:
+    """
+    checks whether a query result is a test result or not.
+
+    all this implementation can soon be replaced by checking whether
+    query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
+    """
+    # test scoped on the node (table)
+    result_name = query_result["name"]
+    if result_name in test_names.get(node_id, {}):
+        return True
+
+    # test scoped on the column
+    if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
+        _UNIQUE_SUFFIX
+    ):
+        column_name = result_name.split(":")[0]
+        if column_name in column_names.get(node_id, {}):
+            return True
+    return False
+
+
+def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
+    """mapping nodeID: set(testName)"""
+    mapping: dict[str, set[str]] = {}
+    for node in nodes:
+        node_id = node["id"]
+        tests = node.get("metadata", {}).get("appliedNodeTests", [])
+        mapping[node_id] = {test["name"] for test in tests}
+    return mapping
+
+
+def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
+    """mapping nodeID: set(columnNames)"""
+    mapping: dict[str, set[str]] = {}
+    for node in nodes:
+        node_id = node["id"]
+        columns = node.get("metadata", {}).get("columns", [])
+        mapping[node_id] = {column["name"] for column in columns}
+    return mapping

castor_extractor/transformation/coalesce/client/utils_test.py ADDED
@@ -0,0 +1,54 @@
+from .utils import is_test
+
+
+def test_is_test():
+    test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
+    column_names = {"some-uuid": {"carthago", "delenda", "est"}}
+
+    happy_node_test = is_test(
+        query_result={"name": "check-mirrors"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_node_test is True
+
+    unknown_node_test = is_test(
+        query_result={"name": "check-engine"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_node_test is False
+
+    happy_column_test_unique = is_test(
+        query_result={"name": "carthago: Unique"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_column_test_unique is True
+
+    happy_column_test_null = is_test(
+        query_result={"name": "carthago: Null"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert happy_column_test_null is True
+
+    unknown_column_test = is_test(
+        query_result={"name": "rome: Unique"},
+        node_id="some-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_column_test is False
+
+    unknown_node_id_test = is_test(
+        query_result={"name": "whatever: Unique"},
+        node_id="unknown-uuid",
+        test_names=test_names,
+        column_names=column_names,
+    )
+    assert unknown_node_id_test is False

castor_extractor/utils/__init__.py CHANGED
@@ -1,4 +1,5 @@
 from .argument_parser import parse_filled_arguments
+from .batch import batch_of_length
 from .client import (
     AbstractSourceClient,
     APIClient,

castor_extractor/utils/batch.py ADDED
@@ -0,0 +1,16 @@
+from typing import Iterator, List, TypeVar
+
+T = TypeVar("T")
+
+
+def batch_of_length(
+    elements: List[T],
+    batch_size: int,
+) -> Iterator[List[T]]:
+    """
+    Split the given elements into smaller chunks
+    """
+    assert batch_size > 1, "batch size must be greater or equal to 1"
+    element_count = len(elements)
+    for index in range(0, element_count, batch_size):
+        yield elements[index : min((index + batch_size), element_count)]

castor_extractor/utils/batch_test.py ADDED
@@ -0,0 +1,27 @@
+import pytest
+
+from .batch import batch_of_length
+
+
+def test_batch_of_length():
+    elements = ["a", "b", "c", "d", "e", "f", "g", "h"]
+    result = list(batch_of_length(elements, 3))
+    assert result == [
+        ["a", "b", "c"],
+        ["d", "e", "f"],
+        ["g", "h"],
+    ]
+
+    result = list(batch_of_length(elements, 1000))
+    assert result == [
+        elements,
+    ]
+
+    result = list(batch_of_length(elements, 7))
+    assert result == [
+        ["a", "b", "c", "d", "e", "f", "g"],
+        ["h"],
+    ]
+
+    with pytest.raises(AssertionError):
+        list(batch_of_length(elements, -12))

castor_extractor/visualization/domo/client/client.py CHANGED
@@ -9,6 +9,7 @@ import requests
 from ....utils import (
     RequestSafeMode,
     at_midnight,
+    batch_of_length,
     current_date,
     empty_iterator,
     handle_response,
@@ -48,6 +49,8 @@ _RETRY_BASE_MS = 10 * 60 * 1000 # 10 minutes
 
 _PARENT_FOLDER = "/Dashboards"
 
+_CARDS_BATCH_SIZE = 100
+
 logger = logging.getLogger(__name__)
 
 
@@ -156,16 +159,19 @@ class DomoClient:
 
         return all_results
 
+    def _cards_metadata(self, card_ids: list[int]) -> Iterator[dict]:
+        # batch to avoid hitting the URL max length
+        for batch_card_ids in batch_of_length(card_ids, _CARDS_BATCH_SIZE):
+            endpoint = self._endpoint_factory.cards_metadata(batch_card_ids)
+            yield from self._get_element(endpoint)
+
     def _datasources(self, card_ids: list[int]) -> RawData:
         """Yields all distinct datasources associated to the given cards"""
         if not card_ids:
            return empty_iterator()
 
-        endpoint = self._endpoint_factory.cards_metadata(card_ids)
-        cards_metadata = self._get_element(endpoint)
-
         processed: set[str] = set()
-        for card in cards_metadata:
+        for card in self._cards_metadata(card_ids):
            for datasource in card["datasources"]:
                 id_ = datasource["dataSourceId"]
                 if id_ in processed:
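
The batching behaviour comes from the new batch_of_length helper: each request now serializes at most 100 card IDs into the cards_metadata URL. A quick sketch of the effect (the 350 fake card IDs are illustrative):

# Sketch: 350 made-up card ids become four requests of at most 100 ids each,
# so the ids serialized into the cards_metadata URL stay bounded.
from castor_extractor.utils import batch_of_length

card_ids = list(range(350))
sizes = [len(batch) for batch in batch_of_length(card_ids, 100)]  # 100 == _CARDS_BATCH_SIZE
assert sizes == [100, 100, 100, 50]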

castor_extractor/visualization/tableau/client/client_metadata_api.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 from collections.abc import Iterator
 from typing import Optional
 
@@ -9,15 +10,14 @@ from ..constants import DEFAULT_PAGE_SIZE
 from .errors import TableauApiError, TableauApiTimeout
 from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
 
+logger = logging.getLogger(__name__)
+
 # increase the value when extraction is too slow
 # decrease the value when timeouts arise
 _CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
-    # for some clients, extraction of columns tend to hit the node limit
-    # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
-    # the workaround is to reduce pagination
-    TableauAsset.COLUMN: 50,
     # fields are light but volumes are bigger
     TableauAsset.FIELD: 1000,
+    # tables are sometimes heavy
     TableauAsset.TABLE: 50,
 }
 
@@ -51,8 +51,9 @@ def _check_errors(answer: dict) -> None:
 
 def gql_query_scroll(
     server,
-    query: str,
     resource: str,
+    fields: str,
+    page_size: int,
 ) -> Iterator[SerializedAsset]:
     """
     Iterate over GQL query results, handling pagination and cursor
@@ -67,26 +68,58 @@
         max_retries=_RETRY_COUNT,
         base_ms=_RETRY_BASE_MS,
     )
-    def _call(cursor: Optional[str]) -> dict:
-        # If cursor is defined it must be quoted else use null token
-        token = "null" if cursor is None else f'"{cursor}"'
-        query_ = query.replace("AFTER_TOKEN_SIGNAL", token)
-        answer = server.metadata.query(query_)
+    def _call(first: int, offset: int) -> dict:
+        query = QUERY_TEMPLATE.format(
+            resource=resource,
+            fields=fields,
+            first=first,
+            offset=offset,
+        )
+        answer = server.metadata.query(query)
         _check_errors(answer)
         return answer["data"][f"{resource}Connection"]
 
-    cursor = None
+    current_offset = 0
     while True:
-        payload = _call(cursor)
+        payload = _call(first=page_size, offset=current_offset)
         yield payload["nodes"]
 
-        page_info = payload["pageInfo"]
-        if page_info["hasNextPage"]:
-            cursor = page_info["endCursor"]
-        else:
+        current_offset += len(payload["nodes"])
+        total = payload["totalCount"]
+        logger.info(f"Extracted {current_offset}/{total} {resource}")
+
+        if not payload["pageInfo"]["hasNextPage"]:
            break
 
 
+def _deduplicate(result_pages: Iterator[SerializedAsset]) -> SerializedAsset:
+    """
+    Sometimes assets are duplicated, which triggers UniqueViolation errors
+    during store_all down the line.
+
+    We suspect the offset pagination to be the root cause, because we had no
+    problem until recently, when we switched from cursor pagination to offset
+    pagination (for performance reasons)
+    https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_examples.html#pagination
+
+    This is a straightforward solution to remove these duplicates directly at
+    extraction.
+    We don't show warnings because duplicates are expected, and we keep only
+    the first occurrence since those duplicates are probably identical.
+    """
+    deduplicated: SerializedAsset = []
+    seen_ids: set[str] = set()
+    for page in result_pages:
+        for asset in page:
+            asset_id = asset["id"]
+            if asset_id in seen_ids:
+                # skip duplicate
+                continue
+            deduplicated.append(asset)
+            seen_ids.add(asset_id)
+    return deduplicated
+
+
 class TableauClientMetadataApi:
     """
     Calls the MetadataAPI, using graphQL
@@ -107,13 +140,13 @@ class TableauClientMetadataApi:
         fields: str,
         page_size: int = DEFAULT_PAGE_SIZE,
     ) -> SerializedAsset:
-        query = QUERY_TEMPLATE.format(
+        result_pages = gql_query_scroll(
+            self._server,
             resource=resource,
             fields=fields,
             page_size=page_size,
         )
-        result_pages = gql_query_scroll(self._server, query, resource)
-        return [asset for page in result_pages for asset in page]
+        return _deduplicate(result_pages)
 
     def _page_size(self, asset: TableauAsset) -> int:
         return (

castor_extractor/visualization/tableau/client/client_metadata_api_test.py ADDED
@@ -0,0 +1,31 @@
+from .client_metadata_api import _deduplicate
+
+
+def test__deduplicate():
+    result_pages = iter(
+        [
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 2, "name": "workbook_2"},
+            ],
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 3, "name": "workbook_3"},
+                {"id": 4, "name": "workbook_4"},
+            ],
+            [
+                {"id": 4, "name": "workbook_4"},
+                {"id": 5, "name": "workbook_5"},
+                {"id": 5, "name": "workbook_5"},
+                {"id": 5, "name": "workbook_5"},
+            ],
+            [
+                {"id": 1, "name": "workbook_1"},
+                {"id": 3, "name": "workbook_3"},
+            ],
+        ]
+    )
+    deduplicated = _deduplicate(result_pages)
+    assert len(deduplicated) == 5
+    deduplicated_keys = {item["id"] for item in deduplicated}
+    assert deduplicated_keys == {1, 2, 3, 4, 5}

castor_extractor/visualization/tableau/client/gql_queries.py CHANGED
@@ -2,7 +2,7 @@ from ..assets import TableauAsset
 
 QUERY_TEMPLATE = """
 {{
-  {resource}Connection(first: {page_size}, after: AFTER_TOKEN_SIGNAL) {{
+  {resource}Connection(first: {first}, offset: {offset}) {{
     nodes {{ {fields}
     }}
     pageInfo {{
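
For reference, a query rendered through the new offset-based template would look roughly like this (the resource and field names are illustrative, and the rendered output shown in the comment is abridged):

# Sketch: rendering the offset-based template with made-up values
from castor_extractor.visualization.tableau.client.gql_queries import QUERY_TEMPLATE

query = QUERY_TEMPLATE.format(
    resource="workbooks", fields="id name", first=50, offset=100
)
# -> { workbooksConnection(first: 50, offset: 100) { nodes { id name } pageInfo { ... } } }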

castor_extractor/warehouse/databricks/format.py CHANGED
@@ -168,7 +168,7 @@ class DatabricksFormatter:
             "schema_name": None,
             "query_text": q["query_text"],
             "user_id": q["user_id"],
-            "user_name": q["user_name"],
+            "user_name": q.get("user_name"),
             "start_time": start_time,
             "end_time": end_time,
         }
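
The switch to dict.get makes the formatter tolerant of query payloads that omit user_name; a minimal illustration with a made-up payload:

# Made-up Databricks query payload without a user_name key
q = {"query_text": "SELECT 1", "user_id": 42}
q.get("user_name")  # None, where q["user_name"] would raise KeyError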

{castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.4
+Version: 0.24.9
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -35,7 +35,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
 Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
 Requires-Dist: google-auth (>=2,<3)
 Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
-Requires-Dist: google-cloud-storage (>=2,<3)
+Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
 Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
 Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
 Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
@@ -51,7 +51,7 @@ Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all"
 Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
 Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
-Requires-Dist: setuptools (>=75.6)
+Requires-Dist: setuptools (>=78.1)
 Requires-Dist: snowflake-connector-python (>=3.4.0,<4.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: snowflake-sqlalchemy (!=1.2.5,<2.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: sqlalchemy (>=1.4,<1.5)
@@ -210,6 +210,26 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:support@castordoc.com)
 
 # Changelog
 
+## 0.24.9 - 2025-04-16
+
+* Introduce API client for **Coalesce**
+
+## 0.24.8 - 2025-04-16
+
+* Tableau - remove duplicates introduced by `offset` pagination
+
+## 0.24.7 - 2025-04-07
+
+* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
+
+## 0.24.6 - 2025-04-03
+
+* Domo - extract cards metadata by batch to prevent hitting the URL max length
+
+## 0.24.5 - 2025-04-02
+
+* Bump dependencies: google-cloud-storage
+
 ## 0.24.4 - 2025-03-19
 
 * Snowflake:

{castor_extractor-0.24.4.dist-info → castor_extractor-0.24.9.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=1Y5FmmQDspwZaOhKjnJosP2sNd898LeTOmVIMTBt9Bw,16387
+CHANGELOG.md,sha256=UKD2ldg9s00KOoVfWjnyB_m50R0fnpPLbpmkZHKoOQM,16821
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -68,6 +68,16 @@ castor_extractor/quality/soda/client/client.py,sha256=Gd3GaachWx5ZEH_nqgTxiBIbUq
 castor_extractor/quality/soda/client/credentials.py,sha256=R1g7nHpJlQ5hBjtUFN06QjjWAouQtb_V-je7cAXXIA4,514
 castor_extractor/quality/soda/client/endpoints.py,sha256=x3B-XlnDF8NJMuk-81N72_6HA-YZEzA895khLyj0j54,228
 castor_extractor/quality/soda/client/pagination.py,sha256=_7caQUNDNPGRufnZNrfYBN3oVXsk99_2wYr67I0ehAs,530
+castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
+castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
+castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
+castor_extractor/transformation/coalesce/client/client.py,sha256=yrPzIk-6VN4MDHwti3Yxy3PCfHmxE6znjuehl_-dYTg,6151
+castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
+castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
+castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
+castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
+castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
 castor_extractor/types.py,sha256=nHel2hv6NoHmdpOX_heEfO2-DnZPoYA2x0eJdbFvT0s,1276
 castor_extractor/uploader/__init__.py,sha256=A4bq_SrEtKAsl0r_D_duSTvL5WIQjVfsMy7tDx9IKg0,87
 castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
@@ -77,9 +87,11 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
 castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
 castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
-castor_extractor/utils/__init__.py,sha256=X7WOOgrpGf7Vh8r-7eNGjuC0rKs0g9GTO3d7hZ18gwo,1550
+castor_extractor/utils/__init__.py,sha256=KQkr_CmxWG0Vpu7CaqjbJkffUeEWcyeA9Cbm394Hygk,1585
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
 castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
+castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
+castor_extractor/utils/batch_test.py,sha256=84JYXOxiTkZFAceVh0mzN6VtKxcqoFPbxkZfIDyLGlg,606
 castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
 castor_extractor/utils/client/abstract.py,sha256=CWF7_afNpEZ3jor-22wXbKIvM20ukHkaDy_uknKz8B0,2075
 castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
@@ -146,7 +158,7 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
 castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
 castor_extractor/visualization/domo/client/__init__.py,sha256=Do0fU4B8Hhlhahcv734gnJl_ryCztfTBDea7XNCKfB8,72
-castor_extractor/visualization/domo/client/client.py,sha256=vOMBY5dY6N3v55YJPdh9aoiddXnuLnGoFHLE5BeUKSg,9662
+castor_extractor/visualization/domo/client/client.py,sha256=bgzXWUm-UnTIwgyJKaJkoHzQpDYwWCGCe97MsMFw6ng,9930
 castor_extractor/visualization/domo/client/credentials.py,sha256=4gnsk4Tpt3ggdUYbvyNPJEXeCyTy12s-X24P5hFdULg,873
 castor_extractor/visualization/domo/client/endpoints.py,sha256=eIE9oeZ_cmJSWWDuyxh6JaAOs3y5bTJQQ265HYgpulE,2775
 castor_extractor/visualization/domo/client/pagination.py,sha256=ukVkHVzoH4mfZ29H9YcnC2YrdVolP10wv25J6Q3ehRw,821
@@ -264,12 +276,13 @@ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwy
 castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
 castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
 castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
-castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=fIBsSbRTypBABsCoigO2dkKsw4Eu3GrsEPTDfjY8A80,4303
+castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=fARj7xroHfMd4nlo5CJK5jPok5UsHznOQpIpNaECVHw,5274
+castor_extractor/visualization/tableau/client/client_metadata_api_test.py,sha256=lbsq5mLtqeNc5EsmCw9Mvl8qcvMsTcJTepHwy1ToyvA,969
 castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
 castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
 castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
 castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
-castor_extractor/visualization/tableau/client/gql_queries.py,sha256=NISarYh33Ij7DhYxqjTdv681AHYpbft8kPwVUQbAZ7U,2190
+castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
 castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
 castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
 castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6ZXxVsK8R3YnVk1UVYbaZk,1423
@@ -315,7 +328,7 @@ castor_extractor/warehouse/databricks/credentials.py,sha256=ExtVcl2NpMXTx1Lg8vHQ
 castor_extractor/warehouse/databricks/endpoints.py,sha256=qPoL9CtPFJdwVuW9rJ37nmeMd-nChOBouEVYb4SlaUE,670
 castor_extractor/warehouse/databricks/enums.py,sha256=3T6BbVvbWvfWkD23krsYT1x0kKh1qRzNPl6WpcXe300,274
 castor_extractor/warehouse/databricks/extract.py,sha256=Z4VTEIf0QMiua0QGAlJdQ86kxmGAXekQ304aCKme6IY,7358
-castor_extractor/warehouse/databricks/format.py,sha256=FUBMrFFWSa_lX5PtixJCDR3eRYycqeMw0oKHt7AkA4o,6732
+castor_extractor/warehouse/databricks/format.py,sha256=S3BOcwJubc1pyKr-li26uftUUfsjfrm5Qf4LqmElXVk,6736
 castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
 castor_extractor/warehouse/databricks/lineage.py,sha256=jwiRXrgqBAtzQt5EgErYrN8YRyviEEHmyrSbw8TSPq4,2105
 castor_extractor/warehouse/databricks/lineage_test.py,sha256=PyBn1eAoxLm4Bz5M0F4zmaxFX2mXRTM_uug5OKbQPQs,2684
@@ -403,8 +416,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.4.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
-castor_extractor-0.24.4.dist-info/METADATA,sha256=eY2TPP3IDq9an2JJzoZcN-_rG5DJIGzbJOqEtGBhzd4,23543
-castor_extractor-0.24.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-castor_extractor-0.24.4.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
-castor_extractor-0.24.4.dist-info/RECORD,,
+castor_extractor-0.24.9.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.9.dist-info/METADATA,sha256=JDqbNB2dwsOO7_5PKUWP0r4FL217fi7OIEbVaOPljDQ,23985
+castor_extractor-0.24.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.9.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
+castor_extractor-0.24.9.dist-info/RECORD,,