castor-extractor 0.24.33__py3-none-any.whl → 0.24.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of castor-extractor might be problematic.

Files changed (24)
  1. CHANGELOG.md +9 -1
  2. castor_extractor/commands/extract_sqlserver.py +12 -0
  3. castor_extractor/transformation/coalesce/client/client.py +92 -88
  4. castor_extractor/transformation/coalesce/client/pagination.py +26 -0
  5. castor_extractor/utils/__init__.py +7 -1
  6. castor_extractor/utils/client/api/pagination.py +5 -2
  7. castor_extractor/utils/collection.py +26 -0
  8. castor_extractor/utils/collection_test.py +31 -1
  9. castor_extractor/visualization/looker_studio/client/queries/query.sql +1 -0
  10. castor_extractor/warehouse/sqlserver/client.py +15 -0
  11. castor_extractor/warehouse/sqlserver/extract.py +8 -2
  12. castor_extractor/warehouse/sqlserver/queries/column.sql +10 -10
  13. castor_extractor/warehouse/sqlserver/queries/database.sql +1 -1
  14. castor_extractor/warehouse/sqlserver/queries/schema.sql +5 -6
  15. castor_extractor/warehouse/sqlserver/queries/table.sql +12 -14
  16. castor_extractor/warehouse/sqlserver/query.py +30 -1
  17. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/METADATA +10 -2
  18. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/RECORD +21 -23
  19. castor_extractor/transformation/coalesce/client/type.py +0 -1
  20. castor_extractor/transformation/coalesce/client/utils.py +0 -52
  21. castor_extractor/transformation/coalesce/client/utils_test.py +0 -54
  22. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/LICENCE +0 -0
  23. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/WHEEL +0 -0
  24. {castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
+## 0.24.34 - 2025-07-02
+
+* SQLServer: multiple databases
+
 ## 0.24.33 - 2025-07-10
 
 * Tableau - Add an option to skip fields ingestion
@@ -10,7 +18,7 @@
 
 ## 0.24.31 - 2025-07-02
 
-* Looker Studio: add option to list users via a provided JSON file
+* Looker Studio: add an option to list users via a provided JSON file
 
 ## 0.24.30 - 2025-06-26
 
castor_extractor/commands/extract_sqlserver.py CHANGED
@@ -23,6 +23,16 @@ def main():
         action="store_true",
         help="Skips files already extracted instead of replacing them",
     )
+    parser.add_argument(
+        "--db-allowed",
+        nargs="*",
+        help="List of databases that should be extracted",
+    )
+    parser.add_argument(
+        "--db-blocked",
+        nargs="*",
+        help="List of databases that should not be extracted",
+    )
     parser.set_defaults(skip_existing=False)
 
     args = parser.parse_args()
@@ -35,4 +45,6 @@ def main():
         password=args.password,
         output_directory=args.output,
         skip_existing=args.skip_existing,
        db_allowed=args.db_allowed,
        db_blocked=args.db_blocked,
     )
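
Since both new flags use nargs="*", each accepts zero or more database names and defaults to None when the flag is omitted. A minimal, standalone sketch of the resulting parsing behaviour (a throwaway parser mirroring the two flags above, not the package's actual entry point):

    import argparse

    # Hypothetical parser, for illustration only.
    parser = argparse.ArgumentParser()
    parser.add_argument("--db-allowed", nargs="*")
    parser.add_argument("--db-blocked", nargs="*")

    args = parser.parse_args(["--db-blocked", "staging", "test"])
    print(args.db_allowed)  # None (flag omitted)
    print(args.db_blocked)  # ['staging', 'test']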
castor_extractor/transformation/coalesce/client/client.py CHANGED
@@ -1,31 +1,47 @@
 import logging
+from functools import partial
 from http import HTTPStatus
-from typing import Iterator, Optional
+from typing import Callable, Optional
 
-from requests import ConnectionError
+from pydantic import ValidationError
 
 from ....utils import (
     APIClient,
     BearerAuth,
     RequestSafeMode,
     SerializedAsset,
+    fetch_all_pages,
 )
 from ..assets import CoalesceAsset, CoalesceQualityAsset
 from .credentials import CoalesceCredentials
 from .endpoint import (
     CoalesceEndpointFactory,
 )
-from .type import NodeIDToNamesMapping
-from .utils import column_names_per_node, is_test, test_names_per_node
+from .pagination import CoalescePagination
+
+logger = logging.getLogger(__name__)
+
+
+COALESCE_PAGE_SIZE = 300
+COALESCE_PAGE_SIZE_RUN_RESULTS = 1_000
+
+COALESCE_TIMEOUT_SECONDS = 90
 
-_LIMIT_MAX = 1_000
 _MAX_ERRORS = 200
 
-logger = logging.getLogger(__name__)
+COALESCE_SAFE_MODE = RequestSafeMode(
+    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
+    max_errors=_MAX_ERRORS,
+)
 
 
-def _run_result_payload(result: dict, query_result: dict) -> dict:
+def _run_result_payload(
+    environment_id: str,
+    result: dict,
+    query_result: dict,
+) -> dict:
     return {
+        "environment_id": environment_id,
         "node_id": result["nodeID"],
         "node_name": result["name"],
         "test_name": query_result["name"],
@@ -37,13 +53,6 @@ def _run_result_payload(result: dict, query_result: dict) -> dict:
     }
 
 
-COALESCE_SAFE_MODE = RequestSafeMode(
-    status_codes=(HTTPStatus.INTERNAL_SERVER_ERROR,),
-    max_errors=_MAX_ERRORS,
-)
-COALESCE_TIMEOUT_SECONDS = 90
-
-
 class CoalesceBearerAuth(BearerAuth):
     """Bearer Authentication for Coalesce"""
 
@@ -69,65 +78,74 @@ class CoalesceClient(APIClient):
             timeout=COALESCE_TIMEOUT_SECONDS,
         )
 
-    def _fetch_environments(self) -> Iterator[dict]:
-        endpoint = CoalesceEndpointFactory.environments()
-        result = self._get(endpoint=endpoint)
-        return result["data"]
-
-    def _node_details(self, environment_id: int, node_id: str) -> dict:
-        endpoint = CoalesceEndpointFactory.nodes(
-            environment_id=environment_id, node_id=node_id
+    def _get_paginated(
+        self,
+        endpoint: str,
+        limit: int = COALESCE_PAGE_SIZE,
+        params: Optional[dict] = None,
+    ) -> Callable:
+        return partial(
+            self._get,
+            retry_on_timeout=False,  # explained in the docstring
+            endpoint=endpoint,
+            params={
+                "limit": limit,
+                **(params or dict()),
+            },
         )
-        return self._get(endpoint=endpoint)
+
+    def _fetch_environments(self) -> SerializedAsset:
+        endpoint = CoalesceEndpointFactory.environments()
+        request = self._get_paginated(endpoint=endpoint)
+        result = fetch_all_pages(request, CoalescePagination)
+        return list(result)
 
     def _fetch_env_nodes(self, environment_id: int) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.nodes(environment_id=environment_id)
-        result = self._get(endpoint=endpoint)
-        nodes: list[dict] = []
-        for node in result["data"]:
-            try:
-                details = self._node_details(environment_id, node["id"])
-                nodes.append({**node, **details})
-            except ConnectionError as e:
-                node_id = node["id"]
-                message = f"ConnectionError, environment: {environment_id}, node: {node_id}"
-                logger.warning(message)
-                raise e
-        return nodes
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params={"detail": "true"},
+        )
+        result = fetch_all_pages(request, CoalescePagination)
+        return [
+            {
+                **node,
+                "environment_id": environment_id,
+            }
+            for node in result
+        ]
 
     def _fetch_all_nodes(self) -> SerializedAsset:
+        environments = self._fetch_environments()
+        total = len(environments)
         nodes: list[dict] = []
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            nodes.extend(self._fetch_env_nodes(environment_id))
+
+        for index, env in enumerate(environments):
+            env_id = env["id"]
+            logger.info(f"Fetching nodes for env #{env_id} - {index}/{total}")
+            try:
+                nodes.extend(self._fetch_env_nodes(env_id))
+            except ValidationError as e:
+                # 500 Server Error: Internal Server Error on Coalesce API
+                logger.warning(
+                    f"Skipping nodes for {env_id} due to the following Error: {e}"
+                )
+            logger.info(f"{len(nodes)} nodes extracted so far")
         return nodes
 
     def _fetch_runs(self, starting_from: str) -> SerializedAsset:
-        """
-        fetch runs, per environment;
-        we break per environment to lower the chance of exceeding the 1k limit
-        """
-        runs: list[dict] = []
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            runs.extend(
-                self._fetch_recent_runs_per_env(environment_id, starting_from)
-            )
-        return runs
-
-    def _fetch_recent_runs_per_env(
-        self, environment_id: int, starting_from: str
-    ) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.runs()
         params = {
-            "environmentID": environment_id,
-            "limit": _LIMIT_MAX,
             "orderBy": "runEndTime",
             "orderByDirection": "asc",
             "startingFrom": starting_from,
         }
-        result = self._get(endpoint=endpoint, params=params)
-        return result["data"]
+        request = self._get_paginated(
+            endpoint=endpoint,
+            params=params,
+            limit=COALESCE_PAGE_SIZE_RUN_RESULTS,
+        )
+        return list(fetch_all_pages(request, CoalescePagination))
 
     def _fetch_run_results(self, run_id: str) -> SerializedAsset:
         endpoint = CoalesceEndpointFactory.run_results(run_id)
@@ -136,51 +154,37 @@ class CoalesceClient(APIClient):
 
     def _run_results_by_run(
         self,
+        environment_id: str,
         run_id: str,
-        test_names: NodeIDToNamesMapping,
-        column_names: NodeIDToNamesMapping,
     ) -> SerializedAsset:
         run_results: list[dict] = []
         for result in self._fetch_run_results(run_id):
-            node_id = result["nodeID"]
             for query_result in result["queryResults"]:
-                _is_test = is_test(
+                if query_result["type"] != "sqlTest":
+                    continue
+                run_result = _run_result_payload(
+                    environment_id,
+                    result,
                     query_result,
-                    node_id,
-                    test_names,
-                    column_names,
                 )
-                if not _is_test:
-                    continue
-                run_result = _run_result_payload(result, query_result)
                 run_results.append(run_result)
         return run_results
 
-    def _run_results_by_env(
-        self, environment_id: int, starting_from: str
+    def _fetch_all_run_results(
+        self,
+        starting_from: str,
    ) -> SerializedAsset:
         run_results: list[dict] = []
-        nodes = self._fetch_env_nodes(environment_id)
-        test_names = test_names_per_node(nodes)
-        column_names = column_names_per_node(nodes)
-        runs = self._fetch_recent_runs_per_env(environment_id, starting_from)
 
-        for run in runs:
-            run_id = run["id"]
-            _results = self._run_results_by_run(
-                run_id, test_names, column_names
-            )
-            run_results.extend(_results)
-        return run_results
-
-    def _fetch_all_run_results(self, starting_from: str) -> SerializedAsset:
-        run_results: list[dict] = []
-
-        for environment in self._fetch_environments():
-            environment_id = environment["id"]
-            _results = self._run_results_by_env(environment_id, starting_from)
-            run_results.extend(_results)
+        runs = self._fetch_runs(starting_from)
+        total = len(runs)
 
+        for index, run in enumerate(runs):
+            logger.info(f"Extracting run results ({index}/{total})")
+            run_id = run["id"]
+            environment_id = run["environmentID"]
+            current_results = self._run_results_by_run(environment_id, run_id)
+            run_results.extend(current_results)
         return run_results
 
     def fetch(
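
Every listing endpoint now goes through _get_paginated, which uses functools.partial to freeze the endpoint and page size into a zero-argument callable; fetch_all_pages then re-invokes that callable with the cursor parameters supplied by CoalescePagination. A self-contained sketch of the pattern, with a fake get standing in for APIClient._get (names and page contents invented):

    from functools import partial

    # Fake transport: serves the integers 0..9 in pages, Coalesce-style
    # ({"data": [...], "next": cursor-or-None}).
    def get(endpoint: str, params: dict, startingFrom: int = 0) -> dict:
        data = list(range(startingFrom, min(startingFrom + params["limit"], 10)))
        has_more = bool(data) and data[-1] < 9
        return {"data": data, "next": data[-1] + 1 if has_more else None}

    request = partial(get, endpoint="/environments", params={"limit": 4})

    page = request()  # first page: no cursor
    results = list(page["data"])
    while page["next"] is not None:
        page = request(startingFrom=page["next"])  # follow the cursor
        results.extend(page["data"])
    # results == [0, 1, ..., 9]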
castor_extractor/transformation/coalesce/client/pagination.py ADDED
@@ -0,0 +1,26 @@
+from typing import Optional, Union
+
+from ....utils import PaginationModel
+
+
+class CoalescePagination(PaginationModel):
+    """
+    Class to handle paginated results for Coalesce
+    See their documentation here
+    https://docs.coalesce.io/docs/api
+    """
+
+    data: list
+    next: Union[Optional[str], Optional[int]] = None
+
+    def is_last(self) -> bool:
+        """Stopping condition for the pagination"""
+        return self.next is None
+
+    def next_page_payload(self):
+        """Payload enabling to generate the request for the next page"""
+        return {"startingFrom": self.next}
+
+    def page_results(self) -> list:
+        """List of results of the current page"""
+        return self.data
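
For intuition, the three hooks map directly onto a cursor loop: page_results yields the current page, is_last stops once the API returns no cursor, and next_page_payload feeds the cursor back as the startingFrom request parameter. A toy walk-through with invented field values (assuming PaginationModel behaves like a pydantic model, as the ValidationError handling in client.py suggests):

    page = CoalescePagination(data=[{"id": 1}, {"id": 2}], next=25)
    page.page_results()       # [{'id': 1}, {'id': 2}]
    page.is_last()            # False
    page.next_page_payload()  # {'startingFrom': 25} -> params for the next request

    last = CoalescePagination(data=[{"id": 3}], next=None)
    last.is_last()            # True -> pagination stops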
castor_extractor/utils/__init__.py CHANGED
@@ -19,7 +19,13 @@ from .client import (
     handle_response,
     uri_encode,
 )
-from .collection import deduplicate, empty_iterator, group_by, mapping_from_rows
+from .collection import (
+    deduplicate,
+    empty_iterator,
+    filter_items,
+    group_by,
+    mapping_from_rows,
+)
 from .constants import OUTPUT_DIR
 from .deprecate import deprecate_python
 from .env import from_env
castor_extractor/utils/client/api/pagination.py CHANGED
@@ -66,16 +66,19 @@ def fetch_all_pages(
     """
     page_number = 1
     response_payload = request()
+
     paginated_response = pagination_model(**response_payload)
+
     while not paginated_response.is_last():
         logger.debug(f"Fetching page number {page_number}")
         yield from paginated_response.page_results()
         next_page_parameters = paginated_response.next_page_parameters()
-        new_request = partial(request, **next_page_parameters)
+        request_with_pagination = partial(request, **next_page_parameters)
         if rate_limit:
             sleep(rate_limit)
         paginated_response = pagination_model(
-            current_page_payload=next_page_parameters, **new_request()
+            current_page_payload=next_page_parameters,
+            **request_with_pagination(),
         )
         page_number += 1
 
castor_extractor/utils/collection.py CHANGED
@@ -2,6 +2,8 @@ from collections import defaultdict
 from collections.abc import Iterable, Sequence
 from typing import (
     Any,
+    List,
+    Optional,
     TypeVar,
 )
 
@@ -80,3 +82,27 @@ def deduplicate(
         deduplicated.append(element)
 
     return deduplicated
+
+
+def filter_items(
+    items: Iterable[T],
+    allowed: Optional[Iterable[T]] = None,
+    blocked: Optional[Iterable[T]] = None,
+) -> List[T]:
+    """
+    Filters `items` by excluding any in `blocked` or including only those in `allowed`.
+    If both `allowed` and `blocked` are None, returns all items.
+    If both are provided, raise an error.
+    """
+    items = list(items)
+
+    if allowed and blocked:
+        raise AttributeError(
+            "Only one of `allowed` and `blocked` can be provided"
+        )
+    if blocked:
+        return [item for item in items if item not in blocked]
+    if allowed:
+        return [item for item in items if item in allowed]
+
+    return items
castor_extractor/utils/collection_test.py CHANGED
@@ -1,4 +1,6 @@
-from .collection import deduplicate, mapping_from_rows
+import pytest
+
+from .collection import deduplicate, filter_items, mapping_from_rows
 
 
 def test__mapping_from_rows__basic_mapping():
@@ -72,3 +74,31 @@ def test_deduplicate():
         {"id": "2", "name": "duplicate"},
     ]
     assert deduplicate("id", elements) == [e1, e2, e3]
+
+
+def test_sqlserver_databases():
+    databases = [
+        "prod",
+        "staging",
+        "test",
+    ]
+
+    # 1. No allowed or blocked: should return all
+    result1 = filter_items(databases)
+    assert result1 == [
+        "prod",
+        "staging",
+        "test",
+    ]
+
+    # 2. Block "prod": only staging and test should remain
+    result2 = filter_items(databases, blocked=["prod"])
+    assert result2 == ["staging", "test"]
+
+    # 3. Only allow "staging" and "test"
+    result3 = filter_items(databases, allowed=["staging", "test"])
+    assert result3 == ["staging", "test"]
+
+    # 4. allowed and blocked, should raise
+    with pytest.raises(AttributeError):
+        filter_items(databases, blocked=["prod"], allowed=["staging", "test"])
castor_extractor/visualization/looker_studio/client/queries/query.sql CHANGED
@@ -29,6 +29,7 @@ WITH ranked_by_datasource AS (
         `{project}.region-{region}.INFORMATION_SCHEMA.JOBS_BY_PROJECT`
     WHERE
         job_type = 'QUERY'
+        AND ARRAY_LENGTH(referenced_tables) > 0
        AND EXISTS (
            SELECT
                1
castor_extractor/warehouse/sqlserver/client.py CHANGED
@@ -1,15 +1,20 @@
+import logging
 from collections.abc import Iterator
 
 from sqlalchemy import text
 
 from ...utils import ExtractionQuery, SqlalchemyClient, uri_encode
 
+logger = logging.getLogger(__name__)
+
 SERVER_URI = "{user}:{password}@{host}:{port}/{database}"
 MSSQL_URI = f"mssql+pymssql://{SERVER_URI}"
 DEFAULT_PORT = 1433
 
 _KEYS = ("user", "password", "host", "port", "database")
 
+_SYSTEM_DATABASES = ("master", "model", "msdb", "tempdb", "DBAdmin")
+
 
 def _check_key(credentials: dict) -> None:
     for key in _KEYS:
@@ -51,3 +56,13 @@ class MSSQLClient(SqlalchemyClient):
                 yield from results
             finally:
                 self.close()
+
+    def get_databases(self) -> list[str]:
+        result = self.execute(
+            ExtractionQuery("SELECT name FROM sys.databases", {})
+        )
+        return [
+            row["name"]
+            for row in result
+            if row["name"] not in _SYSTEM_DATABASES
+        ]
castor_extractor/warehouse/sqlserver/extract.py CHANGED
@@ -1,6 +1,6 @@
 import logging
 
-from ...utils import LocalStorage, from_env, write_summary
+from ...utils import LocalStorage, filter_items, from_env, write_summary
 from ..abstract import (
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
@@ -51,7 +51,13 @@ def extract_all(**kwargs) -> None:
 
     client = MSSQLClient(credentials=_credentials(kwargs))
 
-    query_builder = MSSQLQueryBuilder()
+    databases = filter_items(
+        client.get_databases(), kwargs.get("allowed"), kwargs.get("blocked")
+    )
+
+    query_builder = MSSQLQueryBuilder(
+        databases=databases,
+    )
 
     storage = LocalStorage(directory=output_directory)
 
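Putting the two new pieces together: get_databases returns every database on the server except the system ones, and filter_items then applies the user's allow- or block-list before the query builder fans out per database. A toy run with invented database names (assumes from castor_extractor.utils import filter_items, which the __init__ change above exports):

    # Invented server inventory; `system` mirrors _SYSTEM_DATABASES above.
    server_databases = ["master", "tempdb", "sales", "hr", "analytics"]
    system = ("master", "model", "msdb", "tempdb", "DBAdmin")

    user_databases = [name for name in server_databases if name not in system]
    # ['sales', 'hr', 'analytics'] -- what get_databases would yield here

    filter_items(user_databases, blocked=["hr"])         # ['sales', 'analytics']
    filter_items(user_databases, allowed=["analytics"])  # ['analytics']
    filter_items(user_databases)                         # all three, unchanged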
castor_extractor/warehouse/sqlserver/queries/column.sql CHANGED
@@ -11,7 +11,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.tables
+        {database}.sys.tables
 
     UNION
 
@@ -21,7 +21,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.views
+        {database}.sys.views
 
     UNION
 
@@ -31,7 +31,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.external_tables
 ),
 /*
 `sys.columns` contains, among others:
@@ -54,11 +54,11 @@ column_ids AS (
         schema_name = ss.name,
         schema_id = ss.schema_id,
         comment = CONVERT(varchar(1024), ep.value)
-    FROM sys.columns AS sc
+    FROM {database}.sys.columns AS sc
     LEFT JOIN extended_tables AS et ON sc.object_id = et.table_id
-    LEFT JOIN sys.schemas AS ss ON et.schema_id = ss.schema_id
-    LEFT JOIN sys.databases AS sd ON sd.name = DB_NAME()
-    LEFT JOIN sys.extended_properties AS ep
+    LEFT JOIN {database}.sys.schemas AS ss ON et.schema_id = ss.schema_id
+    LEFT JOIN {database}.sys.databases AS sd ON sd.name = '{database}'
+    LEFT JOIN {database}.sys.extended_properties AS ep
         ON
             sc.object_id = ep.major_id
             AND sc.column_id = ep.minor_id
@@ -70,9 +70,9 @@ columns AS (
         i.database_name,
         i.database_id,
         schema_name = c.table_schema,
-        i.schema_id,
+        schema_id = CAST(i.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)),
         table_name = c.table_name,
-        i.table_id,
+        table_id = CAST(i.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)) + '_' + CAST(i.table_id AS VARCHAR(10)),
         c.column_name,
         c.data_type,
         c.ordinal_position,
@@ -87,7 +87,7 @@ columns AS (
         i.comment,
         column_id = CONCAT(i.table_id, '.', c.column_name)
     FROM
-        information_schema.columns AS c
+        {database}.information_schema.columns AS c
     LEFT JOIN column_ids AS i
         ON
             (
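
A note on the recurring CAST(...) + '_' + CAST(...) expressions here and in schema.sql/table.sql below: in SQL Server, schema_id and object_id are only unique within one database, so once several databases are extracted the raw IDs could collide; prefixing them with database_id keeps the composite identifiers unique. In Python terms, roughly:

    # Rough equivalent of the composite-ID construction in the SQL above.
    def composite_schema_id(database_id: int, schema_id: int) -> str:
        return f"{database_id}_{schema_id}"

    composite_schema_id(5, 1)  # '5_1'
    composite_schema_id(7, 1)  # '7_1' -- no collision across databases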
castor_extractor/warehouse/sqlserver/queries/database.sql CHANGED
@@ -2,4 +2,4 @@ SELECT
     db.database_id,
     database_name = db.name
 FROM sys.databases AS db
-WHERE db.name = DB_NAME()
+WHERE db.name NOT IN ('master', 'model', 'msdb', 'tempdb', 'DBAdmin');
castor_extractor/warehouse/sqlserver/queries/schema.sql CHANGED
@@ -3,21 +3,20 @@ WITH ids AS (
     SELECT DISTINCT
         table_catalog,
         table_schema
-    FROM information_schema.tables
-    WHERE table_catalog = DB_NAME()
+    FROM {database}.information_schema.tables
 )
 
 SELECT
     d.database_id,
     database_name = i.table_catalog,
     schema_name = s.name,
-    s.schema_id,
+    schema_id = CAST(d.database_id AS VARCHAR(10)) + '_' + CAST(s.schema_id AS VARCHAR(10)),
     schema_owner = u.name,
     schema_owner_id = u.uid
-FROM sys.schemas AS s
+FROM {database}.sys.schemas AS s
 INNER JOIN ids AS i
     ON s.name = i.table_schema
-LEFT JOIN sys.sysusers AS u
+LEFT JOIN {database}.sys.sysusers AS u
     ON s.principal_id = u.uid
-LEFT JOIN sys.databases AS d
+LEFT JOIN {database}.sys.databases AS d
     ON i.table_catalog = d.name
castor_extractor/warehouse/sqlserver/queries/table.sql CHANGED
@@ -11,7 +11,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.tables
+        {database}.sys.tables
 
     UNION
 
@@ -21,7 +21,7 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.views
+        {database}.sys.views
 
     UNION
 
@@ -31,14 +31,14 @@ WITH extended_tables AS (
         table_owner_id = principal_id,
         schema_id
     FROM
-        sys.external_tables
+        {database}.sys.external_tables
 ),
 -- Get the row count per table
 partitions AS (
     SELECT
         object_id,
         row_count = SUM(rows)
-    FROM sys.partitions
+    FROM {database}.sys.partitions
     GROUP BY object_id
 ),
 -- Append row count to table properties
@@ -69,13 +69,12 @@ table_ids AS (
         table_owner = u.name,
         row_count,
         comment = CONVERT(varchar(1024), ep.value)
-    FROM
-        extended_tables_with_row_count AS et
-    LEFT JOIN sys.schemas AS ss
+    FROM extended_tables_with_row_count AS et
+    LEFT JOIN {database}.sys.schemas AS ss
         ON et.schema_id = ss.schema_id
-    LEFT JOIN sys.sysusers AS u
+    LEFT JOIN {database}.sys.sysusers AS u
         ON et.table_owner_id = u.uid
-    LEFT JOIN sys.extended_properties AS ep
+    LEFT JOIN {database}.sys.extended_properties AS ep
         ON (
             et.table_id = ep.major_id
             AND ep.minor_id = 0
@@ -91,19 +90,18 @@ meta AS (
         t.table_name,
         t.table_type
     FROM
-        information_schema.tables AS t
-    LEFT JOIN sys.databases AS db
+        {database}.information_schema.tables AS t
+    LEFT JOIN {database}.sys.databases AS db
         ON t.table_catalog = db.name
-    WHERE t.table_catalog = db_name()
 )
 
 SELECT
     m.database_name,
     m.database_id,
     m.schema_name,
-    i.schema_id,
+    schema_id = CAST(m.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)),
     m.table_name,
-    i.table_id,
+    table_id = CAST(m.database_id AS VARCHAR(10)) + '_' + CAST(i.schema_id AS VARCHAR(10)) + '_' + CAST(i.table_id AS VARCHAR(10)),
     m.table_type,
     i.table_owner,
     i.table_owner_id,
castor_extractor/warehouse/sqlserver/query.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import Optional
 
 from ..abstract import (
@@ -7,6 +8,15 @@ from ..abstract import (
     WarehouseAsset,
 )
 
+logger = logging.getLogger(__name__)
+
+
+_DATABASE_REQUIRED = (
+    WarehouseAsset.SCHEMA,
+    WarehouseAsset.TABLE,
+    WarehouseAsset.COLUMN,
+)
+
 
 class MSSQLQueryBuilder(AbstractQueryBuilder):
     """
@@ -15,10 +25,29 @@ class MSSQLQueryBuilder(AbstractQueryBuilder):
 
     def __init__(
         self,
+        databases: list[str],
         time_filter: Optional[TimeFilter] = None,
     ):
         super().__init__(time_filter=time_filter)
+        self._databases = databases
+
+    @staticmethod
+    def _format(query: ExtractionQuery, values: dict) -> ExtractionQuery:
+        return ExtractionQuery(
+            statement=query.statement.format(**values),
+            params=query.params,
+        )
 
     def build(self, asset: WarehouseAsset) -> list[ExtractionQuery]:
         query = self.build_default(asset)
-        return [query]
+
+        if asset not in _DATABASE_REQUIRED:
+            return [query]
+
+        logger.info(
+            f"\tWill run queries with following database params: {self._databases}",
+        )
+        return [
+            self._format(query, {"database": database})
+            for database in self._databases
+        ]
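
build now returns one ExtractionQuery per database for schema, table, and column assets, with _format substituting the {database} placeholder that the SQL files above rely on via str.format. A minimal illustration of that fan-out (toy statement and names, not the shipped SQL):

    statement = "SELECT * FROM {database}.sys.tables"
    databases = ["sales", "hr"]

    queries = [statement.format(database=db) for db in databases]
    # ['SELECT * FROM sales.sys.tables', 'SELECT * FROM hr.sys.tables']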
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.33
+Version: 0.24.35
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -215,6 +215,14 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
 
 # Changelog
 
+## 0.24.35 - 2025-07-29
+
+* Coalesce - Fix pagination issue
+
+## 0.24.34 - 2025-07-02
+
+* SQLServer: multiple databases
+
 ## 0.24.33 - 2025-07-10
 
 * Tableau - Add an option to skip fields ingestion
@@ -225,7 +233,7 @@ For any questions or bug report, contact us at [support@coalesce.io](mailto:supp
 
 ## 0.24.31 - 2025-07-02
 
-* Looker Studio: add option to list users via a provided JSON file
+* Looker Studio: add an option to list users via a provided JSON file
 
 ## 0.24.30 - 2025-06-26
 
{castor_extractor-0.24.33.dist-info → castor_extractor-0.24.35.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=jKQMJGiDeDEZG-753wDrtfOoOYa5Db5Liy0AsATdsuc,18779
+CHANGELOG.md,sha256=1S9O_c1LH8T4P78akRxlFS8Tv0i9Jgswy7V9zvd_UQw,18900
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -24,7 +24,7 @@ castor_extractor/commands/extract_salesforce.py,sha256=3j3YTmMkPAwocR-B1ozJQai0U
 castor_extractor/commands/extract_salesforce_reporting.py,sha256=FdANTNiLkIPdm80XMYxWReHjdycLsIa61pyeCD-sUDk,962
 castor_extractor/commands/extract_sigma.py,sha256=sxewHcZ1Doq35V2qnpX_zCKKXkrb1_9bYjUMg7BOW-k,643
 castor_extractor/commands/extract_snowflake.py,sha256=GwlrRxwEBjHqGs_3bs5vM9fzmv61_iwvBr1KcIgFgWM,2161
-castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
+castor_extractor/commands/extract_sqlserver.py,sha256=-20AlQbJ4W3oQytHLKdN8GX__UkrrQukOgSzy2l1WZY,1483
 castor_extractor/commands/extract_strategy.py,sha256=Q-pUymatPrBFGXobhyUPzFph0-t774-XOpjdCFF1dYo,821
 castor_extractor/commands/extract_tableau.py,sha256=LNtI29LbVk1vp4RNrn89GmdW6R_7QBYunRmkowDhbco,1982
 castor_extractor/commands/extract_thoughtspot.py,sha256=caAYJlH-vK7u5IUB6OKXxcaWfLgc7d_XqnFDWK6YNS4,639
@@ -76,12 +76,10 @@ castor_extractor/transformation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 castor_extractor/transformation/coalesce/__init__.py,sha256=CW_qdtEfwgJRsCyBlk5hNlxwEO-VV6mBXZvkRbND_J8,112
 castor_extractor/transformation/coalesce/assets.py,sha256=pzccYPP66c9PAnVroemx7-6MeRHw7Ft1OlTC6jIamAA,363
 castor_extractor/transformation/coalesce/client/__init__.py,sha256=VRmVpH29rOghtDQnCN7dAdA0dI0Lxseu4BC8rnwM9dU,80
-castor_extractor/transformation/coalesce/client/client.py,sha256=7EVJDDxnIm5_uMHLFZ2PD6JzfebVglKST9IiURwn4vs,6524
+castor_extractor/transformation/coalesce/client/client.py,sha256=3YB82ibaumeSRd510mlrPXKsWefV3lHQQVis9oEK-LQ,6133
 castor_extractor/transformation/coalesce/client/credentials.py,sha256=jbJxjbdPspf-dzYKfeb7oqL_8TXd1nvkJrjAcdAnLPc,548
 castor_extractor/transformation/coalesce/client/endpoint.py,sha256=0uLh7dpA1vsR9qr_50SEYV_-heQE4BwED9oNMgYsL-w,1272
-castor_extractor/transformation/coalesce/client/type.py,sha256=oiiVP9NL0ijTXyQmaB8aJVYckc7m-m8ZgMyNIAduUKE,43
-castor_extractor/transformation/coalesce/client/utils.py,sha256=jbxh3OCbYm3fKZD1QfqX5zm1ZD_jFIrpUQsX8paRP7g,1627
-castor_extractor/transformation/coalesce/client/utils_test.py,sha256=Q00Y1n0Q_sZ0LFnYn98yDGFumBsifzVJSc7_3PSBMfI,1543
+castor_extractor/transformation/coalesce/client/pagination.py,sha256=zynyWCMEzUQ7HA1Q5AP4BAOmxRQI6NA5jCPEo0lHn44,705
 castor_extractor/transformation/dbt/__init__.py,sha256=LHQROlMqYWCc7tcmhdjXtROFpJqUvCg9jPC8avHgD4I,107
 castor_extractor/transformation/dbt/assets.py,sha256=JY1nKEGySZ84wNoe7dnizwAYw2q0t8NVaIfqhB2rSw0,148
 castor_extractor/transformation/dbt/client.py,sha256=BIue1DNAn2b7kHeiXBkGNosq8jZA2DrgjP7Gi5epAPE,5684
@@ -96,7 +94,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
 castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
 castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
-castor_extractor/utils/__init__.py,sha256=_hC54hBfPH41TTuWMsqQcyYVF7SojrOevW3OAv8M05E,1652
+castor_extractor/utils/__init__.py,sha256=z_BdKTUyuug3I5AzCuSGrAVskfLax4_olfORIjhZw_M,1691
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
 castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
 castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
@@ -108,7 +106,7 @@ castor_extractor/utils/client/api/auth.py,sha256=lq0K3UEl1vwIIa_vKTdlpIQPdE5K1-5
 castor_extractor/utils/client/api/auth_test.py,sha256=LlyXytnatg6ZzR4Zkvzk0BH99FYhHX7qn_nyr2MSnDI,1305
 castor_extractor/utils/client/api/client.py,sha256=qmj7KoNqt6F-cmpdaMiz_aVxzwMCgbDNcgzXSbCdu1Y,5183
 castor_extractor/utils/client/api/client_test.py,sha256=FM3ZxsLLfMOBn44cXX6FIgnA31-5TTNIyp9D4LBwtXE,1222
-castor_extractor/utils/client/api/pagination.py,sha256=ph5TYqPiyFGgygsIhCATAHPIQ9UJNZyiTcqlyRdGEno,2460
+castor_extractor/utils/client/api/pagination.py,sha256=tNL89bvgnMJd0ajJA07wTTReH3PJOQm3xsa93SKHFss,2499
 castor_extractor/utils/client/api/pagination_test.py,sha256=jCOgXFXrH-jrCxe2dfk80ZksJF-EtmpJPU11BGabsqk,1385
 castor_extractor/utils/client/api/safe_request.py,sha256=5pvI2WPRDtitX9F1aYcXTIMPNmDikRK9dKTD3ctoeoQ,1774
 castor_extractor/utils/client/api/safe_request_test.py,sha256=LqS5FBxs6lLLcTkcgxIoLb6OinxShHXR5y4CWZpwmwg,2005
@@ -118,8 +116,8 @@ castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9
 castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
 castor_extractor/utils/client/uri.py,sha256=jmP9hY-6PRqdc3-vAOdtll_U6q9VCqSqmBAN6QRs3ZI,150
 castor_extractor/utils/client/uri_test.py,sha256=1XKF6qSseCeD4G4ckaNO07JXfGbt7XUVinOZdpEYrDQ,259
-castor_extractor/utils/collection.py,sha256=FiIJWZZ865oqNjtTm40gQ13R9zh--W2W5YsMBZJf2bk,2334
-castor_extractor/utils/collection_test.py,sha256=XJAGo0Veg0H8wZRCESIkU2t8bXxTNET0BdosomO3-Ls,2104
+castor_extractor/utils/collection.py,sha256=g2HmB0ievvYHWaZ8iEzkcPPkrBFsh6R6b_liBqcsMjc,3044
+castor_extractor/utils/collection_test.py,sha256=mlw33u4VidazQwWxJMvaFeYX3VB5CAj6rqRG-cRsLrw,2884
 castor_extractor/utils/constants.py,sha256=qBQprS9U66mS-RIBXiLujdTSV3WvGv40Bc0khP4Abdk,39
 castor_extractor/utils/deprecate.py,sha256=aBIN2QqZUx5CBNZMFfOUhi8QqtPqRcJtmrN6xqfm-y8,805
 castor_extractor/utils/env.py,sha256=TqdtB50U8LE0993WhhEhpy89TJrHbjtIKjvg6KQ-5q0,596
@@ -197,7 +195,7 @@ castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqN
 castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
 castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Phq378VEaFLD-nyP2_A1wge6HUP45jSthhlNjD7aqSg,4085
 castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
-castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=Ub4rdrJ5WTPWKI-eVmXrNMv0Ktmti4b-93zZBr0xEB0,1426
+castor_extractor/visualization/looker_studio/client/queries/query.sql,sha256=p7fiXu5--BlY1FKnoW2CAQF7kPKjcN1tYf_SwvCZus4,1474
 castor_extractor/visualization/looker_studio/extract.py,sha256=NU48xQ83UtRW3jXKJcvofzqgEM2lHGjtTzjbKOSB50A,4059
 castor_extractor/visualization/looker_studio/extract_test.py,sha256=ZckAxUMuoEjJ9RWkfRvt9M8SxblkQvsq-Grb8GSs-y0,492
 castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
@@ -420,18 +418,18 @@ castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsK
 castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
 castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
 castor_extractor/warehouse/sqlserver/__init__.py,sha256=PdOuYznmvKAbfWAm8UdN47MfEsd9jqPi_dDi3WEo1KY,116
-castor_extractor/warehouse/sqlserver/client.py,sha256=pO5JNykET9luT2h9iAeI2jX-WBkWklXaYmmYIxQMm1o,1601
-castor_extractor/warehouse/sqlserver/extract.py,sha256=2mBNx9clyrhoiirD635BW-5u6pPoxHyIsB071XoZjho,2087
+castor_extractor/warehouse/sqlserver/client.py,sha256=Bjfpw96IKAQfWPiU5SZYEDfetwfkqZrnKbQYoStcnZc,2007
+castor_extractor/warehouse/sqlserver/extract.py,sha256=-LoHY5wAGJk4vutrO3N0_PaRqts7rkEn7pADRHzoxiI,2249
 castor_extractor/warehouse/sqlserver/queries/.sqlfluff,sha256=yy0KQdz8I_67vnXyX8eeWwOWkxTXvHyVKSVwhURktd8,48
-castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=Szdf8hwcDffRTgtD6zf4ZuIyHIVijFgSDk1rZbKI3g8,2480
-castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4eO6ck-smsDYValYMHLf1CTZu_zIqYycN77jqJH5H7E,106
-castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=elM9s02I9d9F5E4MHfJBfria5QT1hHycZHrn06wn9tg,535
-castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx_LGkIf7LX6ojTjI8wgJDxm3f0,2542
+castor_extractor/warehouse/sqlserver/queries/column.sql,sha256=_K5OS63N7fM7kGPudnnjJEnIyaxR1xE2hoZgnJ_A3p8,2763
+castor_extractor/warehouse/sqlserver/queries/database.sql,sha256=4dPeBCn85MEOXr1f-DPXxiI3RvvoE_1n8lsbTs26E0I,150
+castor_extractor/warehouse/sqlserver/queries/schema.sql,sha256=UR3eTiYw7Iq5-GukelnNg_uq6haZ_dwg_SedZfOWUoA,619
+castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=4RgeSkHDWTWRyU2iLxaBR0KuSwIBvb3GbQGdkJYXbn0,2787
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
-castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
+castor_extractor/warehouse/sqlserver/query.py,sha256=7sW8cK3JzxPt6faTJ7e4lk9tE4fo_AeCymI-LqsSols,1276
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.33.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
-castor_extractor-0.24.33.dist-info/METADATA,sha256=vCEpwDM8sngoUEfrGtRPSjtCjTw6zxJGiJrnmj4eq_Y,26232
-castor_extractor-0.24.33.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-castor_extractor-0.24.33.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
-castor_extractor-0.24.33.dist-info/RECORD,,
+castor_extractor-0.24.35.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.35.dist-info/METADATA,sha256=-vrfKzS5B3r2qL7tjFjFBR-AizzuVIexEVJHCci7Z5s,26353
+castor_extractor-0.24.35.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.35.dist-info/entry_points.txt,sha256=_F-qeZCybjoMkNb9ErEhnyqXuG6afHIFQhakdBHZsr4,1803
+castor_extractor-0.24.35.dist-info/RECORD,,
castor_extractor/transformation/coalesce/client/type.py DELETED
@@ -1 +0,0 @@
-NodeIDToNamesMapping = dict[str, set[str]]
castor_extractor/transformation/coalesce/client/utils.py DELETED
@@ -1,52 +0,0 @@
-from ....utils import SerializedAsset
-from .type import NodeIDToNamesMapping
-
-_NULL_SUFFIX = ": Null"
-_UNIQUE_SUFFIX = ": Unique"
-
-
-def is_test(
-    query_result: dict,
-    node_id: str,
-    test_names: NodeIDToNamesMapping,
-    column_names: NodeIDToNamesMapping,
-) -> bool:
-    """
-    checks whether a query result is a test result or not.
-
-    all this implementation can soon be replaced by checking whether
-    query_result['type'] == 'sqlTest', which should be GA Apr 28th 2025
-    """
-    # test scoped on the node (table)
-    result_name = query_result["name"]
-    if result_name in test_names.get(node_id, {}):
-        return True
-
-    # test scoped on the column
-    if result_name.endswith(_NULL_SUFFIX) or result_name.endswith(
-        _UNIQUE_SUFFIX
-    ):
-        column_name = result_name.split(":")[0]
-        if column_name in column_names.get(node_id, {}):
-            return True
-    return False
-
-
-def test_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(testName)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        tests = node.get("metadata", {}).get("appliedNodeTests", [])
-        mapping[node_id] = {test["name"] for test in tests}
-    return mapping
-
-
-def column_names_per_node(nodes: SerializedAsset) -> NodeIDToNamesMapping:
-    """mapping nodeID: set(columnNames)"""
-    mapping: dict[str, set[str]] = {}
-    for node in nodes:
-        node_id = node["id"]
-        columns = node.get("metadata", {}).get("columns", [])
-        mapping[node_id] = {column["name"] for column in columns}
-    return mapping
castor_extractor/transformation/coalesce/client/utils_test.py DELETED
@@ -1,54 +0,0 @@
-from .utils import is_test
-
-
-def test_is_test():
-    test_names = {"some-uuid": {"check-mirrors", "check-seatbelt"}}
-    column_names = {"some-uuid": {"carthago", "delenda", "est"}}
-
-    happy_node_test = is_test(
-        query_result={"name": "check-mirrors"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_node_test is True
-
-    unknown_node_test = is_test(
-        query_result={"name": "check-engine"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_test is False
-
-    happy_column_test_unique = is_test(
-        query_result={"name": "carthago: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_unique is True
-
-    happy_column_test_null = is_test(
-        query_result={"name": "carthago: Null"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert happy_column_test_null is True
-
-    unknown_column_test = is_test(
-        query_result={"name": "rome: Unique"},
-        node_id="some-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_column_test is False
-
-    unknown_node_id_test = is_test(
-        query_result={"name": "whatever: Unique"},
-        node_id="unknown-uuid",
-        test_names=test_names,
-        column_names=column_names,
-    )
-    assert unknown_node_id_test is False