castor-extractor 0.16.11__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of castor-extractor might be problematic.
- CHANGELOG.md +20 -0
- castor_extractor/commands/upload.py +4 -4
- castor_extractor/uploader/constant.py +8 -1
- castor_extractor/uploader/upload.py +63 -46
- castor_extractor/uploader/upload_test.py +4 -3
- castor_extractor/uploader/utils.py +10 -0
- castor_extractor/utils/client/api.py +8 -3
- castor_extractor/utils/retry.py +3 -1
- castor_extractor/visualization/tableau_revamp/client/client.py +5 -2
- castor_extractor/visualization/tableau_revamp/client/gql_queries.py +10 -1
- castor_extractor/warehouse/abstract/__init__.py +2 -0
- castor_extractor/warehouse/abstract/asset.py +14 -0
- castor_extractor/warehouse/databricks/client.py +239 -3
- castor_extractor/warehouse/databricks/client_test.py +61 -1
- castor_extractor/warehouse/databricks/extract.py +36 -0
- castor_extractor/warehouse/databricks/format.py +13 -0
- castor_extractor/warehouse/databricks/test_constants.py +79 -0
- castor_extractor/warehouse/databricks/types.py +6 -1
- castor_extractor/warehouse/snowflake/extract.py +2 -0
- castor_extractor/warehouse/snowflake/queries/function.sql +10 -0
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/METADATA +1 -1
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/RECORD +25 -23
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/LICENCE +0 -0
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/WHEEL +0 -0
- {castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED

@@ -1,5 +1,25 @@
 # Changelog
 
+## 0.17.0 - 2024-06-10
+
+* Uploader: redirect to the proxy, replace credentials with token
+
+## 0.16.15 - 2024-06-07
+
+* Tableau: extract database_name for CustomSQLTables
+
+## 0.16.14 - 2024-06-06
+
+* Snowflake: Extract SQL user defined function
+
+## 0.16.13 - 2024-06-05
+
+* Tableau: extract database_name for tables
+
+## 0.16.12 - 2024-06-04
+
+* Databricks: Extract lineage
+
 ## 0.16.11 - 2024-06-03
 
 * Tableau: add extra fields to optimise storage

castor_extractor/commands/upload.py
CHANGED

@@ -13,10 +13,10 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
 def _args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "-
-        "--
+        "-k",
+        "--token",
         required=True,
-        help="""
+        help="""API token provided by Castor""",
     )
     parser.add_argument(
         "-s",
@@ -44,7 +44,7 @@ def _args():
     )
     parsed = parser.parse_args()
     return {
-        "
+        "token": parsed.token,
         "source_id": parsed.source_id,
         "file_path": parsed.file_path,
         "directory_path": parsed.directory_path,

castor_extractor/uploader/constant.py
CHANGED

@@ -1,6 +1,13 @@
 from enum import Enum
 
-
+from ..utils import RetryStrategy
+
+# url of the gcs proxy
+INGEST_URL = "https://ingest.castordoc.com"
+
+RETRY_BASE_MS = 10_000
+RETRY_JITTER_MS = 1_000
+RETRY_STRATEGY = RetryStrategy.LINEAR
 
 
 class FileType(Enum):

castor_extractor/uploader/upload.py
CHANGED

@@ -1,83 +1,100 @@
 #!/usr/bin/env python3
-import json
 import logging
 import ntpath
 from datetime import datetime
-from typing import Iterable, Optional,
+from typing import Dict, Iterable, Optional, Tuple
 from uuid import UUID
 
-
-
-from .
+import requests
+
+from ..utils.retry import retry
+from .constant import (
+    INGEST_URL,
+    PATH_TEMPLATES,
+    RETRY_BASE_MS,
+    RETRY_JITTER_MS,
+    RETRY_STRATEGY,
+    FileType,
+)
 from .env import get_blob_env
-from .utils import
+from .utils import iter_files
 
 logger = logging.getLogger(__name__)
 
+_EXCEPTIONS = (
+    requests.exceptions.Timeout,
+    requests.exceptions.ConnectTimeout,
+)
 
-def _client(credentials: Union[str, dict]) -> storage.Client:
-    """supports dict, string or path to the JSON file"""
-    if isinstance(credentials, dict):
-        return storage.Client.from_service_account_info(credentials)
-    if file_exist(credentials):
-        return storage.Client.from_service_account_json(credentials)
-    if isinstance(credentials, str):
-        credentials = json.loads(credentials)
-        return storage.Client.from_service_account_info(credentials)
-    raise ValueError("needs path or dict for credentials")
 
+def _path_and_url(
+    source_id: UUID,
+    file_type: FileType,
+    file_path: str,
+) -> Tuple[str, str]:
 
-def _path(source_id: UUID, file_type: FileType, file_path: str) -> str:
     now = datetime.utcnow()
     timestamp = int(now.timestamp())
     filename = ntpath.basename(file_path)
-
     path_template = PATH_TEMPLATES[file_type]
-
+    path = path_template.format(
         timestamp=timestamp,
         source_id=source_id,
         filename=filename,
     )
 
+    url = f"{INGEST_URL}/{path}"
 
-
-    credentials: Union[str, dict],
-    source_id: UUID,
-    file_path: str,
-    file_type: FileType,
-) -> storage.Blob:
-    """get the target blob to upload to"""
-    client = _client(credentials)
-    path = _path(source_id, file_type, file_path)
+    return path, url
 
-
-
+
+def _headers(token: str) -> Dict:
+    return {
+        "Authorization": f"Token {token}",
+        "Accept": "text/csv, application/json",
+    }
 
 
 def _upload(
-
+    token: str,
     source_id: UUID,
     file_path: str,
     file_type: FileType,
 ) -> None:
     """
-
-
-
-    file_path: path to the local file to upload
+    Upload the given file to Google Cloud Storage (GCS)
+    - Don't call GCS API directly
+    - Call the ingestion proxy which handles authorisation and uploading
     """
-
+    path, url = _path_and_url(source_id, file_type, file_path)
+    headers = _headers(token)
+    timeout, max_retries = get_blob_env()
 
-
-
-
-
-
-
+    with open(file_path, "rb") as file_content:
+
+        @retry(
+            exceptions=_EXCEPTIONS,
+            max_retries=max_retries,
+            base_ms=RETRY_BASE_MS,
+            jitter_ms=RETRY_JITTER_MS,
+            strategy=RETRY_STRATEGY,
+        )
+        def _request_post():
+            response = requests.post(
+                url=url,
+                headers=headers,
+                data=file_content,
+                timeout=timeout,
+            )
+            response.raise_for_status()
+
+        _request_post()
+
+    logger.info(f"Uploaded {file_path} as {file_type.value} to {path}")
 
 
 def upload_manifest(
-
+    token: str,
     source_id: UUID,
     file_path: str,
 ) -> None:
@@ -86,11 +103,11 @@ def upload_manifest(
     source_id: id for the source
     file_path: path to the local manifest to upload
     """
-    _upload(
+    _upload(token, source_id, file_path, FileType.DBT)
 
 
 def upload(
-
+    token: str,
     source_id: UUID,
     file_type: FileType,
     file_path: Optional[str] = None,
@@ -113,4 +130,4 @@ def upload(
         raise ValueError(message)
 
     for file_ in files:
-        _upload(
+        _upload(token, source_id, file_, file_type)

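For callers of the uploader, the visible change is that the Castor API token replaces the former GCS service-account credentials. A minimal usage sketch, assuming the module paths shown in this diff; the token, source id, and the `directory_path` keyword are illustrative placeholders, not confirmed by the diff:

    from uuid import UUID

    from castor_extractor.uploader.constant import FileType
    from castor_extractor.uploader.upload import upload, upload_manifest

    # hypothetical values: the token provided by Castor and your source id
    TOKEN = "xxxx"  # replaces the old service-account credentials
    SOURCE_ID = UUID("00000000-0000-0000-0000-000000000000")

    # upload every allowed file from a local export directory
    upload(
        token=TOKEN,
        source_id=SOURCE_ID,
        file_type=FileType.VIZ,
        directory_path="./exports",  # assumption: keyword kept from the previous signature
    )

    # or push a single dbt manifest
    upload_manifest(token=TOKEN, source_id=SOURCE_ID, file_path="./target/manifest.json")

Each file is now POSTed to the ingestion proxy with an `Authorization: Token ...` header, and timeouts are retried with the linear back-off constants added in constant.py.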
castor_extractor/uploader/upload_test.py
CHANGED

@@ -1,7 +1,7 @@
 from uuid import UUID
 
-from .constant import FileType
-from .upload import
+from .constant import INGEST_URL, FileType
+from .upload import _path_and_url
 
 
 def test__path():
@@ -9,5 +9,6 @@ def test__path():
     file_type = FileType.VIZ
     file_path = "filename"
 
-    path =
+    path, url = _path_and_url(source_id, file_type, file_path)
     assert path == f"visualization-{source_id}/{file_path}"
+    assert url == f"{INGEST_URL}/{path}"

castor_extractor/uploader/utils.py
CHANGED

@@ -1,13 +1,23 @@
+import logging
 import os
 from typing import Iterator
 
+logger = logging.getLogger(__name__)
+
+_ALLOWED_EXTENSION = (".json", ".csv")
+
 
 def iter_files(repository_path: str) -> Iterator[str]:
     """
     Given a repository path yield all files in that repository
+    Removes file whose extension is not allowed
     """
 
     for file in os.listdir(repository_path):
+        _, ext = os.path.splitext(file)
+        if ext not in _ALLOWED_EXTENSION:
+            logger.info(f"Forbidden file extension : skipping {file}")
+            continue
         file_path = os.path.join(repository_path, file)
 
         if os.path.isfile(file_path):

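The new extension filter means stray files in an export directory are skipped rather than uploaded. A small illustration, using a throwaway directory (names are made up):

    import os
    import tempfile

    from castor_extractor.uploader.utils import iter_files

    # build a directory with one allowed and one forbidden file
    directory = tempfile.mkdtemp()
    for name in ("tables.csv", "notes.txt"):
        with open(os.path.join(directory, name), "w") as f:
            f.write("example")

    # only the .csv file is yielded; the .txt file is logged and skipped
    print(list(iter_files(directory)))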
castor_extractor/utils/client/api.py
CHANGED

@@ -5,7 +5,7 @@ import requests
 
 logger = logging.getLogger(__name__)
 
-
+DEFAULT_TIMEOUT_S = 30
 
 # https://requests.readthedocs.io/en/latest/api/#requests.request
 HttpMethod = Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
@@ -20,7 +20,7 @@ class APIClient:
     def __init__(self, host: str, token: Optional[str] = None):
         self._host = host
         self._token = token or ""
-        self._timeout =
+        self._timeout = DEFAULT_TIMEOUT_S
 
     @staticmethod
     def build_url(host: str, path: str):
@@ -44,7 +44,12 @@ class APIClient:
     ) -> Any:
         logger.debug(f"Calling {method} on {url}")
         result = requests.request(
-            method,
+            method,
+            url,
+            headers=self._headers(),
+            params=params,
+            json=data,
+            timeout=self._timeout,
         )
         result.raise_for_status()
 

castor_extractor/utils/retry.py
CHANGED

@@ -68,7 +68,8 @@ class Retry(BaseModel):
         self._retry_attempts += 1
         wait_ms = self.base() + self.jitter()
         wait_s = float(wait_ms) / MS_IN_SEC
-
+        msg = f"Attempting a new call in {wait_s} seconds, {self._retry_attempts} attempt(s) / {self.max_retries} max retries"
+        logger.warning(msg)
         time.sleep(wait_s)
         return True
 
@@ -93,6 +94,7 @@ def retry(
         try:
            return None, callable(*args, **kwargs)
         except exceptions_ as err:
+            logger.warning(f"Exception within {callable.__name__}")
            return err, None
 
     def _func(*args, **kwargs) -> Any:
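The warnings added here surface each intercepted exception and the back-off before the next attempt. A sketch of the decorator as this release uses it in the uploader and the Databricks client, wrapped around an illustrative flaky call (URL and function name are placeholders):

    import logging

    import requests

    from castor_extractor.utils.retry import retry

    logging.basicConfig(level=logging.INFO)


    @retry(
        exceptions=(requests.exceptions.ConnectTimeout,),
        max_retries=3,
        base_ms=1_000,
    )
    def fetch_health() -> int:
        # any call raising one of the listed exceptions is retried,
        # and each failure now emits a warning log before the next attempt
        response = requests.get("https://example.com/health", timeout=5)
        return response.status_code


    print(fetch_health())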
castor_extractor/visualization/tableau_revamp/client/client.py
CHANGED

@@ -31,8 +31,11 @@ _TSC_ASSETS = (
 # increase the value when extraction is too slow
 # decrease the value when timeouts arise
 _CUSTOM_PAGE_SIZE: Dict[TableauRevampAsset, int] = {
-    #
-
+    # for some clients, extraction of columns tend to hit the node limit
+    # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
+    # the workaround is to reduce pagination
+    TableauRevampAsset.COLUMN: 50,
+    # fields are light but volumes are bigger
     TableauRevampAsset.FIELD: 1000,
     TableauRevampAsset.TABLE: 50,
 }

castor_extractor/visualization/tableau_revamp/client/gql_queries.py
CHANGED

@@ -63,12 +63,21 @@ downstreamWorkbooks { id }
 id
 name
 ... on DatabaseTable {
-connectionType
 fullName
 schema
+database {
+  connectionType
+  id
+  name
+}
 }
 ... on CustomSQLTable {
 query
+database {
+  connectionType
+  id
+  name
+}
 }
 """
 
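With the nested `database` selection in place, both DatabaseTable and CustomSQLTable nodes carry their parent database, which is what enables the `database_name` extraction mentioned in the changelog. A hedged sketch of reading that field, assuming a response node shaped like the selection above (the values are made up):

    from typing import Optional

    # example node shaped like the GraphQL selection above
    table_node = {
        "id": "abc",
        "name": "orders",
        "schema": "public",
        "database": {"connectionType": "snowflake", "id": "db-1", "name": "ANALYTICS"},
    }


    def database_name(node: dict) -> Optional[str]:
        """Return the parent database name when the Metadata API provides one."""
        database = node.get("database") or {}
        return database.get("name")


    print(database_name(table_node))  # ANALYTICS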
castor_extractor/warehouse/abstract/asset.py
CHANGED

@@ -7,6 +7,8 @@ from ...types import ExternalAsset, classproperty
 class WarehouseAsset(ExternalAsset):
     """Assets that can be extracted from warehouses"""
 
+    ADDITIONAL_COLUMN_LINEAGE = "additional_column_lineage"
+    ADDITIONAL_TABLE_LINEAGE = "additional_table_lineage"
     COLUMN = "column"
     COLUMN_LINEAGE = "column_lineage"  # specific to snowflake
     DATABASE = "database"
@@ -19,22 +21,28 @@ class WarehouseAsset(ExternalAsset):
     ROLE = "role"
     SCHEMA = "schema"
     TABLE = "table"
+    FUNCTION = "function"
     USER = "user"
     VIEW_DDL = "view_ddl"
 
     @classproperty
     def optional(cls) -> Set["WarehouseAsset"]:
         return {
+            WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
+            WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
             WarehouseAsset.EXTERNAL_COLUMN_LINEAGE,
             WarehouseAsset.EXTERNAL_TABLE_LINEAGE,
+            WarehouseAsset.FUNCTION,
         }
 
 
 class WarehouseAssetGroup(Enum):
     """Groups of assets that can be extracted together"""
 
+    ADDITIONAL_LINEAGE = "additional_lineage"
     CATALOG = "catalog"
     EXTERNAL_LINEAGE = "external_lineage"
+    FUNCTION = "function"
     QUERY = "query"
     ROLE = "role"
     SNOWFLAKE_LINEAGE = "snowflake_lineage"
@@ -53,6 +61,7 @@ CATALOG_ASSETS = (
 )
 
 # shared by technologies supporting queries
+FUNCTIONS_ASSETS = (WarehouseAsset.FUNCTION,)
 QUERIES_ASSETS = (WarehouseAsset.QUERY,)
 VIEWS_ASSETS = (WarehouseAsset.VIEW_DDL,)
 
@@ -61,6 +70,11 @@ EXTERNAL_LINEAGE_ASSETS = (
     WarehouseAsset.EXTERNAL_TABLE_LINEAGE,
 )
 
+ADDITIONAL_LINEAGE_ASSETS = (
+    WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE,
+    WarehouseAsset.ADDITIONAL_TABLE_LINEAGE,
+)
+
 NON_EXTRACTABLE_ASSETS = {WarehouseAssetGroup.EXTERNAL_LINEAGE}
 
 
castor_extractor/warehouse/databricks/client.py
CHANGED

@@ -1,18 +1,38 @@
 import logging
+from concurrent.futures import ThreadPoolExecutor
 from datetime import date
 from functools import partial
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, Tuple, cast
 
-
+import requests
+
+from ...utils import (
+    SafeMode,
+    at_midnight,
+    date_after,
+    mapping_from_rows,
+    retry,
+    safe_mode,
+)
 from ...utils.client.api import APIClient
 from ...utils.pager import PagerOnToken
 from ..abstract.time_filter import TimeFilter
 from .credentials import DatabricksCredentials
 from .format import DatabricksFormatter
-from .types import TablesColumns
+from .types import Link, Ostr, OTimestampedLink, TablesColumns, TimestampedLink
 
 logger = logging.getLogger(__name__)
 
+_MAX_NUMBER_OF_LINEAGE_ERRORS = 1000
+_MAX_THREADS = 10
+_RETRY_ATTEMPTS = 3
+_RETRY_BASE_MS = 1000
+_RETRY_EXCEPTIONS = [
+    requests.exceptions.ConnectTimeout,
+]
+
+safe_params = SafeMode((BaseException,), _MAX_NUMBER_OF_LINEAGE_ERRORS)
+
 
 def _day_to_epoch_ms(day: date) -> int:
     return int(at_midnight(day).timestamp() * 1000)
@@ -22,6 +42,30 @@ def _day_hour_to_epoch_ms(day: date, hour: int) -> int:
     return int(at_midnight(day).timestamp() * 1000) + (hour * 3600 * 1000)
 
 
+class LineageLinks:
+    """
+    helper class that handles lineage deduplication and filtering
+    """
+
+    def __init__(self):
+        self.lineage: Dict[Link, Ostr] = dict()
+
+    def add(self, timestamped_link: TimestampedLink) -> None:
+        """
+        keep the most recent lineage link, adding to `self.lineage`
+        """
+        parent, child, timestamp = timestamped_link
+        link = (parent, child)
+        if not self.lineage.get(link):
+            self.lineage[link] = timestamp
+        else:
+            if not timestamp:
+                return
+            # keep most recent link; cast for mypy
+            recent = max(cast(str, self.lineage[link]), cast(str, timestamp))
+            self.lineage[link] = recent
+
+
 class DatabricksClient(APIClient):
     """Databricks Client"""
 
@@ -123,6 +167,198 @@ class DatabricksClient(APIClient):
             columns.extend(c_to_add)
         return tables, columns
 
+    @staticmethod
+    def _to_table_path(table: dict) -> Ostr:
+        if table.get("name"):
+            return f"{table['catalog_name']}.{table['schema_name']}.{table['name']}"
+        return None
+
+    @staticmethod
+    def _to_column_path(column: dict) -> Ostr:
+        if column.get("name"):
+            return f"{column['catalog_name']}.{column['schema_name']}.{column['table_name']}.{column['name']}"
+        return None
+
+    def _link(
+        self, path_from: Ostr, path_to: Ostr, timestamp: Ostr
+    ) -> OTimestampedLink:
+        """exclude missing path and self-lineage"""
+        if (not path_from) or (not path_to):
+            return None
+        is_self_lineage = path_from.lower() == path_to.lower()
+        if is_self_lineage:
+            return None
+        return (path_from, path_to, timestamp)
+
+    def _single_table_lineage_links(
+        self, table_path: str, single_table_lineage: dict
+    ) -> List[TimestampedLink]:
+        """
+        process databricks lineage API response for a given table
+        returns a list of (parent, child, timestamp)
+
+        Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
+        we could also have `notebookInfos` or `fileInfo`
+        """
+        links: List[OTimestampedLink] = []
+        # add parent:
+        for link in single_table_lineage.get("upstreams", []):
+            parent = link.get("tableInfo", {})
+            parent_path = self._to_table_path(parent)
+            timestamp: Ostr = parent.get("lineage_timestamp")
+            links.append(self._link(parent_path, table_path, timestamp))
+
+        # add children:
+        for link in single_table_lineage.get("downstreams", []):
+            child = link.get("tableInfo", {})
+            child_path = self._to_table_path(child)
+            timestamp = child.get("lineage_timestamp")
+            links.append(self._link(table_path, child_path, timestamp))
+
+        return list(filter(None, links))
+
+    @safe_mode(safe_params, lambda: [])
+    @retry(
+        exceptions=_RETRY_EXCEPTIONS,
+        max_retries=_RETRY_ATTEMPTS,
+        base_ms=_RETRY_BASE_MS,
+    )
+    def get_single_table_lineage(
+        self, table_path: str
+    ) -> List[TimestampedLink]:
+        """
+        Helper function used in get_lineage_links.
+        Call data lineage API and return the content of the result
+        eg table_path: broward_prd.bronze.account_adjustments
+        FYI: Maximum rate of 50 requests per SECOND
+        """
+        path = "api/2.0/lineage-tracking/table-lineage"
+        payload = {"table_name": table_path, "include_entity_lineage": True}
+        content = self.get(path=path, payload=payload)
+        return self._single_table_lineage_links(table_path, content)
+
+    def _deduplicate_lineage(self, lineages: List[TimestampedLink]) -> dict:
+        deduplicated_lineage = LineageLinks()
+        for timestamped_link in lineages:
+            deduplicated_lineage.add(timestamped_link)
+        return deduplicated_lineage.lineage
+
+    def table_lineage(self, tables: List[dict]) -> List[dict]:
+        """
+        Wrapper function that retrieves all table lineage
+        """
+        # retrieve table lineage
+        with ThreadPoolExecutor(max_workers=_MAX_THREADS) as executor:
+            table_paths = [
+                ".".join([table["schema_id"], table["table_name"]])
+                for table in tables
+            ]
+            results = executor.map(self.get_single_table_lineage, table_paths)
+        lineages = [link for links in results for link in links]
+        deduplicated = self._deduplicate_lineage(lineages)
+        return self.formatter.format_lineage(deduplicated)
+
+    @staticmethod
+    def _paths_for_column_lineage(
+        tables: List[dict], columns: List[dict], table_lineage: List[dict]
+    ) -> List[Tuple[str, str]]:
+        """
+        helper providing a list of candidate columns to look lineage for:
+        we only look for column lineage where there is table lineage
+        """
+        # mapping between table id and its path db.schema.table
+        # table["schema_id"] follows the pattern `db.schema`
+        mapping = {
+            table["id"]: ".".join([table["schema_id"], table["table_name"]])
+            for table in tables
+        }
+
+        tables_with_lineage: Set[str] = set()
+        for t in table_lineage:
+            tables_with_lineage.add(t["parent_path"])
+            tables_with_lineage.add(t["child_path"])
+
+        paths_to_return: List[Tuple[str, str]] = []
+        for column in columns:
+            table_path = mapping[column["table_id"]]
+            if table_path not in tables_with_lineage:
+                continue
+            column_ = (table_path, column["column_name"])
+            paths_to_return.append(column_)
+
+        return paths_to_return
+
+    def _single_column_lineage_links(
+        self, column_path: str, single_column_lineage: dict
+    ) -> List[TimestampedLink]:
+        """
+        process databricks lineage API response for a given table
+        returns a list of (parent, child, timestamp)
+
+        Note: in `upstreams` or `downstreams` we only care about `tableInfo`,
+        we could also have `notebookInfos` or `fileInfo`
+        """
+        links: List[OTimestampedLink] = []
+        # add parent:
+        for link in single_column_lineage.get("upstream_cols", []):
+            parent_path = self._to_column_path(link)
+            timestamp: Ostr = link.get("lineage_timestamp")
+            links.append(self._link(parent_path, column_path, timestamp))
+
+        # add children:
+        for link in single_column_lineage.get("downstream_cols", []):
+            child_path = self._to_column_path(link)
+            timestamp = link.get("lineage_timestamp")
+            links.append(self._link(column_path, child_path, timestamp))
+
+        return list(filter(None, links))
+
+    @safe_mode(safe_params, lambda: [])
+    @retry(
+        exceptions=_RETRY_EXCEPTIONS,
+        max_retries=_RETRY_ATTEMPTS,
+        base_ms=_RETRY_BASE_MS,
+    )
+    def get_single_column_lineage(
+        self,
+        names: Tuple[str, str],
+    ) -> List[TimestampedLink]:
+        """
+        Helper function used in get_lineage_links.
+        Call data lineage API and return the content of the result
+
+        eg table_path: broward_prd.bronze.account_adjustments
+        FYI: Maximum rate of 10 requests per SECOND
+        """
+        table_path, column_name = names
+        api_path = "api/2.0/lineage-tracking/column-lineage"
+        payload = {
+            "table_name": table_path,
+            "column_name": column_name,
+            "include_entity_lineage": True,
+        }
+        content = self.get(path=api_path, payload=payload)
+        column_path = f"{table_path}.{column_name}"
+        return self._single_column_lineage_links(column_path, content)
+
+    def column_lineage(
+        self, tables: List[dict], columns: List[dict], table_lineage: List[dict]
+    ) -> List[dict]:
+        """
+        Wrapper function that retrieves all column lineage
+        we only try to retrieve column lineage if we found table lineage
+        """
+        candidate_paths = self._paths_for_column_lineage(
+            tables, columns, table_lineage
+        )
+        lineages: List[TimestampedLink] = [
+            link
+            for paths in candidate_paths
+            for link in self.get_single_column_lineage(paths)
+        ]
+        deduplicated = self._deduplicate_lineage(lineages)
+        return self.formatter.format_lineage(deduplicated)
+
     @staticmethod
     def _time_filter(time_filter: Optional[TimeFilter]) -> dict:
         """time filter to retrieve Databricks' queries"""
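The deduplication rule in `LineageLinks` is: one entry per (parent, child) pair, keeping the most recent non-empty timestamp. A short sketch of that behaviour, using the class exactly as added above (paths and timestamps mirror the test data):

    from castor_extractor.warehouse.databricks.client import LineageLinks

    links = LineageLinks()

    # the same (parent, child) pair reported twice with different timestamps
    links.add(("dev.bronze.analytics", "dev.silver.analytics", "2024-04-18 20:20:20.0"))
    links.add(("dev.bronze.analytics", "dev.silver.analytics", "2024-04-19 20:20:20.0"))

    # a link without a timestamp never overwrites an existing one
    links.add(("dev.bronze.analytics", "dev.silver.analytics", None))

    # {('dev.bronze.analytics', 'dev.silver.analytics'): '2024-04-19 20:20:20.0'}
    print(links.lineage)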
castor_extractor/warehouse/databricks/client_test.py
CHANGED

@@ -1,9 +1,16 @@
 from datetime import date
+from unittest.mock import Mock, patch
 
 from freezegun import freeze_time
 
 from ..abstract.time_filter import TimeFilter
-from .client import DatabricksClient, _day_hour_to_epoch_ms
+from .client import DatabricksClient, LineageLinks, _day_hour_to_epoch_ms
+from .test_constants import (
+    CLOSER_DATE,
+    MOCK_TABLES_FOR_TABLE_LINEAGE,
+    OLDER_DATE,
+    TABLE_LINEAGE_SIDE_EFFECT,
+)
 
 
 def test__day_hour_to_epoch_ms():
@@ -97,3 +104,56 @@ def test_DatabricksClient__match_table_with_user():
     table_without_owner = {"id": 1, "owner_email": None}
     actual = client._match_table_with_user(table_without_owner, user_mapping)
     assert actual == table_without_owner
+
+
+@patch(
+    "source.packages.extractor.castor_extractor.warehouse.databricks.client.DatabricksClient.get",
+    side_effect=TABLE_LINEAGE_SIDE_EFFECT,
+)
+def test_DatabricksClient_table_lineage(mock_get):
+    client = DatabricksClient(Mock())
+
+    lineage = client.table_lineage(MOCK_TABLES_FOR_TABLE_LINEAGE)
+    assert len(lineage) == 2
+
+    expected_link_1 = {
+        "parent_path": "dev.silver.pre_analytics",
+        "child_path": "dev.silver.analytics",
+        "timestamp": OLDER_DATE,
+    }
+    expected_link_2 = {
+        "parent_path": "dev.bronze.analytics",
+        "child_path": "dev.silver.analytics",
+        "timestamp": CLOSER_DATE,
+    }
+    assert expected_link_1 in lineage
+    assert expected_link_2 in lineage
+
+
+def test_LineageLinks_add():
+    links = LineageLinks()
+    timestamped_link = ("parent", "child", None)
+    expected_key = ("parent", "child")
+
+    links.add(timestamped_link)
+
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] is None
+
+    # we replace None by an actual timestamp
+    timestamped_link = ("parent", "child", OLDER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == OLDER_DATE
+
+    # we update with the more recent timestamp
+    timestamped_link = ("parent", "child", CLOSER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == CLOSER_DATE
+
+    # we keep the more recent timestamp
+    timestamped_link = ("parent", "child", OLDER_DATE)
+    links.add(timestamped_link)
+    assert expected_key in links.lineage
+    assert links.lineage[expected_key] == CLOSER_DATE

castor_extractor/warehouse/databricks/extract.py
CHANGED

@@ -3,6 +3,7 @@ from typing import Dict, Optional
 
 from ...utils import AbstractStorage, LocalStorage, write_summary
 from ..abstract import (
+    ADDITIONAL_LINEAGE_ASSETS,
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
     QUERIES_ASSETS,
@@ -17,6 +18,7 @@ from .client import DatabricksClient
 from .credentials import to_credentials
 
 DATABRICKS_ASSETS: SupportedAssets = {
+    WarehouseAssetGroup.ADDITIONAL_LINEAGE: ADDITIONAL_LINEAGE_ASSETS,
     WarehouseAssetGroup.CATALOG: CATALOG_ASSETS,
     WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
     WarehouseAssetGroup.ROLE: (WarehouseAsset.USER,),
@@ -94,6 +96,39 @@ class DatabricksExtractionProcessor:
         logger.info(f"Extracted {len(columns)} columns to {location}")
         return catalog_locations
 
+    def extract_lineage(self) -> Paths:
+        if self._should_not_reextract(WarehouseAssetGroup.ADDITIONAL_LINEAGE):
+            return self._existing_group_paths(
+                WarehouseAssetGroup.ADDITIONAL_LINEAGE
+            )
+        lineage_locations: Dict[str, str] = dict()
+
+        # extract catalog
+        databases = self._client.databases()
+        schemas = self._client.schemas(databases)
+        users = self._client.users()
+        tables, columns = self._client.tables_and_columns(schemas, users)
+        logger.info("Extracted pre-requisite catalog. Next comes lineage")
+
+        # extract table lineage
+        table_lineage = self._client.table_lineage(tables)
+        table_lineage_key = WarehouseAsset.ADDITIONAL_TABLE_LINEAGE.value
+        location = self._storage.put(table_lineage_key, table_lineage)
+        lineage_locations[table_lineage_key] = location
+        msg = f"Extracted {len(table_lineage)} table lineage to {location}"
+        logger.info(msg)
+
+        # extract column lineage
+        column_lineage = self._client.column_lineage(
+            tables, columns, table_lineage
+        )
+        column_lineage_key = WarehouseAsset.ADDITIONAL_COLUMN_LINEAGE.value
+        location = self._storage.put(column_lineage_key, column_lineage)
+        lineage_locations[column_lineage_key] = location
+        msg = f"Extracted {len(column_lineage)} column lineage to {location}"
+        logger.info(msg)
+        return lineage_locations
+
     def extract_query(self, time_filter: OTimeFilter = None) -> Paths:
         """extract yesterday's queries and return their location"""
         if self._should_not_reextract(WarehouseAssetGroup.QUERY):
@@ -149,6 +184,7 @@ def extract_all(**kwargs) -> None:
     )
 
     extractor.extract_catalog()
+    extractor.extract_lineage()
     extractor.extract_query()
     extractor.extract_role()
     extractor.extract_view_ddl()
castor_extractor/warehouse/databricks/format.py
CHANGED

@@ -95,6 +95,19 @@ class DatabricksFormatter:
 
         return tables, columns
 
+    @staticmethod
+    def format_lineage(timestamps: dict) -> List[dict]:
+        lineage: List[dict] = []
+        for link, timestamp in timestamps.items():
+            parent_path, child_path = link
+            link_ = {
+                "parent_path": parent_path,
+                "child_path": child_path,
+                "timestamp": timestamp,
+            }
+            lineage.append(link_)
+        return lineage
+
     @staticmethod
     def format_query(raw_queries: List[dict]) -> List[dict]:
         queries = []
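`format_lineage` is the bridge between the deduplicated `{(parent, child): timestamp}` mapping built by `LineageLinks` and the row-per-link payload that the extraction processor writes to storage. A minimal sketch:

    from castor_extractor.warehouse.databricks.format import DatabricksFormatter

    deduplicated = {
        ("dev.silver.pre_analytics", "dev.silver.analytics"): "2024-04-18 20:20:20.0",
        ("dev.bronze.analytics", "dev.silver.analytics"): "2024-04-19 20:20:20.0",
    }

    rows = DatabricksFormatter.format_lineage(deduplicated)
    # [{'parent_path': ..., 'child_path': ..., 'timestamp': ...}, ...]
    print(rows)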
castor_extractor/warehouse/databricks/test_constants.py
ADDED

@@ -0,0 +1,79 @@
+OLDER_DATE = "2024-04-18 20:20:20.0"
+CLOSER_DATE = "2024-04-19 20:20:20.0"
+
+MOCK_TABLES_FOR_TABLE_LINEAGE = [
+    {
+        "id": "f51ba2ca-8cc3-4de6-8f8b-730359e8f40f",
+        "schema_id": "dev.silver",
+        "table_name": "analytics",
+    },
+    {
+        "id": "4e140bdc-a67c-4b68-8a07-c684657d8b44",
+        "schema_id": "dev.silver",
+        "table_name": "pre_analytics",
+    },
+    {
+        "id": "7d403198-55ea-4a40-9995-6ee2f4c79dfa",
+        "schema_id": "dev.bronze",
+        "table_name": "analytics",
+    },
+]
+
+_RAW_LINEAGE_DEV_SILVER_ANALYTICS = {
+    "upstreams": [
+        {  # there could be other keys: jobInfos, notebookInfos, queryInfos
+            "tableInfo": {
+                "name": "pre_analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "bronze",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": CLOSER_DATE,
+            }
+        },
+    ],
+    "downstreams": [],
+}
+_RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS = {
+    "upstreams": [],
+    "downstreams": [
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+    ],
+}
+_RAW_LINEAGE_DEV_BRONZE_ANALYTICS = {
+    "upstreams": [],
+    "downstreams": [
+        {
+            "tableInfo": {
+                "name": "analytics",
+                "catalog_name": "dev",
+                "schema_name": "silver",
+                "table_type": "PERSISTED_VIEW",  # not used
+                "lineage_timestamp": OLDER_DATE,
+            }
+        },
+    ],
+}
+
+# should be in the same order as MOCK_TABLES_FOR_TABLE_LINEAGE
+TABLE_LINEAGE_SIDE_EFFECT: tuple = (
+    _RAW_LINEAGE_DEV_SILVER_ANALYTICS,
+    _RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS,
+    _RAW_LINEAGE_DEV_BRONZE_ANALYTICS,
+)

castor_extractor/warehouse/snowflake/extract.py
CHANGED

@@ -4,6 +4,7 @@ from ...utils import LocalStorage, from_env, write_summary
 from ..abstract import (
     CATALOG_ASSETS,
     EXTERNAL_LINEAGE_ASSETS,
+    FUNCTIONS_ASSETS,
     QUERIES_ASSETS,
     VIEWS_ASSETS,
     SQLExtractionProcessor,
@@ -20,6 +21,7 @@ logger = logging.getLogger(__name__)
 
 SNOWFLAKE_ASSETS: SupportedAssets = {
     WarehouseAssetGroup.CATALOG: CATALOG_ASSETS,
+    WarehouseAssetGroup.FUNCTION: FUNCTIONS_ASSETS,
     WarehouseAssetGroup.QUERY: QUERIES_ASSETS,
     WarehouseAssetGroup.VIEW_DDL: VIEWS_ASSETS,
     WarehouseAssetGroup.ROLE: (
castor_extractor/warehouse/snowflake/queries/function.sql
ADDED

@@ -0,0 +1,10 @@
+SELECT
+    f.function_name AS name,
+    CONCAT(f.function_catalog, '.', f.function_schema, '.', f.function_name) AS path,
+    f.argument_signature AS signature,
+    f.function_definition AS definition
+FROM snowflake.account_usage.functions f
+WHERE TRUE
+    AND f.function_catalog NOT IN ('SNOWFLAKE', 'UTIL_DB')
+    AND f.function_language = 'SQL'
+    AND deleted IS NULL
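Outside the extractor, the same query can be run directly against `snowflake.account_usage` to preview which SQL user-defined functions would be extracted. A sketch using the standard Snowflake Python connector; the connection parameters are placeholders, and the extractor itself builds its own connection from its credentials module:

    import snowflake.connector

    FUNCTION_SQL = """
    SELECT
        f.function_name AS name,
        CONCAT(f.function_catalog, '.', f.function_schema, '.', f.function_name) AS path,
        f.argument_signature AS signature,
        f.function_definition AS definition
    FROM snowflake.account_usage.functions f
    WHERE TRUE
        AND f.function_catalog NOT IN ('SNOWFLAKE', 'UTIL_DB')
        AND f.function_language = 'SQL'
        AND deleted IS NULL
    """

    # placeholder credentials, for illustration only
    connection = snowflake.connector.connect(
        account="my_account", user="my_user", password="my_password"
    )
    try:
        rows = connection.cursor().execute(FUNCTION_SQL).fetchall()
        print(f"{len(rows)} SQL user-defined functions")
    finally:
        connection.close()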
{castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=
+CHANGELOG.md,sha256=EVZ9vhIVN7HLn5PYkRyBWyT3hk72Nt3i1SghwSipfR4,10957
 Dockerfile,sha256=HcX5z8OpeSvkScQsN-Y7CNMUig_UB6vTMDl7uqzuLGE,303
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
 README.md,sha256=uF6PXm9ocPITlKVSh9afTakHmpLx3TvawLf-CbMP3wM,3578
@@ -23,7 +23,7 @@ castor_extractor/commands/extract_snowflake.py,sha256=vYiruxRoo--GeMemOGsSE1w9kc
 castor_extractor/commands/extract_sqlserver.py,sha256=lwhbcNChaXHZgMgSOch3faVr7WJw-sDU6GHl3lzBt_0,1141
 castor_extractor/commands/extract_tableau.py,sha256=u-6UCd-kfXwyhNWYxZusqtgTTYkf4gAJS1vRIYWsAVU,1415
 castor_extractor/commands/file_check.py,sha256=PkXFK-kPoG8PpvBP-PCvVdreqwYw6Y1fTh2QzDxACsM,2684
-castor_extractor/commands/upload.py,sha256=
+castor_extractor/commands/upload.py,sha256=tAyHfIDOdUpD0yMJe2f64nXpaVnIbgYXi4bYx2nNvNU,1922
 castor_extractor/file_checker/__init__.py,sha256=OSt6YLhUT42U_Cp3LCLHMVruwDkksL75Ij13X2UPnVk,119
 castor_extractor/file_checker/column.py,sha256=fMchy5v-Sd-0xuYS0V9mob7wnljslzWLhQGqrKGybdk,3097
 castor_extractor/file_checker/column_test.py,sha256=1j8PxvmvmJgpd-mk30iMYOme32ovPSIn4yCXywFoXrg,1935
@@ -38,16 +38,16 @@ castor_extractor/file_checker/templates/generic_warehouse.py,sha256=zvnWnYB8FNvh
 castor_extractor/logger.py,sha256=ovf1mBEKwbJBskBXoqHbcAomBrp58mUwSrCWtEMlYPM,1197
 castor_extractor/types.py,sha256=-QgiOaq--nXUsYLy_oESDrYbRMxs353-YiQnG1blJvU,1303
 castor_extractor/uploader/__init__.py,sha256=SSRtwjg-dNoxME-RJy9G1flASiUKAC5bH1htq3CURQg,75
-castor_extractor/uploader/constant.py,sha256=
+castor_extractor/uploader/constant.py,sha256=yTigLHDlYwoRr6CpFIl7ReElFsQd4H-qkluMZJPWSx0,865
 castor_extractor/uploader/env.py,sha256=5HSniVSOYVg4u38O4k8TB_qaJq9s8yJ1hjedkq_gdVg,878
 castor_extractor/uploader/env_test.py,sha256=ClCWWtwd2N-5ClIDUxVMeKkWfhhOTxpppsXUDmdjxSg,472
-castor_extractor/uploader/upload.py,sha256=
-castor_extractor/uploader/upload_test.py,sha256=
-castor_extractor/uploader/utils.py,sha256=
+castor_extractor/uploader/upload.py,sha256=bTWD1_-hmJ6q1qcEosjZ96wsBtWDnWoCt692NYX_Nko,3228
+castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
+castor_extractor/uploader/utils.py,sha256=Tx_i875L2vJ8btOLV3-L0UMEFiyhH8E5n0XXRyLjO0Y,793
 castor_extractor/utils/__init__.py,sha256=bmzAOc-PKsVreMJtF7DGpPQeHrVqxWel_BblRftt6Ag,1186
 castor_extractor/utils/client/__init__.py,sha256=CRE-xJKm6fVV9dB8ljzB5YoOxX4I1sCD1KSgqs3Y8_Y,161
 castor_extractor/utils/client/abstract.py,sha256=aA5Qcb9TwWDSMq8WpXbGkOB20hehwX2VTpqQAwV76wk,2048
-castor_extractor/utils/client/api.py,sha256=
+castor_extractor/utils/client/api.py,sha256=z1o4fteWx1HxNTqCYihl9sGkIgSQTbd8lW_B9Y2wyeQ,1742
 castor_extractor/utils/client/api_test.py,sha256=NSMdXg1FLc37erqHp2FZsIsogWVv6lFSs7rDXHikr-E,542
 castor_extractor/utils/client/postgres.py,sha256=n6ulaT222WWPY0_6qAZ0MHF0m91HtI9mMqL71nyygo0,866
 castor_extractor/utils/client/query.py,sha256=O6D5EjD1KmBlwa786Uw4D4kzxx97_HH50xIIeSWt0B8,205
@@ -80,7 +80,7 @@ castor_extractor/utils/pager/pager_on_id_test.py,sha256=CfAXhXaAmCXnm0oflj8_82An
 castor_extractor/utils/pager/pager_on_token.py,sha256=G442SKl4BXJFMPbYIIgCk5M8wl7V3jMg3K1WUUkl0I0,1579
 castor_extractor/utils/pager/pager_on_token_test.py,sha256=w2GCUGKR3cD5lfmtFAsNvExtzxkYdBR0pusBrGKFQ08,2548
 castor_extractor/utils/pager/pager_test.py,sha256=QPBVShSXhkiYZUfnAMs43xnys6CD8pAhL3Jhj-Ov2Xc,1705
-castor_extractor/utils/retry.py,sha256=
+castor_extractor/utils/retry.py,sha256=OsUS3qysHCkgWge8BgBwyuvoWcJ6pR_RQmQDcHlors4,3410
 castor_extractor/utils/retry_test.py,sha256=nsMttlmyKygVcffX3Hay8U2S1BspkGPiCmzIXPpLKyk,2230
 castor_extractor/utils/safe.py,sha256=jpfIimwdBSVUvU2DPFrhqpKC_DSYwxQqd08MlIkSODY,1967
 castor_extractor/utils/safe_test.py,sha256=IHN1Z761tYMFslYC-2HAfkXmFPh4LYSqNLs4QZwykjk,2160
@@ -244,16 +244,16 @@ castor_extractor/visualization/tableau/usage.py,sha256=LlFwlbEr-EnYUJjKZha99CRCR
 castor_extractor/visualization/tableau_revamp/__init__.py,sha256=a3DGjQhaz17gBqW-E84TAgupKbqLC40y5Ajo1yn-ot4,156
 castor_extractor/visualization/tableau_revamp/assets.py,sha256=owlwaI2E4UKk1YhkaHgaAXx6gu3Op6EqZ7bjp0tHI6s,351
 castor_extractor/visualization/tableau_revamp/client/__init__.py,sha256=wmS9uLtUiqNYVloi0-DgD8d2qzu3RVZEAtWiaDp6G_M,90
-castor_extractor/visualization/tableau_revamp/client/client.py,sha256=
+castor_extractor/visualization/tableau_revamp/client/client.py,sha256=RSoHDfz79ma0YJRGpiCihnwLGmoxLzphYrxRVyvByHI,9742
 castor_extractor/visualization/tableau_revamp/client/credentials.py,sha256=fHG32egq6ll2U4BNazalMof_plzfCMQjrN9WOs6kezk,3014
 castor_extractor/visualization/tableau_revamp/client/errors.py,sha256=dTe1shqmWmAXpDpCz-E24m8dGYjt6rvIGV9qQb4jnvI,150
-castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256
+castor_extractor/visualization/tableau_revamp/client/gql_queries.py,sha256=-V3ToD5Gi7nmfVB2OxTOZw8dcOiF7_ciSWjjW2UdvvI,2270
 castor_extractor/visualization/tableau_revamp/client/tsc_fields.py,sha256=WsDliPCo-XsQ7wN-j0gpW9bdxCHvgH-aePywiltzfbU,688
 castor_extractor/visualization/tableau_revamp/constants.py,sha256=PcdudAogQhi3e-knalhgliMKjy5ahN0em_-7XSLrnxM,87
 castor_extractor/visualization/tableau_revamp/extract.py,sha256=2SLUxp5okM4AcEJJ61ZgcC2ikfZZl9MH17CEXMXmgl0,1450
 castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-castor_extractor/warehouse/abstract/__init__.py,sha256=
-castor_extractor/warehouse/abstract/asset.py,sha256=
+castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
+castor_extractor/warehouse/abstract/asset.py,sha256=Qs7T2Iw7KHgWVT2aAoBfCQ8tB143cUZY-DRUSkpgvGU,2689
 castor_extractor/warehouse/abstract/asset_test.py,sha256=_kd4ybNlWSAdSdEgJKC-jhJTa1nMRa9i8RO3YbqKLM4,758
 castor_extractor/warehouse/abstract/extract.py,sha256=fVBhdE-yMI_g6RBYZcr7q-ZVW7jK7WVkO_GO_KfkRqg,2908
 castor_extractor/warehouse/abstract/query.py,sha256=GAgeISCmAdrkTKzFGO79hQDf6SA6EFrrlW43w-LiXKo,2632
@@ -277,13 +277,14 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
 castor_extractor/warehouse/bigquery/query.py,sha256=hrFfjd5jW2oQnZ6ozlkn-gDe6sCIzu5zSX19T9W6fIk,4162
 castor_extractor/warehouse/bigquery/types.py,sha256=LZVWSmE57lOemNbB5hBRyYmDk9bFAU4nbRaJWALl6N8,140
 castor_extractor/warehouse/databricks/__init__.py,sha256=bTvDxjGQGM2J3hOnVhfNmFP1y8DK0tySiD_EXe5_xWE,200
-castor_extractor/warehouse/databricks/client.py,sha256=
-castor_extractor/warehouse/databricks/client_test.py,sha256=
+castor_extractor/warehouse/databricks/client.py,sha256=oHR_htE25p5tiAAFZKbF48efo7tqIENW4dAGA7yEqHg,16895
+castor_extractor/warehouse/databricks/client_test.py,sha256=KNp4Hi_CC6GwiW2QDJQQwqALfUebuT9D_qL6FuP_8tY,5246
 castor_extractor/warehouse/databricks/credentials.py,sha256=PpGv5_GP320UQjV_gvaxSpOw58AmqSznmjGhGfe6bdU,655
-castor_extractor/warehouse/databricks/extract.py,sha256
-castor_extractor/warehouse/databricks/format.py,sha256=
+castor_extractor/warehouse/databricks/extract.py,sha256=VX-3uo5dZucenrg-wnPur3CxOgpC5H7Ds92TO7OTAjc,7379
+castor_extractor/warehouse/databricks/format.py,sha256=2bRy2fa45NW3uk030rmyba4n2Em-NnyZPBurUslEbcw,5522
 castor_extractor/warehouse/databricks/format_test.py,sha256=iPmdJof43fBYL1Sa_fBrCWDQHCHgm7IWCZag1kWkj9E,1970
-castor_extractor/warehouse/databricks/
+castor_extractor/warehouse/databricks/test_constants.py,sha256=Hm96yq_ltVAKv7WYhYz637r4Cuj-1cCdyOuxMEe3J-Q,2246
+castor_extractor/warehouse/databricks/types.py,sha256=hD6gC8oiT3QSWEvbtgUOGK_lLzzz36sEauB3lS_wxlE,218
 castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
 castor_extractor/warehouse/mysql/client.py,sha256=IwoJvbmE5VZkMCP9yHf6ta3_AQPEuBPrZZ3meefbcJs,974
 castor_extractor/warehouse/mysql/client_test.py,sha256=wRTv-3c5chy_HKj-buasNiYOOCIfynYqbabM4Hxdh5E,1052
@@ -334,11 +335,12 @@ castor_extractor/warehouse/snowflake/client.py,sha256=XT0QLVNff_586SDuMe40iu8FCw
 castor_extractor/warehouse/snowflake/client_test.py,sha256=ihWtOOAQfh8pu5JTr_EWfqefKOVIaJXznACURzaU1Qs,1432
 castor_extractor/warehouse/snowflake/credentials.py,sha256=wbUdbx9jVSHzg2kNDhMFuDstbVTyZOcGAwnSeGeFIqs,875
 castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
-castor_extractor/warehouse/snowflake/extract.py,sha256=
+castor_extractor/warehouse/snowflake/extract.py,sha256=fcze0VBe9OOAFSr25T9L6CY506Vm_xDEvvy8NWuLW1s,2956
 castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
 castor_extractor/warehouse/snowflake/queries/column.sql,sha256=pAW2UNnut0a483OY2rjOXCdCtQg0g254g61Bt51CIB4,1803
 castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
 castor_extractor/warehouse/snowflake/queries/database.sql,sha256=ifZXoKUXtsrGOxml6AcNhA4yybIyatH5va7bcp-lgCU,483
+castor_extractor/warehouse/snowflake/queries/function.sql,sha256=8LRh0ybhd-RldJ8UZspWUm3yv52evq11O2uqIO4KqeQ,372
 castor_extractor/warehouse/snowflake/queries/grant_to_role.sql,sha256=O7AJ1LzoXGDFmiVvQ8EMJ5x8FSAnaxRPdmRyAlEmkUM,272
 castor_extractor/warehouse/snowflake/queries/grant_to_user.sql,sha256=7AalVajU5vRRpIiys1igSwmDXirbwpMTvJr2ihSz2NE,143
 castor_extractor/warehouse/snowflake/queries/query.sql,sha256=-OYcWUvdPBkpOfezkZaW7hrOdDz3JyoqjNdRm_88Rsk,1779
@@ -368,8 +370,8 @@ castor_extractor/warehouse/synapse/queries/schema.sql,sha256=aX9xNrBD_ydwl-znGSF
 castor_extractor/warehouse/synapse/queries/table.sql,sha256=mCE8bR1Vb7j7SwZW2gafcXidQ2fo1HwxcybA8wP2Kfs,1049
 castor_extractor/warehouse/synapse/queries/user.sql,sha256=sTb_SS7Zj3AXW1SggKPLNMCd0qoTpL7XI_BJRMaEpBg,67
 castor_extractor/warehouse/synapse/queries/view_ddl.sql,sha256=3EVbp5_yTgdByHFIPLHmnoOnqqLE77SrjAwFDvu4e54,249
-castor_extractor-0.
-castor_extractor-0.
-castor_extractor-0.
-castor_extractor-0.
-castor_extractor-0.
+castor_extractor-0.17.0.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.17.0.dist-info/METADATA,sha256=mPiUyxCqXFifcPbhcOPFsnkPAV4OcWXoYzGeUKlbkoo,6582
+castor_extractor-0.17.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+castor_extractor-0.17.0.dist-info/entry_points.txt,sha256=SbyPk58Gh-FRztfCNnUZQ6w7SatzNJFZ6GIJLNsy7tI,1427
+castor_extractor-0.17.0.dist-info/RECORD,,

{castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/LICENCE
File without changes

{castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/WHEEL
File without changes

{castor_extractor-0.16.11.dist-info → castor_extractor-0.17.0.dist-info}/entry_points.txt
File without changes