PyPI - castor-extractor - Versions diffs - 0.24.2__py3-none-any.whl → 0.24.7__py3-none-any.whl - Mend

castor-extractor 0.24.2__py3-none-any.whl → 0.24.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of castor-extractor might be problematic. Click here for more details.

Files changed (21) hide show

CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,29 @@
 # Changelog
+## 0.24.7 - 2025-04-07
+* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
+## 0.24.6 - 2025-04-03
+* Domo - extract cards metadata by batch to prevent from hitting URL max length
+## 0.24.5 - 2025-04-02
+* bump dependencies: google-cloud-storage
+## 0.24.4 - 2025-03-19
+* Snowflake:
+  * improve the list of ignored queries in the query history extraction
+    * ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
+    * ignore queries with empty text
+  * filter out schemas with empty names
+## 0.24.3 - 2025-03-18
+* Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
 ## 0.24.2 - 2025-03-17
 * Rename Revamped Tableau Connector classes

castor_extractor/utils/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from .argument_parser import parse_filled_arguments
+from .batch import batch_of_length
 from .client import (
     AbstractSourceClient,
     APIClient,

castor_extractor/utils/batch.py ADDED Viewed

@@ -0,0 +1,16 @@
+from typing import Iterator, List, TypeVar
+T = TypeVar("T")
+def batch_of_length(
+    elements: List[T],
+    batch_size: int,
+) -> Iterator[List[T]]:
+    """
+    Split the given elements into consecutive chunks of at most `batch_size`
+    items, preserving order. Only the last chunk can be shorter; an empty
+    input yields nothing.
+    This is a generator: the batch size check runs when iteration starts,
+    and raises AssertionError if `batch_size` is lower than 1.
+    """
+    # `>= 1` so that the check agrees with its message: 1 is a valid size
+    assert batch_size >= 1, "batch size must be greater or equal to 1"
+    for index in range(0, len(elements), batch_size):
+        # slicing already clamps the upper bound to the end of the list
+        yield elements[index : index + batch_size]

castor_extractor/utils/batch_test.py ADDED Viewed

@@ -0,0 +1,27 @@
+import pytest
+from .batch import batch_of_length
+def test_batch_of_length():
+    """
+    Checks the batches produced for several batch sizes over an 8-element
+    list, and that a negative batch size is rejected.
+    """
+    elements = ["a", "b", "c", "d", "e", "f", "g", "h"]
+    # size 3 over 8 elements: two full batches, then the 2 remaining elements
+    result = list(batch_of_length(elements, 3))
+    assert result == [
+        ["a", "b", "c"],
+        ["d", "e", "f"],
+        ["g", "h"],
+    ]
+    # size larger than the input: everything lands in a single batch
+    result = list(batch_of_length(elements, 1000))
+    assert result == [
+        elements,
+    ]
+    # size 7 over 8 elements: the last batch holds a single element
+    result = list(batch_of_length(elements, 7))
+    assert result == [
+        ["a", "b", "c", "d", "e", "f", "g"],
+        ["h"],
+    ]
+    # batch_of_length is a generator, so it must be consumed (here with
+    # `list`) for the batch size assertion to be evaluated
+    with pytest.raises(AssertionError):
+        list(batch_of_length(elements, -12))

castor_extractor/visualization/domo/client/client.py CHANGED Viewed

@@ -9,6 +9,7 @@ import requests
 from ....utils import (
     RequestSafeMode,
     at_midnight,
+    batch_of_length,
     current_date,
     empty_iterator,
     handle_response,
@@ -48,6 +49,8 @@ _RETRY_BASE_MS = 10 * 60 * 1000  # 10 minutes
 _PARENT_FOLDER = "/Dashboards"
+_CARDS_BATCH_SIZE = 100
 logger = logging.getLogger(__name__)
@@ -156,16 +159,19 @@ class DomoClient:
         return all_results
+    def _cards_metadata(self, card_ids: list[int]) -> Iterator[dict]:
+        """
+        Yields the elements fetched from the cards metadata endpoint for the
+        given card ids, querying them in batches of at most
+        `_CARDS_BATCH_SIZE` ids (one endpoint per batch).
+        """
+        # batch to avoid hitting the URL max length
+        for batch_card_ids in batch_of_length(card_ids, _CARDS_BATCH_SIZE):
+            endpoint = self._endpoint_factory.cards_metadata(batch_card_ids)
+            yield from self._get_element(endpoint)
     def _datasources(self, card_ids: list[int]) -> RawData:
         """Yields all distinct datasources associated to the given cards"""
         if not card_ids:
             return empty_iterator()
-        endpoint = self._endpoint_factory.cards_metadata(card_ids)
-        cards_metadata = self._get_element(endpoint)
         processed: set[str] = set()
-        for card in cards_metadata:
+        for card in self._cards_metadata(card_ids):
             for datasource in card["datasources"]:
                 id_ = datasource["dataSourceId"]
                 if id_ in processed:

castor_extractor/visualization/tableau/client/client_metadata_api.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import logging
 from collections.abc import Iterator
 from typing import Optional
@@ -9,15 +10,14 @@ from ..constants import DEFAULT_PAGE_SIZE
 from .errors import TableauApiError, TableauApiTimeout
 from .gql_queries import FIELDS_QUERIES, GQL_QUERIES, QUERY_TEMPLATE
+logger = logging.getLogger(__name__)
 # increase the value when extraction is too slow
 # decrease the value when timeouts arise
 _CUSTOM_PAGE_SIZE: dict[TableauAsset, int] = {
-    # for some clients, extraction of columns tend to hit the node limit
-    # https://community.tableau.com/s/question/0D54T00000YuK60SAF/metadata-query-nodelimitexceeded-error
-    # the workaround is to reduce pagination
-    TableauAsset.COLUMN: 50,
     # fields are light but volumes are bigger
     TableauAsset.FIELD: 1000,
+    # tables are sometimes heavy
     TableauAsset.TABLE: 50,
 }
@@ -51,8 +51,9 @@ def _check_errors(answer: dict) -> None:
 def gql_query_scroll(
     server,
-    query: str,
     resource: str,
+    fields: str,
+    page_size: int,
 ) -> Iterator[SerializedAsset]:
     """
     Iterate over GQL query results, handling pagination and cursor
@@ -67,23 +68,27 @@ def gql_query_scroll(
         max_retries=_RETRY_COUNT,
         base_ms=_RETRY_BASE_MS,
     )
-    def _call(cursor: Optional[str]) -> dict:
-        # If cursor is defined it must be quoted else use null token
-        token = "null" if cursor is None else f'"{cursor}"'
-        query_ = query.replace("AFTER_TOKEN_SIGNAL", token)
-        answer = server.metadata.query(query_)
+    def _call(first: int, offset: int) -> dict:
+        query = QUERY_TEMPLATE.format(
+            resource=resource,
+            fields=fields,
+            first=first,
+            offset=offset,
+        )
+        answer = server.metadata.query(query)
         _check_errors(answer)
         return answer["data"][f"{resource}Connection"]
-    cursor = None
+    current_offset = 0
     while True:
-        payload = _call(cursor)
+        payload = _call(first=page_size, offset=current_offset)
         yield payload["nodes"]
-        page_info = payload["pageInfo"]
-        if page_info["hasNextPage"]:
-            cursor = page_info["endCursor"]
-        else:
+        current_offset += len(payload["nodes"])
+        total = payload["totalCount"]
+        logger.info(f"Extracted {current_offset}/{total} {resource}")
+        if not payload["pageInfo"]["hasNextPage"]:
             break
@@ -107,12 +112,12 @@ class TableauClientMetadataApi:
         fields: str,
         page_size: int = DEFAULT_PAGE_SIZE,
     ) -> SerializedAsset:
-        query = QUERY_TEMPLATE.format(
+        result_pages = gql_query_scroll(
+            self._server,
             resource=resource,
             fields=fields,
             page_size=page_size,
         )
-        result_pages = gql_query_scroll(self._server, query, resource)
         return [asset for page in result_pages for asset in page]
     def _page_size(self, asset: TableauAsset) -> int:

castor_extractor/visualization/tableau/client/gql_queries.py CHANGED Viewed

@@ -2,7 +2,7 @@ from ..assets import TableauAsset
 QUERY_TEMPLATE = """
 {{
-  {resource}Connection(first: {page_size}, after: AFTER_TOKEN_SIGNAL) {{
+  {resource}Connection(first: {first}, offset: {offset}) {{
     nodes {{ {fields}
     }}
     pageInfo {{

castor_extractor/visualization/thoughtspot/client/client.py CHANGED Viewed

@@ -1,13 +1,17 @@
+import logging
 from collections.abc import Iterator
-from typing import Optional
+from functools import partial
+from typing import Iterable, Optional
 import requests
+from requests import Response
 from ....utils import (
     APIClient,
     BearerAuth,
     RequestSafeMode,
     build_url,
+    fetch_all_pages,
     handle_response,
 )
 from ..assets import (
@@ -19,9 +23,7 @@ from .credentials import (
 from .endpoints import (
     ThoughtspotEndpointFactory,
 )
-from .utils import (
-    usage_liveboard_reader,
-)
+from .pagination import METADATA_BATCH_SIZE, ThoughtSpotPagination
 _AUTH_TIMEOUT_S = 60
 _THOUGHTSPOT_HEADERS = {
@@ -29,7 +31,6 @@ _THOUGHTSPOT_HEADERS = {
     "Accept": "application/json",
     "Content-Type": "application/json",
 }
-_METADATA_BATCH_SIZE = 100
 # https://docs.thoughtspot.com/cloud/latest/object-usage-liveboard
 _OBJECT_USAGE_LIVEBOARD = "Object Usage"
 _ANSWER_USAGE_VIZ = "Answer Usage, by User"
@@ -40,6 +41,9 @@ _LIVEBOARD_USAGE_VIZ = "Popular Liveboards Last 30 Days"
 THOUGHTSPOT_SAFE_MODE = RequestSafeMode()
+logger = logging.getLogger(__name__)
 class ThoughtspotBearerAuth(BearerAuth):
     def __init__(self, host: str, token_payload: dict[str, str]):
         auth_endpoint = ThoughtspotEndpointFactory.authentication()
@@ -86,7 +90,7 @@ class ThoughtspotClient(APIClient):
             search_filters = {
                 "metadata": [{"type": metadata_type}],
                 "include_details": True,
-                "record_size": _METADATA_BATCH_SIZE,
+                "record_size": METADATA_BATCH_SIZE,
                 "record_offset": offset,
             }
             if identifier:
@@ -100,9 +104,9 @@ class ThoughtspotClient(APIClient):
                 data=search_filters,
             )
             yield from metadata
-            if len(metadata) < _METADATA_BATCH_SIZE:
+            if len(metadata) < METADATA_BATCH_SIZE:
                 break
-            offset = offset + _METADATA_BATCH_SIZE
+            offset = offset + METADATA_BATCH_SIZE
     def _get_all_answers(self) -> Iterator[dict]:
         yield from self._metadata_search(metadata_type="ANSWER")
@@ -120,7 +124,7 @@ class ThoughtspotClient(APIClient):
         self,
         liveboard_name: str,
         visualization_name: str,
-    ) -> Iterator[dict]:
+    ) -> Iterator[list[list]]:
         """
         Yields the data of a given visualization in the given liveboard.
         ThoughtSpot maintains two system liveboards with stats about data usage,
@@ -133,29 +137,62 @@ class ThoughtspotClient(APIClient):
         )
         liveboard_id = usage_liveboard["metadata_id"]
-        data = self._post(
-            endpoint=ThoughtspotEndpointFactory.liveboard(),
-            headers={"Accept": "application/octet-stream"},
+        def handler(response: Response) -> dict:
+            response_dict = response.json()
+            contents = response_dict.get("contents", [])
+            if not contents:
+                logger.warning("No data found in response")
+                return dict()
+            return contents[0]
+        request = partial(
+            self._post,
+            endpoint=ThoughtspotEndpointFactory.liveboard_data(),
             data={
                 "metadata_identifier": liveboard_id,
-                "file_format": "CSV",
                 "visualization_identifiers": [visualization_name],
+                "record_offset": 0,
+                "record_size": METADATA_BATCH_SIZE,
             },
-            handler=lambda x: x.text,
+            handler=handler,
         )
-        yield from usage_liveboard_reader(data)
+        yield from fetch_all_pages(request, ThoughtSpotPagination)
     def _get_answer_usages(self) -> Iterator[dict]:
-        return self._get_usages(
+        """
+        Returns the usage data of saved Answers, which is found in a visualization
+        of the "Object Usage" liveboard.
+        Each data row returned by the API is transformed from a list into a dictionary.
+        The columns are explicitly listed here because in the API response,
+        there is a mismatch between the number of column names and the number
+        of values per data row.
+        """
+        data: Iterable[list[list]] = self._get_usages(
             liveboard_name=_OBJECT_USAGE_LIVEBOARD,
             visualization_name=_ANSWER_USAGE_VIZ,
         )
+        columns = (
+            "Answer name",
+            "Number of unique users",
+            "Count of object interactions",
+        )
+        for row in data:
+            yield dict(zip(columns, row))
     def _get_liveboards_usages(self) -> Iterator[dict]:
-        return self._get_usages(
+        """
+        Returns the usage data of Liveboards, which is found in a visualization
+        of the "User Adoption" liveboard.
+        Each data row returned by the API is transformed from a list into a dictionary.
+        See `_get_answer_usages` regarding the columns list.
+        """
+        data: Iterable[list[list]] = self._get_usages(
             liveboard_name=_USER_ADOPTION_LIVEBOARD,
             visualization_name=_LIVEBOARD_USAGE_VIZ,
         )
+        columns = ("Pinboard", "Unique Number of User", "Pinboard Views")
+        for row in data:
+            yield dict(zip(columns, row))
     def fetch(self, asset: ThoughtspotAsset) -> Iterator[dict]:
         if asset == ThoughtspotAsset.ANSWERS:

castor_extractor/visualization/thoughtspot/client/endpoints.py CHANGED Viewed

@@ -8,5 +8,5 @@ class ThoughtspotEndpointFactory:
         return "api/rest/2.0/metadata/search"
     @classmethod
-    def liveboard(cls) -> str:
-        return "api/rest/2.0/report/liveboard"
+    def liveboard_data(cls) -> str:
+        return "api/rest/2.0/metadata/liveboard/data"

castor_extractor/visualization/thoughtspot/client/pagination.py ADDED Viewed

@@ -0,0 +1,25 @@
+from pydantic import ConfigDict, Field
+from ....utils import PaginationModel
+METADATA_BATCH_SIZE = 100
+class ThoughtSpotPagination(PaginationModel):
+    """
+    Pagination model based on `record_offset` / `record_size`, with the page
+    rows held in `data_rows`.
+    """
+    # rows of the current page; defaults to an empty list when not provided
+    data_rows: list = Field(default_factory=list)
+    record_offset: int
+    record_size: int
+    model_config = ConfigDict(
+        populate_by_name=True,
+        from_attributes=True,
+    )
+    def is_last(self) -> bool:
+        # a page holding fewer rows than METADATA_BATCH_SIZE is the last one
+        return len(self.data_rows) < METADATA_BATCH_SIZE
+    def next_page_payload(self) -> dict:
+        # NOTE(review): the offset advances by the METADATA_BATCH_SIZE
+        # constant, not by `self.record_size` - presumably both are always
+        # equal because requests are sent with that size; confirm
+        return {"record_offset": self.record_offset + METADATA_BATCH_SIZE}
+    def page_results(self) -> list:
+        return self.data_rows

castor_extractor/warehouse/snowflake/queries/column.sql CHANGED Viewed

@@ -47,7 +47,9 @@ FROM snowflake.account_usage.columns AS c
     JOIN snowflake.account_usage.tables AS t ON t.table_id = c.table_id
     JOIN tags_agg_columns ta ON c.column_id = ta.column_id
 WHERE TRUE
-    AND COALESCE(c.column_name, '') != ''
+    AND TRIM(COALESCE(c.column_name, '')) != ''
+    AND TRIM(COALESCE(t.table_name, '')) != ''
+    AND TRIM(COALESCE(s.schema_name, '')) != ''
     AND UPPER(c.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
     AND (
         c.deleted IS NULL

castor_extractor/warehouse/snowflake/queries/query.sql CHANGED Viewed

@@ -51,20 +51,28 @@ WHERE TRUE
     AND HOUR(CONVERT_TIMEZONE('UTC', start_time)) BETWEEN :hour_min AND :hour_max
     AND execution_status = 'SUCCESS'
     AND query_text != 'SELECT 1'
+    AND TRIM(COALESCE(query_text, '')) != ''
     AND query_type NOT IN (
-        'SHOW',
-        'USE',
-        'ROLLBACK',
-        'DESCRIBE',
         'ALTER_SESSION',
-        'PUT_FILES',
+        'BEGIN_TRANSACTION',
+        'CALL',
+        'COMMENT',
+        'COMMIT',
         'CREATE', -- create objects: stage|function|schema|procedure|file|storage|pipe|notification integration
-        'SET',
+        'DESCRIBE',
+        'DROP',
+        'EXPLAIN',
+        'GET_FILES',
         'GRANT',
-        'COMMIT',
+        'PUT_FILES',
+        'REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION',
+        'REMOVE_FILES',
+        'REVOKE',
+        'ROLLBACK',
+        'SET',
+        'SHOW',
+        'TRUNCATE_TABLE',
+        'UNDROP',
         'UNLOAD',
-        'GET_FILES',
-        'DROP',
-        'BEGIN_TRANSACTION',
-        'REMOVE_FILES'
+        'USE'
     )

castor_extractor/warehouse/snowflake/queries/schema.sql CHANGED Viewed

@@ -16,6 +16,7 @@ WHERE TRUE
         deleted IS NULL
         OR deleted > CURRENT_TIMESTAMP - INTERVAL '1 day'
     )
+    AND TRIM(COALESCE(schema_name, '')) != ''
     {database_allowed}
     {database_blocked}
     AND CASE {has_fetch_transient} WHEN FALSE THEN NOT s.is_transient::BOOLEAN ELSE TRUE END

castor_extractor/warehouse/snowflake/queries/table.sql CHANGED Viewed

@@ -41,8 +41,8 @@ FROM snowflake.account_usage.tables AS t
     JOIN snowflake.account_usage.schemata AS s ON s.schema_id = t.table_schema_id
     JOIN tags_agg_tables ta ON t.table_id = ta.table_id
 WHERE TRUE
-    AND t.table_name IS NOT NULL
-    AND t.table_name != ''
+    AND TRIM(COALESCE(t.table_name, '')) != ''
+    AND TRIM(COALESCE(s.schema_name, '')) != ''
     AND UPPER(t.table_catalog) NOT IN ('SNOWFLAKE', 'UTIL_DB')
     AND (
         t.deleted IS NULL

{castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: castor-extractor
-Version: 0.24.2
+Version: 0.24.7
 Summary: Extract your metadata assets.
 Home-page: https://www.castordoc.com/
 License: EULA
@@ -35,7 +35,7 @@ Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
 Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
 Requires-Dist: google-auth (>=2,<3)
 Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
-Requires-Dist: google-cloud-storage (>=2,<3)
+Requires-Dist: google-cloud-storage (>=3.1.0,<4.0.0)
 Requires-Dist: google-resumable-media (>=2.0.3,<3.0.0)
 Requires-Dist: googleapis-common-protos (>=1.53.0,<2.0.0)
 Requires-Dist: looker-sdk (>=25.0.0,<26.0.0) ; extra == "looker" or extra == "all"
@@ -51,7 +51,7 @@ Requires-Dist: pymssql (>=2.2.11,<3.0.0) ; extra == "sqlserver" or extra == "all
 Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all"
 Requires-Dist: python-dateutil (>=2.0.0,<=3.0.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
-Requires-Dist: setuptools (>=75.6)
+Requires-Dist: setuptools (>=78.1)
 Requires-Dist: snowflake-connector-python (>=3.4.0,<4.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: snowflake-sqlalchemy (!=1.2.5,<2.0.0) ; extra == "snowflake" or extra == "all"
 Requires-Dist: sqlalchemy (>=1.4,<1.5)
@@ -210,6 +210,30 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
 # Changelog
+## 0.24.7 - 2025-04-07
+* Tableau - switch from `cursor` to `offset` pagination to mitigate timeout issues
+## 0.24.6 - 2025-04-03
+* Domo - extract cards metadata by batch to prevent from hitting URL max length
+## 0.24.5 - 2025-04-02
+* bump dependencies: google-cloud-storage
+## 0.24.4 - 2025-03-19
+* Snowflake:
+  * improve the list of ignored queries in the query history extraction
+    * ignore the following query types : CALL, COMMENT, EXPLAIN, REFRESH_DYNAMIC_TABLE_AT_REFRESH_VERSION, REVOKE, TRUNCATE_TABLE, UNDROP
+    * ignore queries with empty text
+  * filter out schemas with empty names
+## 0.24.3 - 2025-03-18
+* Replace ThoughtSpot endpoint `/api/rest/2.0/report/liveboard` with `/api/rest/2.0/metadata/liveboard/data` following the deprecation of the CSV option
 ## 0.24.2 - 2025-03-17
 * Rename Revamped Tableau Connector classes

{castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-CHANGELOG.md,sha256=8iEypB0lozhyFumiedys3lbpowlX3HXCPnK-3QvjueE,15884
+CHANGELOG.md,sha256=UWuENqrKnLu244f4Of6dtZ59XZ7jrLWkcQni3MqXPBg,16667
 Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
 DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
 LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -77,9 +77,11 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
 castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
 castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
 castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
-castor_extractor/utils/__init__.py,sha256=X7WOOgrpGf7Vh8r-7eNGjuC0rKs0g9GTO3d7hZ18gwo,1550
+castor_extractor/utils/__init__.py,sha256=KQkr_CmxWG0Vpu7CaqjbJkffUeEWcyeA9Cbm394Hygk,1585
 castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
 castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
+castor_extractor/utils/batch.py,sha256=SFlLmJgVjV2nVhIrjVIEp8wJ9du4dKKHq8YVYubnwQQ,448
+castor_extractor/utils/batch_test.py,sha256=84JYXOxiTkZFAceVh0mzN6VtKxcqoFPbxkZfIDyLGlg,606
 castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
 castor_extractor/utils/client/abstract.py,sha256=CWF7_afNpEZ3jor-22wXbKIvM20ukHkaDy_uknKz8B0,2075
 castor_extractor/utils/client/api/__init__.py,sha256=vlG7WXznYgLTn3XyMGsyUkgRkup8FbKM14EXJ8mv-b0,264
@@ -146,7 +148,7 @@ castor_extractor/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 castor_extractor/visualization/domo/__init__.py,sha256=1axOCPm4RpdIyUt9LQEvlMvbOPllW8rk63h6EjVgJ0Y,111
 castor_extractor/visualization/domo/assets.py,sha256=bK1urFR2tnlWkVkkhR32mAKMoKbESNlop-CNGx-65PY,206
 castor_extractor/visualization/domo/client/__init__.py,sha256=Do0fU4B8Hhlhahcv734gnJl_ryCztfTBDea7XNCKfB8,72
-castor_extractor/visualization/domo/client/client.py,sha256=vOMBY5dY6N3v55YJPdh9aoiddXnuLnGoFHLE5BeUKSg,9662
+castor_extractor/visualization/domo/client/client.py,sha256=bgzXWUm-UnTIwgyJKaJkoHzQpDYwWCGCe97MsMFw6ng,9930
 castor_extractor/visualization/domo/client/credentials.py,sha256=4gnsk4Tpt3ggdUYbvyNPJEXeCyTy12s-X24P5hFdULg,873
 castor_extractor/visualization/domo/client/endpoints.py,sha256=eIE9oeZ_cmJSWWDuyxh6JaAOs3y5bTJQQ265HYgpulE,2775
 castor_extractor/visualization/domo/client/pagination.py,sha256=ukVkHVzoH4mfZ29H9YcnC2YrdVolP10wv25J6Q3ehRw,821
@@ -264,23 +266,22 @@ castor_extractor/visualization/tableau/__init__.py,sha256=eFI_1hjdkxyUiAYiy3szwy
 castor_extractor/visualization/tableau/assets.py,sha256=HbCRd8VCj1WBEeqg9jwnygnT7xOFJ6PQD7Lq7sV-XR0,635
 castor_extractor/visualization/tableau/client/__init__.py,sha256=P8RKFKOC63WkH5hdEytJOwHS9vzQ8GXreLfXZetmMP8,78
 castor_extractor/visualization/tableau/client/client.py,sha256=zzqhzIqKyJygo4ZNGk6cZh0e6Z9R1W5T0P9un52KC1M,7626
-castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=fIBsSbRTypBABsCoigO2dkKsw4Eu3GrsEPTDfjY8A80,4303
+castor_extractor/visualization/tableau/client/client_metadata_api.py,sha256=VHNV1Q0EVKuiFKm1yKSx4tIuPGww4Mlw3yui2DgKe7I,4196
 castor_extractor/visualization/tableau/client/client_rest_api.py,sha256=x4dNw4PPJdalTlGowwkANwqiS2ZhGxzpQytkHq3KbpY,3988
 castor_extractor/visualization/tableau/client/client_tsc.py,sha256=VI_PJyd1ty3HSYXHHQjshmG2ziowIbrwJRonRPCHbks,1820
 castor_extractor/visualization/tableau/client/credentials.py,sha256=uQICIgeXmLZfOroTgZt7PuKNKTyqQllRGSTcOmIfrKU,1893
 castor_extractor/visualization/tableau/client/errors.py,sha256=ecT8Tit5VtzrOBB9ykblA0nvd75j5-_QDFupjV48zJQ,300
-castor_extractor/visualization/tableau/client/gql_queries.py,sha256=NISarYh33Ij7DhYxqjTdv681AHYpbft8kPwVUQbAZ7U,2190
+castor_extractor/visualization/tableau/client/gql_queries.py,sha256=XJAfhpMZ5S7-AhfpOaoHMHCAdil-l5e5xB-CH4NC38M,2177
 castor_extractor/visualization/tableau/client/rest_fields.py,sha256=ZKYYuMxg9PXhczVXaD4rXNk7dYyWJ1_bVM8FLEXju7s,888
 castor_extractor/visualization/tableau/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
 castor_extractor/visualization/tableau/extract.py,sha256=FnjmmUdNA9MEf3S5Tw37x6ZXxVsK8R3YnVk1UVYbaZk,1423
 castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
 castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
 castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
-castor_extractor/visualization/thoughtspot/client/client.py,sha256=mtwMCPI1-1tyZb1gSYYr-O2QZMTFQwNgillU6ycsOU4,5552
+castor_extractor/visualization/thoughtspot/client/client.py,sha256=lRNkigPV2MTozgBzFkij7mCXMMRqXzPtNs8EEi_f3tk,7127
 castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
-castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=u3FRkmG6j5OIMEeXWZcgRObP8JeC4EutIJEeitNV44c,330
-castor_extractor/visualization/thoughtspot/client/utils.py,sha256=3LgbIWoG1e39VW8rYaV4ot_0EFipziwf3rFAZKxrlEY,1072
-castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=2XysRU7a58KA2JgNwU2j4GPrN0rkN7Gvk8kQCJlYXVk,2469
+castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=XLDGs7v2e2S2VdJX8cQjMh80KNCHb_H5A9I8ejP1ZPs,342
+castor_extractor/visualization/thoughtspot/client/pagination.py,sha256=iosYUJ7ZMT1G_Jm6AXPwczYnXFzS6Yez-B9-tRFiV_w,619
 castor_extractor/visualization/thoughtspot/extract.py,sha256=mcXS0jGFpa50td98AVbbTqxchyI5wDCpB-v1o5iRc3g,1354
 castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
@@ -380,16 +381,16 @@ castor_extractor/warehouse/snowflake/credentials.py,sha256=u0sZ6xPtcZmmvnUsAejJk
 castor_extractor/warehouse/snowflake/credentials_test.py,sha256=Lkc-DHXOvr50KrqAW4nt_x0IA0Mu_CsBVu6ATnzQB6I,673
 castor_extractor/warehouse/snowflake/extract.py,sha256=3yc9kcVtt2c1uWJOJJgeZchV4VmRr9EeYM3W6gl8zQQ,3201
 castor_extractor/warehouse/snowflake/queries/.sqlfluff,sha256=vttrwcr64JVIuvc7WIg9C54cbOkjg_VjXNR7YnTGOPE,31
-castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Wy-arvS_3Dh0HFrzdpRmBsI58mMlN_5U097s5kMNluQ,1781
+castor_extractor/warehouse/snowflake/queries/column.sql,sha256=Ru-yC0s76I9LehOA4aCZ--xz6D9H1Hyr3OZdILOBHAw,1882
 castor_extractor/warehouse/snowflake/queries/column_lineage.sql,sha256=YKBiZ6zySSNcXLDXwm31EjGIIkkkZc0-S6hI1SRM80o,1179
 castor_extractor/warehouse/snowflake/queries/database.sql,sha256=ifZXoKUXtsrGOxml6AcNhA4yybIyatH5va7bcp-lgCU,483
 castor_extractor/warehouse/snowflake/queries/function.sql,sha256=8LRh0ybhd-RldJ8UZspWUm3yv52evq11O2uqIO4KqeQ,372
 castor_extractor/warehouse/snowflake/queries/grant_to_role.sql,sha256=O7AJ1LzoXGDFmiVvQ8EMJ5x8FSAnaxRPdmRyAlEmkUM,272
 castor_extractor/warehouse/snowflake/queries/grant_to_user.sql,sha256=7AalVajU5vRRpIiys1igSwmDXirbwpMTvJr2ihSz2NE,143
-castor_extractor/warehouse/snowflake/queries/query.sql,sha256=-OYcWUvdPBkpOfezkZaW7hrOdDz3JyoqjNdRm_88Rsk,1779
+castor_extractor/warehouse/snowflake/queries/query.sql,sha256=w4T6-TgwUozDgaF3Fk-qex7bDdEIHLkkB5XEe2VJXZQ,1992
 castor_extractor/warehouse/snowflake/queries/role.sql,sha256=D0VvGxLZMwug2SvefhAsNR9YIun0fZvcDWkz891xSYM,96
-castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=HCDEw0Nj_GPHBNH3Ik_5BF4rkD5yBfSyeN9UaiFGrI4,730
-castor_extractor/warehouse/snowflake/queries/table.sql,sha256=qTwkAJ7-kM8vX03RP16U_5_euWW5ZTQAKuiLPsbj2hs,1438
+castor_extractor/warehouse/snowflake/queries/schema.sql,sha256=iLn6_y5rn63KigjE4GEAMp8ZuZZofhMXYGb8saPDGUc,776
+castor_extractor/warehouse/snowflake/queries/table.sql,sha256=CbSLfJAylyyyD3mkGPSLLE7BHrGjlY499kzO9RN0e4Y,1473
 castor_extractor/warehouse/snowflake/queries/user.sql,sha256=88V8eRj1NDaD_ufclsKOHHlqCtBMQHOV54yy6RKJaXk,570
 castor_extractor/warehouse/snowflake/queries/view_ddl.sql,sha256=eWsci_50cxiYIv3N7BKkbXVM3RoIzqSDtohqRnE5kg4,673
 castor_extractor/warehouse/snowflake/query.py,sha256=C2LTdPwBzMQ_zMncg0Kq4_WkoY7K9as5tvxBDrIOlwI,1763
@@ -404,8 +405,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
 castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
 castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
 castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
-castor_extractor-0.24.2.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
-castor_extractor-0.24.2.dist-info/METADATA,sha256=FNJlgmFPbgSmHoVwHx-hXj9rvHYw2wctlcEXeGck52I,23040
-castor_extractor-0.24.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-castor_extractor-0.24.2.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
-castor_extractor-0.24.2.dist-info/RECORD,,
+castor_extractor-0.24.7.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
+castor_extractor-0.24.7.dist-info/METADATA,sha256=qWp3OBv1FO123RJqz2YKTEd12WzhKoDmcxVZLhvzn6M,23831
+castor_extractor-0.24.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+castor_extractor-0.24.7.dist-info/entry_points.txt,sha256=FQNShG4w4nRO95_bZnagh7FQ2oiZ-40bdt8ZdTW1-uI,1731
+castor_extractor-0.24.7.dist-info/RECORD,,

castor_extractor/visualization/thoughtspot/client/utils.py DELETED Viewed

@@ -1,31 +0,0 @@
-import csv
-import re
-from collections.abc import Iterator
-from io import StringIO
-_END_OF_GENERATED_TEXT = r'^""$'
-def usage_liveboard_reader(usage_liveboard_csv: str) -> Iterator[dict]:
-    """
-    Converts a CSV string into an iterator of dictionaries after
-    ignoring the generated text that preceeds the actual CSV header row.
-    The generated block ends with a row containing only two double quotes.
-    Here is an example:
-        "Data extract produced by Castor on 09/19/2024 06:54"
-        "Filters applied on data :"
-        "User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
-        "Pinboard NOT IN [mlm - availability pinboard,null]"
-        "Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
-        "Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
-        ""
-    """
-    csv_file = StringIO(usage_liveboard_csv)
-    line = next(csv_file)
-    while not re.match(_END_OF_GENERATED_TEXT, line.strip()):
-        line = next(csv_file)
-    yield from csv.DictReader(csv_file)

castor_extractor/visualization/thoughtspot/client/utils_test.py DELETED Viewed

@@ -1,75 +0,0 @@
-from .utils import (
-    usage_liveboard_reader,
-)
-VALID_CSV_1 = '''"Data extract produced by Castor on 09/19/2024 06:54"
-"Filters applied on data :"
-"User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
-"Pinboard NOT IN [mlm - availability pinboard,null]"
-"Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
-"Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
-""
-"Pinboard","Pinboard Views","Unique Number of User"
-"Market Report","559","19"
-"Retailer report","204","14"
-"Second-hand market","72","6"
-"September test","25","2"'''
-VALID_CSV_2 = '''"Data extract produced by Castor on 01/07/2025 16:07"
-"Filters applied on data :"
-"Timestamp >= 20241208 00:00:00 < 20250107 00:00:00"
-""
-"Answer name","User name","Number of unique users","Count of object interactions"
-"toto","tata","1","666"'''
-# Invalid CSV input (missing data rows)
-INVALID_CSV = '''"Data extract produced by Castor on 09/19/2024 06:54"
-"Filters applied on data :"
-"User Action IN [pinboard_embed_view,pinboard_tspublic_no_runtime_filter,pinboard_tspublic_runtime_filter,pinboard_view]"
-"Pinboard NOT IN [mlm - availability pinboard,null]"
-"Timestamp >= 20240820 00:00:00 < 20240919 00:00:00"
-"Timestamp >= 20240919 00:00:00 < 20240920 00:00:00"
-""'''
-def test_usage_liveboard_reader():
-    expected_output_1 = [
-        {
-            "Pinboard": "Market Report",
-            "Pinboard Views": "559",
-            "Unique Number of User": "19",
-        },
-        {
-            "Pinboard": "Retailer report",
-            "Pinboard Views": "204",
-            "Unique Number of User": "14",
-        },
-        {
-            "Pinboard": "Second-hand market",
-            "Pinboard Views": "72",
-            "Unique Number of User": "6",
-        },
-        {
-            "Pinboard": "September test",
-            "Pinboard Views": "25",
-            "Unique Number of User": "2",
-        },
-    ]
-    expected_output_2 = [
-        {
-            "Answer name": "toto",
-            "User name": "tata",
-            "Number of unique users": "1",
-            "Count of object interactions": "666",
-        }
-    ]
-    result = list(usage_liveboard_reader(VALID_CSV_1))
-    assert result == expected_output_1
-    result = list(usage_liveboard_reader(VALID_CSV_2))
-    assert result == expected_output_2
-    result = list(usage_liveboard_reader(INVALID_CSV))
-    assert result == []  # Expect an empty result since there is no data

{castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/LICENCE RENAMED Viewed

File without changes

{castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{castor_extractor-0.24.2.dist-info → castor_extractor-0.24.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

castor-extractor 0.24.2__py3-none-any.whl → 0.24.7__py3-none-any.whl

Potentially problematic release.

castor-extractor 0.24.2__py3-none-any.whl → 0.24.7__py3-none-any.whl