clickzetta-semantic-model-generator 1.0.13__tar.gz → 1.0.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/PKG-INFO +1 -1
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/pyproject.toml +1 -1
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/clickzetta_utils/clickzetta_connector.py +67 -31
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/relationships/discovery.py +9 -1
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/clickzetta_connector_test.py +83 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/LICENSE +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/README.md +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/clickzetta_utils/env_vars.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/clickzetta_utils/utils.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/cte_utils.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/cte_utils_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/data_types.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/proto_utils.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/generate_model.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/dashscope_client.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/enrichment.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/progress_tracker.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/output_models/.keep +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/protos/semantic_model.proto +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/protos/semantic_model_pb2.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/protos/semantic_model_pb2.pyi +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/relationships/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/cte_utils_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/generate_model_classification_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/llm_enrichment_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/relationship_discovery_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/relationships_filters_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/samples/validate_yamls.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/utils_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/validate_model_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/yaml_to_semantic_model_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate/context_length.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate/keywords.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate/schema.py +0 -0
- {clickzetta_semantic_model_generator-1.0.13 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate_model.py +0 -0
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "clickzetta-semantic-model-generator"
-version = "1.0.13"
+version = "1.0.15"
 description = "Curate a Semantic Model for ClickZetta Lakehouse"
 authors = ["qililiang <qililiang@clickzetta.com>"]
 license = "Apache Software License; BSD License"
semantic_model_generator/clickzetta_utils/clickzetta_connector.py
@@ -4,7 +4,7 @@ import concurrent.futures
 import re
 from collections import defaultdict
 from contextlib import contextmanager
-from typing import Any, Dict, Generator, List, Optional, TypeVar, Union
+from typing import Any, Dict, Generator, List, Optional, Tuple, TypeVar, Union
 
 import pandas as pd
 from clickzetta.zettapark.session import Session
@@ -176,6 +176,25 @@ def _sanitize_identifier(value: Any, fallback: str = "") -> str:
     return normalized
 
 
+def _split_identifier(
+    identifier: Any,
+) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+    """
+    Split a potentially qualified identifier into catalog, schema, and table parts.
+    Returns normalized segments without surrounding quotes/backticks.
+    """
+
+    text = normalize_identifier(identifier)
+    if not text:
+        return None, None, None
+    parts = [part.strip() for part in text.split(".") if part.strip()]
+    if len(parts) >= 3:
+        return parts[-3], parts[-2], parts[-1]
+    if len(parts) == 2:
+        return None, parts[0], parts[1]
+    return None, None, parts[0]
+
+
 def _normalize_column_type(raw: Any) -> str:
     if raw is None:
         return ""
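The helper above keeps only the last three dot-separated segments, so bare, schema-qualified, and fully qualified names all resolve to the same table leaf. A minimal standalone sketch of that behavior (the hypothetical split_identifier below only strips surrounding quotes and backticks; the real helper delegates to normalize_identifier, which is not reproduced here):

    from typing import Any, Optional, Tuple

    def split_identifier(identifier: Any) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        # Simplified stand-in for normalize_identifier: strip quotes/backticks only.
        text = str(identifier or "").strip().strip('`"')
        if not text:
            return None, None, None
        parts = [part.strip().strip('`"') for part in text.split(".") if part.strip()]
        if len(parts) >= 3:
            return parts[-3], parts[-2], parts[-1]
        if len(parts) == 2:
            return None, parts[0], parts[1]
        return None, None, parts[0]

    print(split_identifier("TEST_WS.S1.TABLE_ONE"))  # ('TEST_WS', 'S1', 'TABLE_ONE')
    print(split_identifier("S1.TABLE_ONE"))          # (None, 'S1', 'TABLE_ONE')
    print(split_identifier("TABLE_ONE"))             # (None, None, 'TABLE_ONE')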
@@ -449,8 +468,14 @@ def _build_information_schema_query(
     if table_schema:
         where_conditions.append(f"upper(t.table_schema) = '{table_schema.upper()}'")
     if table_names:
-        …
-        …
+        normalized_names: List[str] = []
+        for name in table_names:
+            _, _, table_only = _split_identifier(name)
+            if table_only:
+                normalized_names.append(table_only.upper())
+        if normalized_names:
+            formatted_names = ", ".join(f"'{name}'" for name in normalized_names)
+            where_conditions.append(f"upper(t.table_name) IN ({formatted_names})")
 
     where_clause = " AND ".join(where_conditions)
     return f"""
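The effect is that fully qualified filter names are reduced to bare table names before they reach the information_schema predicate, which is what the new test asserts when it checks that no "TEST_WS.S1.TEST_WS.S1" string appears in any query. A rough standalone rendering of the resulting IN clause, using a hypothetical table_leaf helper in place of _split_identifier:

    table_names = ["TEST_WS.S1.TABLE_ONE", "partsupp"]

    def table_leaf(name: str) -> str:
        # Keep only the last dot-separated segment, without quotes/backticks.
        return name.split(".")[-1].strip().strip('`"')

    normalized_names = [table_leaf(n).upper() for n in table_names if table_leaf(n)]
    formatted_names = ", ".join(f"'{n}'" for n in normalized_names)
    print(f"upper(t.table_name) IN ({formatted_names})")
    # -> upper(t.table_name) IN ('TABLE_ONE', 'PARTSUPP')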
@@ -490,22 +515,39 @@ def _fetch_columns_via_show(
         if not table_token:
             continue
 
+        override_catalog, override_schema, override_table = _split_identifier(table_token)
+        table_leaf = override_table or table_token
+        if not table_leaf:
+            continue
+
+        catalog_token = override_catalog or catalog
+        schema_token_override = override_schema or schema_token
+
         identifier_candidates: List[str] = []
-        …
-        )
-        …
-        else
-        …
+        seen_identifiers: set[str] = set()
+
+        def _add_identifier(parts: Tuple[str, ...], *, quoted: bool) -> None:
+            tokens = [part.strip() for part in parts if part and part.strip()]
+            if not tokens:
+                return
+            if quoted:
+                identifier = ".".join(quote_identifier(token) for token in tokens)
+            else:
+                identifier = ".".join(tokens)
+            if identifier and identifier not in seen_identifiers:
+                identifier_candidates.append(identifier)
+                seen_identifiers.add(identifier)
+
+        raw_parts = (catalog_token, schema_token_override, table_leaf)
+        schema_parts = (schema_token_override, table_leaf)
+        table_parts = (table_leaf,)
+
+        _add_identifier(raw_parts, quoted=False)
+        _add_identifier(schema_parts, quoted=False)
+        _add_identifier(table_parts, quoted=False)
+        _add_identifier(raw_parts, quoted=True)
+        _add_identifier(schema_parts, quoted=True)
+        _add_identifier(table_parts, quoted=True)
 
         df = pd.DataFrame()
         df_source = ""
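The candidate list built above means SHOW COLUMNS is attempted against progressively shorter scopes: fully qualified, then schema-qualified, then bare, each in unquoted and then quoted form, with duplicates suppressed. A small standalone sketch of that ordering, assuming quote_identifier simply wraps a token in backticks (the real helper may escape differently):

    from typing import Optional, Tuple

    def quote_identifier(token: str) -> str:
        # Assumed backtick quoting for illustration only.
        return f"`{token}`"

    def candidate_identifiers(parts: Tuple[Optional[str], Optional[str], Optional[str]]) -> list[str]:
        candidates: list[str] = []
        seen: set[str] = set()

        def add(tokens: Tuple[Optional[str], ...], quoted: bool) -> None:
            kept = [t.strip() for t in tokens if t and t.strip()]
            if not kept:
                return
            ident = ".".join(quote_identifier(t) if quoted else t for t in kept)
            if ident not in seen:
                candidates.append(ident)
                seen.add(ident)

        catalog, schema, table = parts
        for quoted in (False, True):
            add((catalog, schema, table), quoted)
            add((schema, table), quoted)
            add((table,), quoted)
        return candidates

    print(candidate_identifiers(("TEST_WS", "S1", "TABLE_ONE")))
    # ['TEST_WS.S1.TABLE_ONE', 'S1.TABLE_ONE', 'TABLE_ONE',
    #  '`TEST_WS`.`S1`.`TABLE_ONE`', '`S1`.`TABLE_ONE`', '`TABLE_ONE`']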
@@ -573,10 +615,10 @@ def _fetch_columns_via_show(
         normalized[_TABLE_SCHEMA_COL] = (
             df[schema_col]
             if schema_col
-            else (…
+            else (schema_token_override or table_schema or "")
         )
         normalized[_TABLE_NAME_COL] = (
-            df[table_col] if table_col else …
+            df[table_col] if table_col else table_leaf
         )
         normalized[_COLUMN_NAME_COL] = (
             df[column_col] if column_col else df.index.astype(str)
@@ -729,17 +771,11 @@ def fetch_tables_views_in_schema(
 
     try:
         if workspace_token and schema_token:
-            …
-            …
-            …
-            …
-            …
-            else:
-                scope = join_quoted_identifiers(
-                    workspace_token,
-                    schema_token,
-                )
-                df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
+            scope = join_quoted_identifiers(
+                workspace_token,
+                schema_token,
+            )
+            df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
         else:
             df = session.sql("SHOW TABLES").to_pandas()
     except Exception as exc:  # pragma: no cover
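This branch now always issues a plain scoped SHOW TABLES statement; the shared-catalog special case is gone, which the second new test below verifies by asserting that no "IN SHARE" clause is executed. A quick sketch of the statement the branch produces, assuming join_quoted_identifiers backtick-quotes each part and joins them with dots (the real quoting rules may differ):

    def join_quoted_identifiers(*parts: str) -> str:
        # Illustrative assumption about the helper's quoting behavior.
        return ".".join(f"`{p}`" for p in parts)

    scope = join_quoted_identifiers("lakehouse_ai", "schema_for_opencatalog")
    print(f"SHOW TABLES IN {scope}")
    # -> SHOW TABLES IN `lakehouse_ai`.`schema_for_opencatalog`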
semantic_model_generator/relationships/discovery.py
@@ -50,7 +50,15 @@ class RelationshipDiscoveryResult:
 def _normalize_table_names(table_names: Optional[Iterable[str]]) -> Optional[List[str]]:
     if table_names is None:
         return None
-    …
+    normalized: List[str] = []
+    for name in table_names:
+        parts = [
+            part.strip().strip("`").strip('"')
+            for part in str(name).split(".")
+            if part and part.strip()
+        ]
+        normalized.append(".".join(parts))
+    return normalized
 
 
 def _build_tables_from_dataframe(
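The new normalization strips backticks and double quotes from each segment and rejoins with dots, so differently quoted spellings of the same table compare equal during relationship discovery. A minimal standalone sketch of that per-name behavior (normalize_table_name is a hypothetical mirror of the loop body above):

    def normalize_table_name(name: str) -> str:
        parts = [
            part.strip().strip("`").strip('"')
            for part in str(name).split(".")
            if part and part.strip()
        ]
        return ".".join(parts)

    print(normalize_table_name('`lakehouse_ai`."schema_for_opencatalog".czcustomer'))
    # -> lakehouse_ai.schema_for_opencatalog.czcustomer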
semantic_model_generator/tests/clickzetta_connector_test.py
@@ -86,3 +86,86 @@ def test_get_valid_columns_falls_back_to_show_columns():
     assert not df.empty
     assert df["TABLE_NAME"].iloc[0] == "PARTSUPP"
     assert df["COLUMN_NAME"].iloc[0] == "PS_PARTKEY"
+
+
+def test_get_valid_columns_handles_fully_qualified_filters():
+    class DummyResult:
+        def __init__(self, df: pd.DataFrame):
+            self._df = df
+
+        def to_pandas(self) -> pd.DataFrame:
+            return self._df
+
+    table_df = pd.DataFrame(
+        {
+            "schema_name": ["S1"],
+            "table_name": ["TABLE_ONE"],
+            "column_name": ["ID"],
+            "data_type": ["INT"],
+            "comment": [""],
+        }
+    )
+
+    call_log: list[str] = []
+
+    def sql_side_effect(query: str):
+        call_log.append(query)
+        if "information_schema" in query:
+            raise RuntimeError("info schema unavailable")
+        if query == "SHOW COLUMNS IN TEST_WS.S1.TABLE_ONE":
+            return DummyResult(table_df)
+        raise RuntimeError("unsupported query")
+
+    session = mock.MagicMock()
+    session.sql.side_effect = sql_side_effect
+    connector._CATALOG_CATEGORY_CACHE.clear()
+
+    df = connector.get_valid_schemas_tables_columns_df(
+        session=session,
+        workspace="TEST_WS",
+        table_schema="S1",
+        table_names=["TEST_WS.S1.TABLE_ONE"],
+    )
+
+    assert not df.empty
+    assert any("SHOW COLUMNS IN TEST_WS.S1.TABLE_ONE" in q for q in call_log)
+    assert all("TEST_WS.S1.TEST_WS.S1" not in q for q in call_log)
+
+
+def test_fetch_tables_views_in_schema_shared_catalog_does_not_use_share_clause():
+    class DummyResult:
+        def __init__(self, df: pd.DataFrame):
+            self._df = df
+
+        def to_pandas(self) -> pd.DataFrame:
+            return self._df
+
+    tables_df = pd.DataFrame(
+        {
+            "workspace_name": ["lakehouse_ai"],
+            "schema_name": ["schema_for_opencatalog"],
+            "table_name": ["czcustomer"],
+            "is_view": [False],
+            "is_materialized_view": [False],
+        }
+    )
+
+    executed_queries: list[str] = []
+
+    def sql_side_effect(query: str):
+        executed_queries.append(query)
+        if query.startswith("SHOW TABLES IN"):
+            return DummyResult(tables_df)
+        raise RuntimeError("Unexpected query")
+
+    session = mock.MagicMock()
+    session.sql.side_effect = sql_side_effect
+    connector._CATALOG_CATEGORY_CACHE.clear()
+
+    with mock.patch.object(connector, "_catalog_category", return_value="SHARED"):
+        tables = connector.fetch_tables_views_in_schema(
+            session=session, schema_name="lakehouse_ai.schema_for_opencatalog"
+        )
+
+    assert tables == ["lakehouse_ai.schema_for_opencatalog.czcustomer"]
+    assert all("IN SHARE" not in query for query in executed_queries)
LICENSE, README.md, and every other +0 -0 entry in the file list above were renamed into the 1.0.15 directory with no content changes.