clickzetta-semantic-model-generator 1.0.14__tar.gz → 1.0.15__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (38)
  1. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/PKG-INFO +1 -1
  2. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/pyproject.toml +1 -1
  3. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/clickzetta_utils/clickzetta_connector.py +45 -18
  4. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/relationships/discovery.py +9 -1
  5. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/clickzetta_connector_test.py +83 -0
  6. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/LICENSE +0 -0
  7. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/README.md +0 -0
  8. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/__init__.py +0 -0
  9. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/clickzetta_utils/env_vars.py +0 -0
  10. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/clickzetta_utils/utils.py +0 -0
  11. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/__init__.py +0 -0
  12. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/cte_utils.py +0 -0
  13. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/cte_utils_test.py +0 -0
  14. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/data_types.py +0 -0
  15. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/data_processing/proto_utils.py +0 -0
  16. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/generate_model.py +0 -0
  17. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/__init__.py +0 -0
  18. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/dashscope_client.py +0 -0
  19. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/enrichment.py +0 -0
  20. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/llm/progress_tracker.py +0 -0
  21. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/output_models/.keep +0 -0
  22. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/protos/semantic_model.proto +0 -0
  23. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/protos/semantic_model_pb2.py +0 -0
  24. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/protos/semantic_model_pb2.pyi +0 -0
  25. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/relationships/__init__.py +0 -0
  26. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/cte_utils_test.py +0 -0
  27. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/generate_model_classification_test.py +0 -0
  28. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/llm_enrichment_test.py +0 -0
  29. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/relationship_discovery_test.py +0 -0
  30. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/relationships_filters_test.py +0 -0
  31. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/samples/validate_yamls.py +0 -0
  32. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/utils_test.py +0 -0
  33. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/validate_model_test.py +0 -0
  34. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/tests/yaml_to_semantic_model_test.py +0 -0
  35. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate/context_length.py +0 -0
  36. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate/keywords.py +0 -0
  37. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate/schema.py +0 -0
  38. {clickzetta_semantic_model_generator-1.0.14 → clickzetta_semantic_model_generator-1.0.15}/semantic_model_generator/validate_model.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: clickzetta-semantic-model-generator
3
- Version: 1.0.14
3
+ Version: 1.0.15
4
4
  Summary: Curate a Semantic Model for ClickZetta Lakehouse
5
5
  License: Apache Software License; BSD License
6
6
  Author: qililiang
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "clickzetta-semantic-model-generator"
3
- version = "1.0.14"
3
+ version = "1.0.15"
4
4
  description = "Curate a Semantic Model for ClickZetta Lakehouse"
5
5
  authors = ["qililiang <qililiang@clickzetta.com>"]
6
6
  license = "Apache Software License; BSD License"
@@ -176,6 +176,25 @@ def _sanitize_identifier(value: Any, fallback: str = "") -> str:
176
176
  return normalized
177
177
 
178
178
 
179
+ def _split_identifier(
180
+ identifier: Any,
181
+ ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
182
+ """
183
+ Split a potentially qualified identifier into catalog, schema, and table parts.
184
+ Returns normalized segments without surrounding quotes/backticks.
185
+ """
186
+
187
+ text = normalize_identifier(identifier)
188
+ if not text:
189
+ return None, None, None
190
+ parts = [part.strip() for part in text.split(".") if part.strip()]
191
+ if len(parts) >= 3:
192
+ return parts[-3], parts[-2], parts[-1]
193
+ if len(parts) == 2:
194
+ return None, parts[0], parts[1]
195
+ return None, None, parts[0]
196
+
197
+
179
198
  def _normalize_column_type(raw: Any) -> str:
180
199
  if raw is None:
181
200
  return ""
@@ -449,8 +468,14 @@ def _build_information_schema_query(
449
468
  if table_schema:
450
469
  where_conditions.append(f"upper(t.table_schema) = '{table_schema.upper()}'")
451
470
  if table_names:
452
- formatted_names = ", ".join(f"'{name.upper()}'" for name in table_names)
453
- where_conditions.append(f"upper(t.table_name) IN ({formatted_names})")
471
+ normalized_names: List[str] = []
472
+ for name in table_names:
473
+ _, _, table_only = _split_identifier(name)
474
+ if table_only:
475
+ normalized_names.append(table_only.upper())
476
+ if normalized_names:
477
+ formatted_names = ", ".join(f"'{name}'" for name in normalized_names)
478
+ where_conditions.append(f"upper(t.table_name) IN ({formatted_names})")
454
479
 
455
480
  where_clause = " AND ".join(where_conditions)
456
481
  return f"""
@@ -490,6 +515,14 @@ def _fetch_columns_via_show(
490
515
  if not table_token:
491
516
  continue
492
517
 
518
+ override_catalog, override_schema, override_table = _split_identifier(table_token)
519
+ table_leaf = override_table or table_token
520
+ if not table_leaf:
521
+ continue
522
+
523
+ catalog_token = override_catalog or catalog
524
+ schema_token_override = override_schema or schema_token
525
+
493
526
  identifier_candidates: List[str] = []
494
527
  seen_identifiers: set[str] = set()
495
528
 
@@ -505,9 +538,9 @@ def _fetch_columns_via_show(
505
538
  identifier_candidates.append(identifier)
506
539
  seen_identifiers.add(identifier)
507
540
 
508
- raw_parts = (catalog, schema_token, table_token)
509
- schema_parts = (schema_token, table_token)
510
- table_parts = (table_token,)
541
+ raw_parts = (catalog_token, schema_token_override, table_leaf)
542
+ schema_parts = (schema_token_override, table_leaf)
543
+ table_parts = (table_leaf,)
511
544
 
512
545
  _add_identifier(raw_parts, quoted=False)
513
546
  _add_identifier(schema_parts, quoted=False)
@@ -582,10 +615,10 @@ def _fetch_columns_via_show(
582
615
  normalized[_TABLE_SCHEMA_COL] = (
583
616
  df[schema_col]
584
617
  if schema_col
585
- else (schema_token or table_schema or "")
618
+ else (schema_token_override or table_schema or "")
586
619
  )
587
620
  normalized[_TABLE_NAME_COL] = (
588
- df[table_col] if table_col else table_token
621
+ df[table_col] if table_col else table_leaf
589
622
  )
590
623
  normalized[_COLUMN_NAME_COL] = (
591
624
  df[column_col] if column_col else df.index.astype(str)
@@ -738,17 +771,11 @@ def fetch_tables_views_in_schema(
738
771
 
739
772
  try:
740
773
  if workspace_token and schema_token:
741
- if is_shared_catalog:
742
- scope = ".".join(
743
- part for part in (workspace_token, schema_token) if part
744
- )
745
- df = session.sql(f"SHOW TABLES IN SHARE {scope}").to_pandas()
746
- else:
747
- scope = join_quoted_identifiers(
748
- workspace_token,
749
- schema_token,
750
- )
751
- df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
774
+ scope = join_quoted_identifiers(
775
+ workspace_token,
776
+ schema_token,
777
+ )
778
+ df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
752
779
  else:
753
780
  df = session.sql("SHOW TABLES").to_pandas()
754
781
  except Exception as exc: # pragma: no cover
@@ -50,7 +50,15 @@ class RelationshipDiscoveryResult:
50
50
  def _normalize_table_names(table_names: Optional[Iterable[str]]) -> Optional[List[str]]:
51
51
  if table_names is None:
52
52
  return None
53
- return [name.upper() for name in table_names]
53
+ normalized: List[str] = []
54
+ for name in table_names:
55
+ parts = [
56
+ part.strip().strip("`").strip('"')
57
+ for part in str(name).split(".")
58
+ if part and part.strip()
59
+ ]
60
+ normalized.append(".".join(parts))
61
+ return normalized
54
62
 
55
63
 
56
64
  def _build_tables_from_dataframe(
@@ -86,3 +86,86 @@ def test_get_valid_columns_falls_back_to_show_columns():
86
86
  assert not df.empty
87
87
  assert df["TABLE_NAME"].iloc[0] == "PARTSUPP"
88
88
  assert df["COLUMN_NAME"].iloc[0] == "PS_PARTKEY"
89
+
90
+
91
+ def test_get_valid_columns_handles_fully_qualified_filters():
92
+ class DummyResult:
93
+ def __init__(self, df: pd.DataFrame):
94
+ self._df = df
95
+
96
+ def to_pandas(self) -> pd.DataFrame:
97
+ return self._df
98
+
99
+ table_df = pd.DataFrame(
100
+ {
101
+ "schema_name": ["S1"],
102
+ "table_name": ["TABLE_ONE"],
103
+ "column_name": ["ID"],
104
+ "data_type": ["INT"],
105
+ "comment": [""],
106
+ }
107
+ )
108
+
109
+ call_log: list[str] = []
110
+
111
+ def sql_side_effect(query: str):
112
+ call_log.append(query)
113
+ if "information_schema" in query:
114
+ raise RuntimeError("info schema unavailable")
115
+ if query == "SHOW COLUMNS IN TEST_WS.S1.TABLE_ONE":
116
+ return DummyResult(table_df)
117
+ raise RuntimeError("unsupported query")
118
+
119
+ session = mock.MagicMock()
120
+ session.sql.side_effect = sql_side_effect
121
+ connector._CATALOG_CATEGORY_CACHE.clear()
122
+
123
+ df = connector.get_valid_schemas_tables_columns_df(
124
+ session=session,
125
+ workspace="TEST_WS",
126
+ table_schema="S1",
127
+ table_names=["TEST_WS.S1.TABLE_ONE"],
128
+ )
129
+
130
+ assert not df.empty
131
+ assert any("SHOW COLUMNS IN TEST_WS.S1.TABLE_ONE" in q for q in call_log)
132
+ assert all("TEST_WS.S1.TEST_WS.S1" not in q for q in call_log)
133
+
134
+
135
+ def test_fetch_tables_views_in_schema_shared_catalog_does_not_use_share_clause():
136
+ class DummyResult:
137
+ def __init__(self, df: pd.DataFrame):
138
+ self._df = df
139
+
140
+ def to_pandas(self) -> pd.DataFrame:
141
+ return self._df
142
+
143
+ tables_df = pd.DataFrame(
144
+ {
145
+ "workspace_name": ["lakehouse_ai"],
146
+ "schema_name": ["schema_for_opencatalog"],
147
+ "table_name": ["czcustomer"],
148
+ "is_view": [False],
149
+ "is_materialized_view": [False],
150
+ }
151
+ )
152
+
153
+ executed_queries: list[str] = []
154
+
155
+ def sql_side_effect(query: str):
156
+ executed_queries.append(query)
157
+ if query.startswith("SHOW TABLES IN"):
158
+ return DummyResult(tables_df)
159
+ raise RuntimeError("Unexpected query")
160
+
161
+ session = mock.MagicMock()
162
+ session.sql.side_effect = sql_side_effect
163
+ connector._CATALOG_CATEGORY_CACHE.clear()
164
+
165
+ with mock.patch.object(connector, "_catalog_category", return_value="SHARED"):
166
+ tables = connector.fetch_tables_views_in_schema(
167
+ session=session, schema_name="lakehouse_ai.schema_for_opencatalog"
168
+ )
169
+
170
+ assert tables == ["lakehouse_ai.schema_for_opencatalog.czcustomer"]
171
+ assert all("IN SHARE" not in query for query in executed_queries)