clickzetta-semantic-model-generator 1.0.4__tar.gz → 1.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/PKG-INFO +1 -1
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/pyproject.toml +1 -1
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/clickzetta_utils/clickzetta_connector.py +36 -11
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/relationships/discovery.py +33 -3
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/relationship_discovery_test.py +44 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/LICENSE +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/README.md +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/clickzetta_utils/env_vars.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/clickzetta_utils/utils.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/cte_utils.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/cte_utils_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/data_types.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/proto_utils.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/generate_model.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/dashscope_client.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/enrichment.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/progress_tracker.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/output_models/.keep +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/protos/semantic_model.proto +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/protos/semantic_model_pb2.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/protos/semantic_model_pb2.pyi +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/relationships/__init__.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/clickzetta_connector_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/cte_utils_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/generate_model_classification_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/llm_enrichment_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/relationships_filters_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/samples/validate_yamls.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/utils_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/validate_model_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/yaml_to_semantic_model_test.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate/context_length.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate/keywords.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate/schema.py +0 -0
- {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate_model.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "clickzetta-semantic-model-generator"
|
3
|
-
version = "1.0.4"
|
3
|
+
version = "1.0.6"
|
4
4
|
description = "Curate a Semantic Model for ClickZetta Lakehouse"
|
5
5
|
authors = ["qililiang <qililiang@clickzetta.com>"]
|
6
6
|
license = "Apache Software License; BSD License"
|
@@ -480,15 +480,30 @@ def _fetch_columns_via_show(
|
|
480
480
|
return pd.DataFrame()
|
481
481
|
|
482
482
|
rows: List[pd.DataFrame] = []
|
483
|
-
|
484
|
-
|
483
|
+
category = _catalog_category(session, workspace)
|
484
|
+
is_shared_catalog = category in {"SHARED", "EXTERNAL"}
|
485
|
+
catalog = workspace if is_shared_catalog else workspace.upper()
|
486
|
+
schema = (
|
487
|
+
table_schema or ""
|
488
|
+
)
|
489
|
+
if schema and not is_shared_catalog:
|
490
|
+
schema = schema.upper()
|
485
491
|
|
486
492
|
for table_name in table_names:
|
487
493
|
qualified_parts = [
|
488
|
-
part
|
494
|
+
part
|
495
|
+
for part in (
|
496
|
+
catalog,
|
497
|
+
schema,
|
498
|
+
table_name.upper() if not is_shared_catalog else table_name,
|
499
|
+
)
|
500
|
+
if part
|
489
501
|
]
|
490
502
|
qualified_table = ".".join(qualified_parts)
|
491
|
-
|
503
|
+
if is_shared_catalog:
|
504
|
+
query = f"SHOW COLUMNS IN SHARE {qualified_table}"
|
505
|
+
else:
|
506
|
+
query = f"SHOW COLUMNS IN {qualified_table}"
|
492
507
|
try:
|
493
508
|
df = session.sql(query).to_pandas()
|
494
509
|
except Exception as exc:
|
@@ -646,13 +661,25 @@ def fetch_tables_views_in_schema(
|
|
646
661
|
parts = schema_name.split(".", maxsplit=1)
|
647
662
|
workspace = parts[0]
|
648
663
|
schema = parts[1] if len(parts) > 1 else ""
|
649
|
-
|
650
|
-
|
664
|
+
category = _catalog_category(session, workspace)
|
665
|
+
is_shared_catalog = category in {"SHARED", "EXTERNAL"}
|
666
|
+
|
667
|
+
workspace_token = workspace if is_shared_catalog else workspace.upper()
|
668
|
+
schema_token = schema if is_shared_catalog else schema.upper()
|
651
669
|
|
652
670
|
try:
|
653
|
-
if
|
654
|
-
|
655
|
-
|
671
|
+
if workspace_token and schema_token:
|
672
|
+
if is_shared_catalog:
|
673
|
+
scope = ".".join(
|
674
|
+
part for part in (workspace_token, schema_token) if part
|
675
|
+
)
|
676
|
+
df = session.sql(f"SHOW TABLES IN SHARE {scope}").to_pandas()
|
677
|
+
else:
|
678
|
+
scope = join_quoted_identifiers(
|
679
|
+
workspace_token,
|
680
|
+
schema_token,
|
681
|
+
)
|
682
|
+
df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
|
656
683
|
else:
|
657
684
|
df = session.sql("SHOW TABLES").to_pandas()
|
658
685
|
except Exception as exc: # pragma: no cover
|
@@ -688,8 +715,6 @@ def fetch_tables_views_in_schema(
|
|
688
715
|
row.get("IS_MATERIALIZED_VIEW")
|
689
716
|
):
|
690
717
|
continue
|
691
|
-
if _value_is_true(row.get("IS_EXTERNAL")):
|
692
|
-
continue
|
693
718
|
# keep materialized views
|
694
719
|
catalog_part = _sanitize_identifier(
|
695
720
|
row[catalog_column] if catalog_column else workspace, workspace
|
@@ -112,22 +112,36 @@ def _tables_payload_to_raw_tables(
|
|
112
112
|
if not isinstance(table_entry, Mapping):
|
113
113
|
raise TypeError("Each table definition must be a mapping of table metadata")
|
114
114
|
|
115
|
-
|
115
|
+
raw_table_identifier = str(
|
116
116
|
table_entry.get("table_name")
|
117
117
|
or table_entry.get("name")
|
118
118
|
or table_entry.get("table")
|
119
119
|
or ""
|
120
120
|
).strip()
|
121
|
-
if not
|
121
|
+
if not raw_table_identifier:
|
122
122
|
raise ValueError("Table definition missing 'table_name'")
|
123
123
|
|
124
|
-
|
124
|
+
identifier_workspace, identifier_schema, identifier_table = _split_table_identifier(
|
125
|
+
raw_table_identifier
|
126
|
+
)
|
127
|
+
|
128
|
+
workspace = str(
|
129
|
+
table_entry.get("workspace")
|
130
|
+
or table_entry.get("database")
|
131
|
+
or identifier_workspace
|
132
|
+
or default_workspace
|
133
|
+
).strip() or default_workspace
|
125
134
|
schema = str(
|
126
135
|
table_entry.get("schema")
|
127
136
|
or table_entry.get("schema_name")
|
137
|
+
or identifier_schema
|
128
138
|
or default_schema
|
129
139
|
).strip() or default_schema
|
130
140
|
|
141
|
+
table_name = identifier_table.strip()
|
142
|
+
if not table_name:
|
143
|
+
raise ValueError(f"Unable to parse table name from '{raw_table_identifier}'")
|
144
|
+
|
131
145
|
columns_payload = table_entry.get("columns")
|
132
146
|
if not isinstance(columns_payload, Sequence) or not columns_payload:
|
133
147
|
raise ValueError(
|
@@ -370,3 +384,19 @@ def discover_relationships_from_schema(
|
|
370
384
|
timeout_seconds=timeout_seconds,
|
371
385
|
max_tables=max_tables,
|
372
386
|
)
|
387
|
+
def _split_table_identifier(identifier: str) -> Tuple[Optional[str], Optional[str], str]:
|
388
|
+
"""
|
389
|
+
Split a table identifier that may include workspace/schema prefixes.
|
390
|
+
|
391
|
+
Supported formats:
|
392
|
+
- workspace.schema.table
|
393
|
+
- schema.table
|
394
|
+
- table
|
395
|
+
"""
|
396
|
+
|
397
|
+
parts = [part.strip() for part in identifier.split(".") if part.strip()]
|
398
|
+
if len(parts) == 3:
|
399
|
+
return parts[0], parts[1], parts[2]
|
400
|
+
if len(parts) == 2:
|
401
|
+
return None, parts[0], parts[1]
|
402
|
+
return None, None, parts[0]
|
@@ -176,3 +176,47 @@ def test_discover_relationships_from_table_definitions_filters_generic_ids() ->
|
|
176
176
|
|
177
177
|
assert result.summary.total_relationships_found == 0
|
178
178
|
assert not result.relationships
|
179
|
+
|
180
|
+
|
181
|
+
def test_table_definitions_support_fully_qualified_names() -> None:
|
182
|
+
payload = [
|
183
|
+
{
|
184
|
+
"table_name": "demo.sales.orders",
|
185
|
+
"columns": [
|
186
|
+
{"name": "order_id", "type": "NUMBER", "is_primary_key": True},
|
187
|
+
{"name": "customer_id", "type": "NUMBER"},
|
188
|
+
],
|
189
|
+
},
|
190
|
+
{
|
191
|
+
"table_name": "sales.customers",
|
192
|
+
"workspace": "demo",
|
193
|
+
"columns": [
|
194
|
+
{"name": "customer_id", "type": "NUMBER", "is_primary_key": True},
|
195
|
+
{"name": "name", "type": "STRING"},
|
196
|
+
],
|
197
|
+
},
|
198
|
+
{
|
199
|
+
"table_name": "products",
|
200
|
+
"workspace": "demo",
|
201
|
+
"schema": "sales",
|
202
|
+
"columns": [
|
203
|
+
{"name": "product_id", "type": "NUMBER", "is_primary_key": True},
|
204
|
+
{"name": "name", "type": "STRING"},
|
205
|
+
],
|
206
|
+
},
|
207
|
+
]
|
208
|
+
|
209
|
+
result = discover_relationships_from_table_definitions(
|
210
|
+
payload,
|
211
|
+
default_workspace="fallback",
|
212
|
+
default_schema="fallback_schema",
|
213
|
+
)
|
214
|
+
|
215
|
+
table_names = {table.name for table in result.tables}
|
216
|
+
assert table_names == {"ORDERS", "CUSTOMERS", "PRODUCTS"}
|
217
|
+
|
218
|
+
# Ensure relationships include the orders -> customers edge despite mixed identifiers
|
219
|
+
assert any(
|
220
|
+
rel.left_table == "ORDERS" and rel.right_table == "CUSTOMERS"
|
221
|
+
for rel in result.relationships
|
222
|
+
)
|
{clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/LICENSE
RENAMED
File without changes
|
{clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/README.md
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|