clickzetta-semantic-model-generator 1.0.4__tar.gz → 1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/PKG-INFO +1 -1
  2. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/pyproject.toml +1 -1
  3. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/clickzetta_utils/clickzetta_connector.py +36 -11
  4. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/relationships/discovery.py +33 -3
  5. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/relationship_discovery_test.py +44 -0
  6. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/LICENSE +0 -0
  7. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/README.md +0 -0
  8. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/__init__.py +0 -0
  9. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/clickzetta_utils/env_vars.py +0 -0
  10. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/clickzetta_utils/utils.py +0 -0
  11. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/__init__.py +0 -0
  12. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/cte_utils.py +0 -0
  13. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/cte_utils_test.py +0 -0
  14. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/data_types.py +0 -0
  15. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/data_processing/proto_utils.py +0 -0
  16. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/generate_model.py +0 -0
  17. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/__init__.py +0 -0
  18. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/dashscope_client.py +0 -0
  19. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/enrichment.py +0 -0
  20. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/llm/progress_tracker.py +0 -0
  21. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/output_models/.keep +0 -0
  22. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/protos/semantic_model.proto +0 -0
  23. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/protos/semantic_model_pb2.py +0 -0
  24. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/protos/semantic_model_pb2.pyi +0 -0
  25. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/relationships/__init__.py +0 -0
  26. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/clickzetta_connector_test.py +0 -0
  27. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/cte_utils_test.py +0 -0
  28. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/generate_model_classification_test.py +0 -0
  29. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/llm_enrichment_test.py +0 -0
  30. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/relationships_filters_test.py +0 -0
  31. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/samples/validate_yamls.py +0 -0
  32. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/utils_test.py +0 -0
  33. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/validate_model_test.py +0 -0
  34. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/tests/yaml_to_semantic_model_test.py +0 -0
  35. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate/context_length.py +0 -0
  36. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate/keywords.py +0 -0
  37. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate/schema.py +0 -0
  38. {clickzetta_semantic_model_generator-1.0.4 → clickzetta_semantic_model_generator-1.0.6}/semantic_model_generator/validate_model.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: clickzetta-semantic-model-generator
3
- Version: 1.0.4
3
+ Version: 1.0.6
4
4
  Summary: Curate a Semantic Model for ClickZetta Lakehouse
5
5
  License: Apache Software License; BSD License
6
6
  Author: qililiang
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "clickzetta-semantic-model-generator"
3
- version = "1.0.4"
3
+ version = "1.0.6"
4
4
  description = "Curate a Semantic Model for ClickZetta Lakehouse"
5
5
  authors = ["qililiang <qililiang@clickzetta.com>"]
6
6
  license = "Apache Software License; BSD License"
@@ -480,15 +480,30 @@ def _fetch_columns_via_show(
480
480
  return pd.DataFrame()
481
481
 
482
482
  rows: List[pd.DataFrame] = []
483
- catalog = workspace.upper()
484
- schema = table_schema.upper() if table_schema else ""
483
+ category = _catalog_category(session, workspace)
484
+ is_shared_catalog = category in {"SHARED", "EXTERNAL"}
485
+ catalog = workspace if is_shared_catalog else workspace.upper()
486
+ schema = (
487
+ table_schema or ""
488
+ )
489
+ if schema and not is_shared_catalog:
490
+ schema = schema.upper()
485
491
 
486
492
  for table_name in table_names:
487
493
  qualified_parts = [
488
- part for part in (catalog, schema, table_name.upper()) if part
494
+ part
495
+ for part in (
496
+ catalog,
497
+ schema,
498
+ table_name.upper() if not is_shared_catalog else table_name,
499
+ )
500
+ if part
489
501
  ]
490
502
  qualified_table = ".".join(qualified_parts)
491
- query = f"SHOW COLUMNS IN {qualified_table}"
503
+ if is_shared_catalog:
504
+ query = f"SHOW COLUMNS IN SHARE {qualified_table}"
505
+ else:
506
+ query = f"SHOW COLUMNS IN {qualified_table}"
492
507
  try:
493
508
  df = session.sql(query).to_pandas()
494
509
  except Exception as exc:
@@ -646,13 +661,25 @@ def fetch_tables_views_in_schema(
646
661
  parts = schema_name.split(".", maxsplit=1)
647
662
  workspace = parts[0]
648
663
  schema = parts[1] if len(parts) > 1 else ""
649
- workspace_upper = workspace.upper()
650
- schema_upper = schema.upper()
664
+ category = _catalog_category(session, workspace)
665
+ is_shared_catalog = category in {"SHARED", "EXTERNAL"}
666
+
667
+ workspace_token = workspace if is_shared_catalog else workspace.upper()
668
+ schema_token = schema if is_shared_catalog else schema.upper()
651
669
 
652
670
  try:
653
- if workspace_upper and schema_upper:
654
- scope = join_quoted_identifiers(workspace_upper, schema_upper)
655
- df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
671
+ if workspace_token and schema_token:
672
+ if is_shared_catalog:
673
+ scope = ".".join(
674
+ part for part in (workspace_token, schema_token) if part
675
+ )
676
+ df = session.sql(f"SHOW TABLES IN SHARE {scope}").to_pandas()
677
+ else:
678
+ scope = join_quoted_identifiers(
679
+ workspace_token,
680
+ schema_token,
681
+ )
682
+ df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
656
683
  else:
657
684
  df = session.sql("SHOW TABLES").to_pandas()
658
685
  except Exception as exc: # pragma: no cover
@@ -688,8 +715,6 @@ def fetch_tables_views_in_schema(
688
715
  row.get("IS_MATERIALIZED_VIEW")
689
716
  ):
690
717
  continue
691
- if _value_is_true(row.get("IS_EXTERNAL")):
692
- continue
693
718
  # keep materialized views
694
719
  catalog_part = _sanitize_identifier(
695
720
  row[catalog_column] if catalog_column else workspace, workspace
@@ -112,22 +112,36 @@ def _tables_payload_to_raw_tables(
112
112
  if not isinstance(table_entry, Mapping):
113
113
  raise TypeError("Each table definition must be a mapping of table metadata")
114
114
 
115
- table_name = str(
115
+ raw_table_identifier = str(
116
116
  table_entry.get("table_name")
117
117
  or table_entry.get("name")
118
118
  or table_entry.get("table")
119
119
  or ""
120
120
  ).strip()
121
- if not table_name:
121
+ if not raw_table_identifier:
122
122
  raise ValueError("Table definition missing 'table_name'")
123
123
 
124
- workspace = str(table_entry.get("workspace") or default_workspace).strip() or default_workspace
124
+ identifier_workspace, identifier_schema, identifier_table = _split_table_identifier(
125
+ raw_table_identifier
126
+ )
127
+
128
+ workspace = str(
129
+ table_entry.get("workspace")
130
+ or table_entry.get("database")
131
+ or identifier_workspace
132
+ or default_workspace
133
+ ).strip() or default_workspace
125
134
  schema = str(
126
135
  table_entry.get("schema")
127
136
  or table_entry.get("schema_name")
137
+ or identifier_schema
128
138
  or default_schema
129
139
  ).strip() or default_schema
130
140
 
141
+ table_name = identifier_table.strip()
142
+ if not table_name:
143
+ raise ValueError(f"Unable to parse table name from '{raw_table_identifier}'")
144
+
131
145
  columns_payload = table_entry.get("columns")
132
146
  if not isinstance(columns_payload, Sequence) or not columns_payload:
133
147
  raise ValueError(
@@ -370,3 +384,19 @@ def discover_relationships_from_schema(
370
384
  timeout_seconds=timeout_seconds,
371
385
  max_tables=max_tables,
372
386
  )
387
+ def _split_table_identifier(identifier: str) -> Tuple[Optional[str], Optional[str], str]:
388
+ """
389
+ Split a table identifier that may include workspace/schema prefixes.
390
+
391
+ Supported formats:
392
+ - workspace.schema.table
393
+ - schema.table
394
+ - table
395
+ """
396
+
397
+ parts = [part.strip() for part in identifier.split(".") if part.strip()]
398
+ if len(parts) == 3:
399
+ return parts[0], parts[1], parts[2]
400
+ if len(parts) == 2:
401
+ return None, parts[0], parts[1]
402
+ return None, None, parts[0]
@@ -176,3 +176,47 @@ def test_discover_relationships_from_table_definitions_filters_generic_ids() ->
176
176
 
177
177
  assert result.summary.total_relationships_found == 0
178
178
  assert not result.relationships
179
+
180
+
181
+ def test_table_definitions_support_fully_qualified_names() -> None:
182
+ payload = [
183
+ {
184
+ "table_name": "demo.sales.orders",
185
+ "columns": [
186
+ {"name": "order_id", "type": "NUMBER", "is_primary_key": True},
187
+ {"name": "customer_id", "type": "NUMBER"},
188
+ ],
189
+ },
190
+ {
191
+ "table_name": "sales.customers",
192
+ "workspace": "demo",
193
+ "columns": [
194
+ {"name": "customer_id", "type": "NUMBER", "is_primary_key": True},
195
+ {"name": "name", "type": "STRING"},
196
+ ],
197
+ },
198
+ {
199
+ "table_name": "products",
200
+ "workspace": "demo",
201
+ "schema": "sales",
202
+ "columns": [
203
+ {"name": "product_id", "type": "NUMBER", "is_primary_key": True},
204
+ {"name": "name", "type": "STRING"},
205
+ ],
206
+ },
207
+ ]
208
+
209
+ result = discover_relationships_from_table_definitions(
210
+ payload,
211
+ default_workspace="fallback",
212
+ default_schema="fallback_schema",
213
+ )
214
+
215
+ table_names = {table.name for table in result.tables}
216
+ assert table_names == {"ORDERS", "CUSTOMERS", "PRODUCTS"}
217
+
218
+ # Ensure relationships include the orders -> customers edge despite mixed identifiers
219
+ assert any(
220
+ rel.left_table == "ORDERS" and rel.right_table == "CUSTOMERS"
221
+ for rel in result.relationships
222
+ )