clickzetta-semantic-model-generator 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: clickzetta-semantic-model-generator
3
- Version: 1.0.4
3
+ Version: 1.0.5
4
4
  Summary: Curate a Semantic Model for ClickZetta Lakehouse
5
5
  License: Apache Software License; BSD License
6
6
  Author: qililiang
@@ -1,5 +1,5 @@
1
1
  semantic_model_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- semantic_model_generator/clickzetta_utils/clickzetta_connector.py,sha256=LnGQTBj94aC8Zk9aVe2efA6-3UX_E8Q7ITvnfEoByjw,32819
2
+ semantic_model_generator/clickzetta_utils/clickzetta_connector.py,sha256=z8WYF2Ft2_u4JJsbaaN64IW-bIaiV9Bkv6e1pes3PdU,33777
3
3
  semantic_model_generator/clickzetta_utils/env_vars.py,sha256=8cbL6R75c1-aVQ2i1TDr9SiHCUjTrgvXbIRz4MbcmbE,7664
4
4
  semantic_model_generator/clickzetta_utils/utils.py,sha256=UBfWy9qOTyut8tL02gOHHbh6Uz8RqRz5Mm2YdKWFN54,4950
5
5
  semantic_model_generator/data_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -17,12 +17,12 @@ semantic_model_generator/protos/semantic_model.proto,sha256=WZiN4b8vR-ZX-Lj9Vsm6
17
17
  semantic_model_generator/protos/semantic_model_pb2.py,sha256=scbWkW-I-r3_hp_5SHoOWn02p52RJ9DJ0_-nRgr0LHc,25606
18
18
  semantic_model_generator/protos/semantic_model_pb2.pyi,sha256=iiBIZxtX9d6IuUO3aLcsJsHUeZqdi14vYNuUsSM8C0g,18267
19
19
  semantic_model_generator/relationships/__init__.py,sha256=I9-_QJdp36nEllzKTGXi2aWbRjiXrrexQXUfB6mi3Ww,477
20
- semantic_model_generator/relationships/discovery.py,sha256=BdPHIvlE6yuaQv0ELWwQlq0qx0uX7fkoEMfuvK8wO60,12147
20
+ semantic_model_generator/relationships/discovery.py,sha256=aw3LrthDZ6ng9P5eI3noxw-1E30csYqe2kyGn6CpLZA,13125
21
21
  semantic_model_generator/tests/clickzetta_connector_test.py,sha256=Fdx7jooNt1lslKB2Ub51wqOZ8OM0osgZiDDl3bV6riw,3086
22
22
  semantic_model_generator/tests/cte_utils_test.py,sha256=_9GAJiOPGSagdWmQsoAEOOhEgsBY0LFlr_xtwrlgf4A,17561
23
23
  semantic_model_generator/tests/generate_model_classification_test.py,sha256=Amq29cmeKd0S7iVikJ60RFm9gpWaQv1TijXofp3J-lI,2275
24
24
  semantic_model_generator/tests/llm_enrichment_test.py,sha256=1avLrPWp7J7o_K3PKbI_PIvduM5Id21MmoL0JTeDTfs,15738
25
- semantic_model_generator/tests/relationship_discovery_test.py,sha256=OvnK2jhWNFfHI31eeIEmclgaUoFjj_mZuDFAnjLMBpw,5411
25
+ semantic_model_generator/tests/relationship_discovery_test.py,sha256=CBeQVfd9XT5haXpNs6tsccH79v8zDa6abnUYL8f2gSs,6829
26
26
  semantic_model_generator/tests/relationships_filters_test.py,sha256=bUm3r1UGaXca-hJOot7jMPz4It_TVsoddd-Xpk-76zM,10166
27
27
  semantic_model_generator/tests/samples/validate_yamls.py,sha256=262j-2i2oFZtTyK2susOrbxxE5eS-6IN-V0jFEOpt_w,156249
28
28
  semantic_model_generator/tests/utils_test.py,sha256=HWRXR45QYL1f6L8xsMppqLXzF9HAsrMwTMQIKpZrc_M,539
@@ -32,7 +32,7 @@ semantic_model_generator/validate/context_length.py,sha256=HL-GfaRXNcVji1-pAFGXG
32
32
  semantic_model_generator/validate/keywords.py,sha256=frZ5HjRXP69K6dYAU5_d86oSp40_3yoLUg1eQwU3oLM,7080
33
33
  semantic_model_generator/validate/schema.py,sha256=eL_wl5yscIeczwNBRUKhF_7QqWW2wSGimkgaOhMFsrA,5893
34
34
  semantic_model_generator/validate_model.py,sha256=Uq-V-GfPeF2Dy4l9uF5Guv104gDCDGh0Cxz1AJOu5dk,836
35
- clickzetta_semantic_model_generator-1.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
36
- clickzetta_semantic_model_generator-1.0.4.dist-info/METADATA,sha256=zf4rBVSbisDDtZOnw5SoxGCRrO-PjfMQ66PinfYK3xg,7816
37
- clickzetta_semantic_model_generator-1.0.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
38
- clickzetta_semantic_model_generator-1.0.4.dist-info/RECORD,,
35
+ clickzetta_semantic_model_generator-1.0.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
36
+ clickzetta_semantic_model_generator-1.0.5.dist-info/METADATA,sha256=rxOjgbcKvTYIapoteFS2Lz9E1388cFCCpZPa4VjjcrE,7816
37
+ clickzetta_semantic_model_generator-1.0.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
38
+ clickzetta_semantic_model_generator-1.0.5.dist-info/RECORD,,
@@ -480,15 +480,30 @@ def _fetch_columns_via_show(
480
480
  return pd.DataFrame()
481
481
 
482
482
  rows: List[pd.DataFrame] = []
483
- catalog = workspace.upper()
484
- schema = table_schema.upper() if table_schema else ""
483
+ category = _catalog_category(session, workspace)
484
+ is_shared_catalog = category in {"SHARED", "EXTERNAL"}
485
+ catalog = workspace if is_shared_catalog else workspace.upper()
486
+ schema = (
487
+ table_schema or ""
488
+ )
489
+ if schema and not is_shared_catalog:
490
+ schema = schema.upper()
485
491
 
486
492
  for table_name in table_names:
487
493
  qualified_parts = [
488
- part for part in (catalog, schema, table_name.upper()) if part
494
+ part
495
+ for part in (
496
+ catalog,
497
+ schema,
498
+ table_name.upper() if not is_shared_catalog else table_name,
499
+ )
500
+ if part
489
501
  ]
490
502
  qualified_table = ".".join(qualified_parts)
491
- query = f"SHOW COLUMNS IN {qualified_table}"
503
+ if is_shared_catalog:
504
+ query = f"SHOW COLUMNS IN SHARE {qualified_table}"
505
+ else:
506
+ query = f"SHOW COLUMNS IN {qualified_table}"
492
507
  try:
493
508
  df = session.sql(query).to_pandas()
494
509
  except Exception as exc:
@@ -646,13 +661,25 @@ def fetch_tables_views_in_schema(
646
661
  parts = schema_name.split(".", maxsplit=1)
647
662
  workspace = parts[0]
648
663
  schema = parts[1] if len(parts) > 1 else ""
649
- workspace_upper = workspace.upper()
650
- schema_upper = schema.upper()
664
+ category = _catalog_category(session, workspace)
665
+ is_shared_catalog = category in {"SHARED", "EXTERNAL"}
666
+
667
+ workspace_token = workspace if is_shared_catalog else workspace.upper()
668
+ schema_token = schema if is_shared_catalog else schema.upper()
651
669
 
652
670
  try:
653
- if workspace_upper and schema_upper:
654
- scope = join_quoted_identifiers(workspace_upper, schema_upper)
655
- df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
671
+ if workspace_token and schema_token:
672
+ if is_shared_catalog:
673
+ scope = ".".join(
674
+ part for part in (workspace_token, schema_token) if part
675
+ )
676
+ df = session.sql(f"SHOW TABLES IN SHARE {scope}").to_pandas()
677
+ else:
678
+ scope = join_quoted_identifiers(
679
+ workspace_token,
680
+ schema_token,
681
+ )
682
+ df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
656
683
  else:
657
684
  df = session.sql("SHOW TABLES").to_pandas()
658
685
  except Exception as exc: # pragma: no cover
@@ -112,22 +112,36 @@ def _tables_payload_to_raw_tables(
112
112
  if not isinstance(table_entry, Mapping):
113
113
  raise TypeError("Each table definition must be a mapping of table metadata")
114
114
 
115
- table_name = str(
115
+ raw_table_identifier = str(
116
116
  table_entry.get("table_name")
117
117
  or table_entry.get("name")
118
118
  or table_entry.get("table")
119
119
  or ""
120
120
  ).strip()
121
- if not table_name:
121
+ if not raw_table_identifier:
122
122
  raise ValueError("Table definition missing 'table_name'")
123
123
 
124
- workspace = str(table_entry.get("workspace") or default_workspace).strip() or default_workspace
124
+ identifier_workspace, identifier_schema, identifier_table = _split_table_identifier(
125
+ raw_table_identifier
126
+ )
127
+
128
+ workspace = str(
129
+ table_entry.get("workspace")
130
+ or table_entry.get("database")
131
+ or identifier_workspace
132
+ or default_workspace
133
+ ).strip() or default_workspace
125
134
  schema = str(
126
135
  table_entry.get("schema")
127
136
  or table_entry.get("schema_name")
137
+ or identifier_schema
128
138
  or default_schema
129
139
  ).strip() or default_schema
130
140
 
141
+ table_name = identifier_table.strip()
142
+ if not table_name:
143
+ raise ValueError(f"Unable to parse table name from '{raw_table_identifier}'")
144
+
131
145
  columns_payload = table_entry.get("columns")
132
146
  if not isinstance(columns_payload, Sequence) or not columns_payload:
133
147
  raise ValueError(
@@ -370,3 +384,19 @@ def discover_relationships_from_schema(
370
384
  timeout_seconds=timeout_seconds,
371
385
  max_tables=max_tables,
372
386
  )
387
+ def _split_table_identifier(identifier: str) -> Tuple[Optional[str], Optional[str], str]:
388
+ """
389
+ Split a table identifier that may include workspace/schema prefixes.
390
+
391
+ Supported formats:
392
+ - workspace.schema.table
393
+ - schema.table
394
+ - table
395
+ """
396
+
397
+ parts = [part.strip() for part in identifier.split(".") if part.strip()]
398
+ if len(parts) == 3:
399
+ return parts[0], parts[1], parts[2]
400
+ if len(parts) == 2:
401
+ return None, parts[0], parts[1]
402
+ return None, None, parts[0]
@@ -176,3 +176,47 @@ def test_discover_relationships_from_table_definitions_filters_generic_ids() ->
176
176
 
177
177
  assert result.summary.total_relationships_found == 0
178
178
  assert not result.relationships
179
+
180
+
181
def test_table_definitions_support_fully_qualified_names() -> None:
    """Table definitions may name their table with or without dotted prefixes.

    The three entries below spell the table as ``workspace.schema.table``,
    ``schema.table`` plus an explicit workspace, and a bare table name plus
    explicit workspace and schema.
    """
    # Fully qualified name, no separate workspace/schema keys.
    orders_definition = {
        "table_name": "demo.sales.orders",
        "columns": [
            {"name": "order_id", "type": "NUMBER", "is_primary_key": True},
            {"name": "customer_id", "type": "NUMBER"},
        ],
    }
    # Schema-qualified name combined with an explicit workspace key.
    customers_definition = {
        "table_name": "sales.customers",
        "workspace": "demo",
        "columns": [
            {"name": "customer_id", "type": "NUMBER", "is_primary_key": True},
            {"name": "name", "type": "STRING"},
        ],
    }
    # Bare table name with explicit workspace and schema keys.
    products_definition = {
        "table_name": "products",
        "workspace": "demo",
        "schema": "sales",
        "columns": [
            {"name": "product_id", "type": "NUMBER", "is_primary_key": True},
            {"name": "name", "type": "STRING"},
        ],
    }

    outcome = discover_relationships_from_table_definitions(
        [orders_definition, customers_definition, products_definition],
        default_workspace="fallback",
        default_schema="fallback_schema",
    )

    discovered_names = set()
    for table in outcome.tables:
        discovered_names.add(table.name)
    assert discovered_names == {"ORDERS", "CUSTOMERS", "PRODUCTS"}

    # Ensure relationships include the orders -> customers edge despite mixed identifiers
    found_orders_to_customers = False
    for rel in outcome.relationships:
        if rel.left_table == "ORDERS" and rel.right_table == "CUSTOMERS":
            found_orders_to_customers = True
            break
    assert found_orders_to_customers