clickzetta-semantic-model-generator 1.0.14__py3-none-any.whl → 1.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {clickzetta_semantic_model_generator-1.0.14.dist-info → clickzetta_semantic_model_generator-1.0.15.dist-info}/METADATA +1 -1
- {clickzetta_semantic_model_generator-1.0.14.dist-info → clickzetta_semantic_model_generator-1.0.15.dist-info}/RECORD +7 -7
- semantic_model_generator/clickzetta_utils/clickzetta_connector.py +45 -18
- semantic_model_generator/relationships/discovery.py +9 -1
- semantic_model_generator/tests/clickzetta_connector_test.py +83 -0
- {clickzetta_semantic_model_generator-1.0.14.dist-info → clickzetta_semantic_model_generator-1.0.15.dist-info}/LICENSE +0 -0
- {clickzetta_semantic_model_generator-1.0.14.dist-info → clickzetta_semantic_model_generator-1.0.15.dist-info}/WHEEL +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
semantic_model_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
semantic_model_generator/clickzetta_utils/clickzetta_connector.py,sha256=
|
2
|
+
semantic_model_generator/clickzetta_utils/clickzetta_connector.py,sha256=1EnNk6Xr9uwUGdiPvn_HeafxmHBQlkXEEAb3iYHKAnI,37383
|
3
3
|
semantic_model_generator/clickzetta_utils/env_vars.py,sha256=8cbL6R75c1-aVQ2i1TDr9SiHCUjTrgvXbIRz4MbcmbE,7664
|
4
4
|
semantic_model_generator/clickzetta_utils/utils.py,sha256=UBfWy9qOTyut8tL02gOHHbh6Uz8RqRz5Mm2YdKWFN54,4950
|
5
5
|
semantic_model_generator/data_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -17,8 +17,8 @@ semantic_model_generator/protos/semantic_model.proto,sha256=WZiN4b8vR-ZX-Lj9Vsm6
|
|
17
17
|
semantic_model_generator/protos/semantic_model_pb2.py,sha256=scbWkW-I-r3_hp_5SHoOWn02p52RJ9DJ0_-nRgr0LHc,25606
|
18
18
|
semantic_model_generator/protos/semantic_model_pb2.pyi,sha256=iiBIZxtX9d6IuUO3aLcsJsHUeZqdi14vYNuUsSM8C0g,18267
|
19
19
|
semantic_model_generator/relationships/__init__.py,sha256=I9-_QJdp36nEllzKTGXi2aWbRjiXrrexQXUfB6mi3Ww,477
|
20
|
-
semantic_model_generator/relationships/discovery.py,sha256=
|
21
|
-
semantic_model_generator/tests/clickzetta_connector_test.py,sha256=
|
20
|
+
semantic_model_generator/relationships/discovery.py,sha256=JQ1uCMxdrXB66z5QuCSpeP3x8BxC7b9Q51zyrxckME4,13357
|
21
|
+
semantic_model_generator/tests/clickzetta_connector_test.py,sha256=e8sr5SzEhgSgshgyibcT_hS9geBbDQpd_6iADQDFj5w,5661
|
22
22
|
semantic_model_generator/tests/cte_utils_test.py,sha256=_9GAJiOPGSagdWmQsoAEOOhEgsBY0LFlr_xtwrlgf4A,17561
|
23
23
|
semantic_model_generator/tests/generate_model_classification_test.py,sha256=Amq29cmeKd0S7iVikJ60RFm9gpWaQv1TijXofp3J-lI,2275
|
24
24
|
semantic_model_generator/tests/llm_enrichment_test.py,sha256=1avLrPWp7J7o_K3PKbI_PIvduM5Id21MmoL0JTeDTfs,15738
|
@@ -32,7 +32,7 @@ semantic_model_generator/validate/context_length.py,sha256=HL-GfaRXNcVji1-pAFGXG
|
|
32
32
|
semantic_model_generator/validate/keywords.py,sha256=frZ5HjRXP69K6dYAU5_d86oSp40_3yoLUg1eQwU3oLM,7080
|
33
33
|
semantic_model_generator/validate/schema.py,sha256=eL_wl5yscIeczwNBRUKhF_7QqWW2wSGimkgaOhMFsrA,5893
|
34
34
|
semantic_model_generator/validate_model.py,sha256=Uq-V-GfPeF2Dy4l9uF5Guv104gDCDGh0Cxz1AJOu5dk,836
|
35
|
-
clickzetta_semantic_model_generator-1.0.
|
36
|
-
clickzetta_semantic_model_generator-1.0.
|
37
|
-
clickzetta_semantic_model_generator-1.0.
|
38
|
-
clickzetta_semantic_model_generator-1.0.
|
35
|
+
clickzetta_semantic_model_generator-1.0.15.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
36
|
+
clickzetta_semantic_model_generator-1.0.15.dist-info/METADATA,sha256=4b41cAmodnWAFM_ci1AZoBqRtoBpA7SjA71nvp3HJcQ,7817
|
37
|
+
clickzetta_semantic_model_generator-1.0.15.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
38
|
+
clickzetta_semantic_model_generator-1.0.15.dist-info/RECORD,,
|
@@ -176,6 +176,25 @@ def _sanitize_identifier(value: Any, fallback: str = "") -> str:
|
|
176
176
|
return normalized
|
177
177
|
|
178
178
|
|
179
|
+
def _split_identifier(
    identifier: Any,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """
    Split a potentially qualified identifier into catalog, schema, and table parts.

    The value is first passed through ``normalize_identifier`` (defined elsewhere
    in this module; presumably it removes surrounding quotes/backticks -- confirm
    against that helper) and the result is split on ".". Segments that are empty
    after whitespace stripping are discarded.

    Args:
        identifier: Raw identifier, optionally qualified as ``schema.table`` or
            ``catalog.schema.table``.

    Returns:
        A ``(catalog, schema, table)`` tuple. Leading components that are not
        present are ``None``. When more than three segments are present only the
        last three are used. ``(None, None, None)`` is returned when no usable
        segment remains.
    """
    text = normalize_identifier(identifier)
    if not text:
        return None, None, None

    parts = [part.strip() for part in text.split(".") if part.strip()]
    if not parts:
        # Inputs made only of separators/whitespace (e.g. "." or " . ") pass the
        # truthiness check above but leave nothing to index into.
        return None, None, None
    if len(parts) >= 3:
        return parts[-3], parts[-2], parts[-1]
    if len(parts) == 2:
        return None, parts[0], parts[1]
    return None, None, parts[0]
|
196
|
+
|
197
|
+
|
179
198
|
def _normalize_column_type(raw: Any) -> str:
|
180
199
|
if raw is None:
|
181
200
|
return ""
|
@@ -449,8 +468,14 @@ def _build_information_schema_query(
|
|
449
468
|
if table_schema:
|
450
469
|
where_conditions.append(f"upper(t.table_schema) = '{table_schema.upper()}'")
|
451
470
|
if table_names:
|
452
|
-
|
453
|
-
|
471
|
+
normalized_names: List[str] = []
|
472
|
+
for name in table_names:
|
473
|
+
_, _, table_only = _split_identifier(name)
|
474
|
+
if table_only:
|
475
|
+
normalized_names.append(table_only.upper())
|
476
|
+
if normalized_names:
|
477
|
+
formatted_names = ", ".join(f"'{name}'" for name in normalized_names)
|
478
|
+
where_conditions.append(f"upper(t.table_name) IN ({formatted_names})")
|
454
479
|
|
455
480
|
where_clause = " AND ".join(where_conditions)
|
456
481
|
return f"""
|
@@ -490,6 +515,14 @@ def _fetch_columns_via_show(
|
|
490
515
|
if not table_token:
|
491
516
|
continue
|
492
517
|
|
518
|
+
override_catalog, override_schema, override_table = _split_identifier(table_token)
|
519
|
+
table_leaf = override_table or table_token
|
520
|
+
if not table_leaf:
|
521
|
+
continue
|
522
|
+
|
523
|
+
catalog_token = override_catalog or catalog
|
524
|
+
schema_token_override = override_schema or schema_token
|
525
|
+
|
493
526
|
identifier_candidates: List[str] = []
|
494
527
|
seen_identifiers: set[str] = set()
|
495
528
|
|
@@ -505,9 +538,9 @@ def _fetch_columns_via_show(
|
|
505
538
|
identifier_candidates.append(identifier)
|
506
539
|
seen_identifiers.add(identifier)
|
507
540
|
|
508
|
-
raw_parts = (
|
509
|
-
schema_parts = (
|
510
|
-
table_parts = (
|
541
|
+
raw_parts = (catalog_token, schema_token_override, table_leaf)
|
542
|
+
schema_parts = (schema_token_override, table_leaf)
|
543
|
+
table_parts = (table_leaf,)
|
511
544
|
|
512
545
|
_add_identifier(raw_parts, quoted=False)
|
513
546
|
_add_identifier(schema_parts, quoted=False)
|
@@ -582,10 +615,10 @@ def _fetch_columns_via_show(
|
|
582
615
|
normalized[_TABLE_SCHEMA_COL] = (
|
583
616
|
df[schema_col]
|
584
617
|
if schema_col
|
585
|
-
else (
|
618
|
+
else (schema_token_override or table_schema or "")
|
586
619
|
)
|
587
620
|
normalized[_TABLE_NAME_COL] = (
|
588
|
-
df[table_col] if table_col else
|
621
|
+
df[table_col] if table_col else table_leaf
|
589
622
|
)
|
590
623
|
normalized[_COLUMN_NAME_COL] = (
|
591
624
|
df[column_col] if column_col else df.index.astype(str)
|
@@ -738,17 +771,11 @@ def fetch_tables_views_in_schema(
|
|
738
771
|
|
739
772
|
try:
|
740
773
|
if workspace_token and schema_token:
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
else:
|
747
|
-
scope = join_quoted_identifiers(
|
748
|
-
workspace_token,
|
749
|
-
schema_token,
|
750
|
-
)
|
751
|
-
df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
|
774
|
+
scope = join_quoted_identifiers(
|
775
|
+
workspace_token,
|
776
|
+
schema_token,
|
777
|
+
)
|
778
|
+
df = session.sql(f"SHOW TABLES IN {scope}").to_pandas()
|
752
779
|
else:
|
753
780
|
df = session.sql("SHOW TABLES").to_pandas()
|
754
781
|
except Exception as exc: # pragma: no cover
|
@@ -50,7 +50,15 @@ class RelationshipDiscoveryResult:
|
|
50
50
|
def _normalize_table_names(table_names: Optional[Iterable[str]]) -> Optional[List[str]]:
|
51
51
|
if table_names is None:
|
52
52
|
return None
|
53
|
-
|
53
|
+
normalized: List[str] = []
|
54
|
+
for name in table_names:
|
55
|
+
parts = [
|
56
|
+
part.strip().strip("`").strip('"')
|
57
|
+
for part in str(name).split(".")
|
58
|
+
if part and part.strip()
|
59
|
+
]
|
60
|
+
normalized.append(".".join(parts))
|
61
|
+
return normalized
|
54
62
|
|
55
63
|
|
56
64
|
def _build_tables_from_dataframe(
|
@@ -86,3 +86,86 @@ def test_get_valid_columns_falls_back_to_show_columns():
|
|
86
86
|
assert not df.empty
|
87
87
|
assert df["TABLE_NAME"].iloc[0] == "PARTSUPP"
|
88
88
|
assert df["COLUMN_NAME"].iloc[0] == "PS_PARTKEY"
|
89
|
+
|
90
|
+
|
91
|
+
def test_get_valid_columns_handles_fully_qualified_filters():
    """
    A fully qualified table filter must not be prefixed a second time.

    The fake session raises for any query containing "information_schema" and
    only answers the exact statement ``SHOW COLUMNS IN TEST_WS.S1.TABLE_ONE``;
    every other statement raises as well.
    """

    class _FrameResult:
        """Minimal stand-in for a query result exposing ``to_pandas``."""

        def __init__(self, frame: pd.DataFrame):
            self._frame = frame

        def to_pandas(self) -> pd.DataFrame:
            return self._frame

    columns_frame = pd.DataFrame(
        {
            "schema_name": ["S1"],
            "table_name": ["TABLE_ONE"],
            "column_name": ["ID"],
            "data_type": ["INT"],
            "comment": [""],
        }
    )
    expected_statement = "SHOW COLUMNS IN TEST_WS.S1.TABLE_ONE"
    issued: list[str] = []

    def fake_sql(statement: str):
        # Record every statement first so the assertions see failed ones too.
        issued.append(statement)
        if "information_schema" in statement:
            raise RuntimeError("info schema unavailable")
        if statement != expected_statement:
            raise RuntimeError("unsupported query")
        return _FrameResult(columns_frame)

    session = mock.MagicMock()
    session.sql.side_effect = fake_sql
    connector._CATALOG_CATEGORY_CACHE.clear()

    df = connector.get_valid_schemas_tables_columns_df(
        session=session,
        workspace="TEST_WS",
        table_schema="S1",
        table_names=["TEST_WS.S1.TABLE_ONE"],
    )

    assert not df.empty
    assert any("SHOW COLUMNS IN TEST_WS.S1.TABLE_ONE" in statement for statement in issued)
    # No statement may contain the workspace/schema prefix twice.
    assert not any("TEST_WS.S1.TEST_WS.S1" in statement for statement in issued)
|
133
|
+
|
134
|
+
|
135
|
+
def test_fetch_tables_views_in_schema_shared_catalog_does_not_use_share_clause():
    """
    With ``_catalog_category`` patched to return "SHARED", listing tables must
    not issue any statement containing "IN SHARE".

    The fake session answers statements starting with ``SHOW TABLES IN`` and
    raises for anything else.
    """

    class _FrameResult:
        """Minimal stand-in for a query result exposing ``to_pandas``."""

        def __init__(self, frame: pd.DataFrame):
            self._frame = frame

        def to_pandas(self) -> pd.DataFrame:
            return self._frame

    listing_frame = pd.DataFrame(
        {
            "workspace_name": ["lakehouse_ai"],
            "schema_name": ["schema_for_opencatalog"],
            "table_name": ["czcustomer"],
            "is_view": [False],
            "is_materialized_view": [False],
        }
    )
    issued: list[str] = []

    def fake_sql(statement: str):
        issued.append(statement)
        if not statement.startswith("SHOW TABLES IN"):
            raise RuntimeError("Unexpected query")
        return _FrameResult(listing_frame)

    session = mock.MagicMock()
    session.sql.side_effect = fake_sql
    connector._CATALOG_CATEGORY_CACHE.clear()

    shared_category = mock.patch.object(
        connector, "_catalog_category", return_value="SHARED"
    )
    with shared_category:
        tables = connector.fetch_tables_views_in_schema(
            session=session, schema_name="lakehouse_ai.schema_for_opencatalog"
        )

    assert tables == ["lakehouse_ai.schema_for_opencatalog.czcustomer"]
    assert not any("IN SHARE" in statement for statement in issued)
|
File without changes
|
File without changes
|