clickzetta_semantic_model_generator-1.0.2-py3-none-any.whl → clickzetta_semantic_model_generator-1.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21):
  1. {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/METADATA +5 -5
  2. {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/RECORD +21 -21
  3. semantic_model_generator/clickzetta_utils/clickzetta_connector.py +91 -33
  4. semantic_model_generator/clickzetta_utils/env_vars.py +7 -2
  5. semantic_model_generator/data_processing/cte_utils.py +1 -1
  6. semantic_model_generator/generate_model.py +588 -224
  7. semantic_model_generator/llm/dashscope_client.py +4 -2
  8. semantic_model_generator/llm/enrichment.py +144 -57
  9. semantic_model_generator/llm/progress_tracker.py +16 -15
  10. semantic_model_generator/relationships/discovery.py +1 -6
  11. semantic_model_generator/tests/clickzetta_connector_test.py +3 -7
  12. semantic_model_generator/tests/cte_utils_test.py +1 -1
  13. semantic_model_generator/tests/generate_model_classification_test.py +12 -2
  14. semantic_model_generator/tests/llm_enrichment_test.py +152 -46
  15. semantic_model_generator/tests/relationship_discovery_test.py +6 -3
  16. semantic_model_generator/tests/relationships_filters_test.py +166 -30
  17. semantic_model_generator/tests/utils_test.py +1 -1
  18. semantic_model_generator/validate/keywords.py +453 -53
  19. semantic_model_generator/validate/schema.py +4 -2
  20. {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/LICENSE +0 -0
  21. {clickzetta_semantic_model_generator-1.0.2.dist-info → clickzetta_semantic_model_generator-1.0.3.dist-info}/WHEEL +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: clickzetta-semantic-model-generator
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: Curate a Semantic Model for ClickZetta Lakehouse
5
5
  License: Apache Software License; BSD License
6
6
  Author: qililiang
@@ -13,12 +13,12 @@ Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
14
  Provides-Extra: looker
15
15
  Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
16
- Requires-Dist: clickzetta-connector-python (==0.8.92)
17
- Requires-Dist: clickzetta-zettapark-python (==0.1.3)
16
+ Requires-Dist: clickzetta-connector-python (>=0.8.92)
17
+ Requires-Dist: clickzetta-zettapark-python (>=0.1.3)
18
18
  Requires-Dist: dashscope (>=1.22.2,<2.0.0)
19
19
  Requires-Dist: loguru (>=0.7.2,<0.8.0)
20
20
  Requires-Dist: looker-sdk (>=24.14.0,<25.0.0) ; extra == "looker"
21
- Requires-Dist: numpy (>=1.26.4,<2.0.0)
21
+ Requires-Dist: numpy (>=1.26.4,<3.0.0)
22
22
  Requires-Dist: pandas (>=2.0.1,<3.0.0)
23
23
  Requires-Dist: protobuf (==5.26.1)
24
24
  Requires-Dist: pyarrow (==14.0.2)
@@ -31,7 +31,7 @@ Requires-Dist: streamlit (==1.36.0)
31
31
  Requires-Dist: streamlit-extras (==0.4.0)
32
32
  Requires-Dist: strictyaml (>=1.7.3,<2.0.0)
33
33
  Requires-Dist: tqdm (>=4.66.5,<5.0.0)
34
- Requires-Dist: urllib3 (>=1.26.19,<2.0.0)
34
+ Requires-Dist: urllib3 (>=1.26.19,<3.0.0)
35
35
  Description-Content-Type: text/markdown
36
36
 
37
37
  # semantic-model-generator
@@ -1,38 +1,38 @@
1
1
  semantic_model_generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- semantic_model_generator/clickzetta_utils/clickzetta_connector.py,sha256=hnXBtwlZbS76e1H6w8dkVN1-WCd9CeVinoPVJiD4aJk,32335
3
- semantic_model_generator/clickzetta_utils/env_vars.py,sha256=01AQV7WXByZBBAHETEEQ22mRytKvzcge_yn9RZH09DE,7597
2
+ semantic_model_generator/clickzetta_utils/clickzetta_connector.py,sha256=rFBWdNQerLYinn6RoDV_J4k2G4LLofiFkLDa7j8hmng,32888
3
+ semantic_model_generator/clickzetta_utils/env_vars.py,sha256=8cbL6R75c1-aVQ2i1TDr9SiHCUjTrgvXbIRz4MbcmbE,7664
4
4
  semantic_model_generator/clickzetta_utils/utils.py,sha256=D0SX2faBjwvhFJLt1Yk4mlZmyHmQt7LN93Jrc5YIU-A,3800
5
5
  semantic_model_generator/data_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- semantic_model_generator/data_processing/cte_utils.py,sha256=FAsxbeAXYz1q0Iei_pDUKBrOWLnnH1jMd4h_5dXmoHE,16086
6
+ semantic_model_generator/data_processing/cte_utils.py,sha256=jfTJIwc89-0nnelVw_5vpIVRout7V0YooUDfZzTzDr4,16086
7
7
  semantic_model_generator/data_processing/cte_utils_test.py,sha256=l6QkyyH22FexLKjvvbS9Je3YtdTrJE3a-BiknCy1g9s,2822
8
8
  semantic_model_generator/data_processing/data_types.py,sha256=1HsSCkdCWvcXiwN3o1-HVQi_ZVIR0lYevXG9CE1TvRc,1172
9
9
  semantic_model_generator/data_processing/proto_utils.py,sha256=UwqCfQYilTx68KcA4IYZN7PeM4Pz_pK1h0FrVJomzV8,2938
10
- semantic_model_generator/generate_model.py,sha256=awBj72QkrfCq9Ij-lo6_tNDt7yQ0mFEElVuz7yfPy1E,116381
10
+ semantic_model_generator/generate_model.py,sha256=ogNvx1HNOnC5KIZlGDwcWL7PLMHRs8zcZZbwricffDo,121843
11
11
  semantic_model_generator/llm/__init__.py,sha256=rLQt2pzRmxtnBLKjxN_qZ2a_nvkFHtmguU5lyajCldw,1030
12
- semantic_model_generator/llm/dashscope_client.py,sha256=DBbxuqy1BIk9mxm20GSK-9rfDtohy3KRTsxpsG5sjf4,6031
13
- semantic_model_generator/llm/enrichment.py,sha256=8PlUFC2LtuSaQGdrJIoQpn_oGUyNV165B4FA3046CYI,40316
14
- semantic_model_generator/llm/progress_tracker.py,sha256=tbdbfCNv2wlh-N5wlcMa10JVcmbdkQojPVe-VPWmmxM,6757
12
+ semantic_model_generator/llm/dashscope_client.py,sha256=lHS36iqNZbFhwgidPpW1Bwwy4S2O7GeLyMSMdlSoBsY,6050
13
+ semantic_model_generator/llm/enrichment.py,sha256=49e9Jg_jHfhUIEQ3JserEc5DV5sFWA12K76TY4UwnCg,41448
14
+ semantic_model_generator/llm/progress_tracker.py,sha256=XmQUDSmc8Uz3bdnV-K-SHmmSoJjxLp3SPMxXwVDKf88,6740
15
15
  semantic_model_generator/output_models/.keep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  semantic_model_generator/protos/semantic_model.proto,sha256=WZiN4b8vR-ZX-Lj9Vsm6HjZNAyNvM1znIyut_YkPVSI,16473
17
17
  semantic_model_generator/protos/semantic_model_pb2.py,sha256=scbWkW-I-r3_hp_5SHoOWn02p52RJ9DJ0_-nRgr0LHc,25606
18
18
  semantic_model_generator/protos/semantic_model_pb2.pyi,sha256=iiBIZxtX9d6IuUO3aLcsJsHUeZqdi14vYNuUsSM8C0g,18267
19
19
  semantic_model_generator/relationships/__init__.py,sha256=HN6Opie25Oawt2fCDM_bZwRBVBEzqRsEXgDzYC7ytns,373
20
- semantic_model_generator/relationships/discovery.py,sha256=fy8mzfAZtWWVUjiSP5jeKffoa2GodNIKA3eGi4dxhHo,6020
21
- semantic_model_generator/tests/clickzetta_connector_test.py,sha256=NKDbhll8TxFtbwFJIUcQQDKJU1LkfEz9nL0SE32hx3o,3114
22
- semantic_model_generator/tests/cte_utils_test.py,sha256=8v2nrrD2GkH_PTGIsKm3lQ06unzis8iR31atT8bUX98,17385
23
- semantic_model_generator/tests/generate_model_classification_test.py,sha256=q7dh29h9iF17ChzuoFSLtRX9ASiAm2oY4OkGyFVfn5Y,2117
24
- semantic_model_generator/tests/llm_enrichment_test.py,sha256=YeYg4voQ3wy2vgF7H9JNdnMOyZmUfiMdL6oEXFv-ztg,14415
25
- semantic_model_generator/tests/relationship_discovery_test.py,sha256=1SVX59-mpHQvxk7RDGtglesg6VXU9TnnZZcfjZi5IHs,3448
26
- semantic_model_generator/tests/relationships_filters_test.py,sha256=fVyA-hwxGqdlFr_PGT9YdrPz13XGvI4J_5F3IpVTFEE,8009
20
+ semantic_model_generator/relationships/discovery.py,sha256=l_CixbfRvHBqxmLCmCq7bvQHRt3iUl0o5mui4R5LHXQ,5961
21
+ semantic_model_generator/tests/clickzetta_connector_test.py,sha256=Fdx7jooNt1lslKB2Ub51wqOZ8OM0osgZiDDl3bV6riw,3086
22
+ semantic_model_generator/tests/cte_utils_test.py,sha256=LdhWw_bHZDE1LyS2hBVy_VTNjLgodonesWaxw8jXpV4,17385
23
+ semantic_model_generator/tests/generate_model_classification_test.py,sha256=Amq29cmeKd0S7iVikJ60RFm9gpWaQv1TijXofp3J-lI,2275
24
+ semantic_model_generator/tests/llm_enrichment_test.py,sha256=1avLrPWp7J7o_K3PKbI_PIvduM5Id21MmoL0JTeDTfs,15738
25
+ semantic_model_generator/tests/relationship_discovery_test.py,sha256=SOuXCwbmSUgvZoOS2s5oGK1w0LW283M1hg--QlLaDVA,3490
26
+ semantic_model_generator/tests/relationships_filters_test.py,sha256=bUm3r1UGaXca-hJOot7jMPz4It_TVsoddd-Xpk-76zM,10166
27
27
  semantic_model_generator/tests/samples/validate_yamls.py,sha256=262j-2i2oFZtTyK2susOrbxxE5eS-6IN-V0jFEOpt_w,156249
28
- semantic_model_generator/tests/utils_test.py,sha256=Tfvb-ErZPBS_HjXr4N7XSJnL3hlncNGF5pay1xFfNHg,539
28
+ semantic_model_generator/tests/utils_test.py,sha256=HWRXR45QYL1f6L8xsMppqLXzF9HAsrMwTMQIKpZrc_M,539
29
29
  semantic_model_generator/tests/validate_model_test.py,sha256=kfG0dkhuDlJ47_Zt3seXPOV4DmzWOTTGXQINKvLYtlo,352
30
30
  semantic_model_generator/tests/yaml_to_semantic_model_test.py,sha256=sfjHahOLdn0xWnNn12e8K8TvyKl17WP4sLgSA1U9mB0,2626
31
31
  semantic_model_generator/validate/context_length.py,sha256=HL-GfaRXNcVji1-pAFGXGxMoH2fyQtQok7Y9ybXn6Tg,4454
32
- semantic_model_generator/validate/keywords.py,sha256=mFtmIK72YLQ7wODL-zCC-uMXzXMJT6Tg4fISAin2WtQ,5480
33
- semantic_model_generator/validate/schema.py,sha256=ImFUzO5EzNbl65-0dqt5AmdEvy4lqX8gBkOSOKN8uZM,5863
32
+ semantic_model_generator/validate/keywords.py,sha256=frZ5HjRXP69K6dYAU5_d86oSp40_3yoLUg1eQwU3oLM,7080
33
+ semantic_model_generator/validate/schema.py,sha256=eL_wl5yscIeczwNBRUKhF_7QqWW2wSGimkgaOhMFsrA,5893
34
34
  semantic_model_generator/validate_model.py,sha256=Uq-V-GfPeF2Dy4l9uF5Guv104gDCDGh0Cxz1AJOu5dk,836
35
- clickzetta_semantic_model_generator-1.0.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
36
- clickzetta_semantic_model_generator-1.0.2.dist-info/METADATA,sha256=vgObGkoBnyfxUuJM8eAH8ZnIGuJrueJFnU1UfTo_gN4,7816
37
- clickzetta_semantic_model_generator-1.0.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
38
- clickzetta_semantic_model_generator-1.0.2.dist-info/RECORD,,
35
+ clickzetta_semantic_model_generator-1.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
36
+ clickzetta_semantic_model_generator-1.0.3.dist-info/METADATA,sha256=A1kBc4PO_LEbIjWM-24jHnnV6NynmowuX5Jy91tlWBk,7816
37
+ clickzetta_semantic_model_generator-1.0.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
38
+ clickzetta_semantic_model_generator-1.0.3.dist-info/RECORD,,
@@ -4,7 +4,6 @@ import concurrent.futures
4
4
  import re
5
5
  from collections import defaultdict
6
6
  from contextlib import contextmanager
7
- from types import SimpleNamespace
8
7
  from typing import Any, Dict, Generator, List, Optional, TypeVar, Union
9
8
 
10
9
  import pandas as pd
@@ -12,10 +11,7 @@ from clickzetta.zettapark.session import Session
12
11
  from loguru import logger
13
12
 
14
13
  from semantic_model_generator.clickzetta_utils import env_vars
15
- from semantic_model_generator.clickzetta_utils.utils import (
16
- clickzetta_connection,
17
- create_session,
18
- )
14
+ from semantic_model_generator.clickzetta_utils.utils import create_session
19
15
  from semantic_model_generator.data_processing.data_types import Column, Table
20
16
 
21
17
  ConnectionType = TypeVar("ConnectionType", bound=Session)
@@ -115,7 +111,9 @@ class ClickzettaCursor:
115
111
  def execute(self, query: str) -> "ClickzettaCursor":
116
112
  self._df = _execute_query_to_pandas(self._session, query)
117
113
  columns = [] if self._df is None else list(self._df.columns)
118
- self.description = [(col, None, None, None, None, None, None) for col in columns]
114
+ self.description = [
115
+ (col, None, None, None, None, None, None) for col in columns
116
+ ]
119
117
  return self
120
118
 
121
119
  def fetchone(self) -> Optional[tuple[Any, ...]]:
@@ -159,7 +157,11 @@ def _quote_identifier(name: str) -> str:
159
157
 
160
158
  def _qualify_table(workspace: str, schema_name: str, table_name: str) -> str:
161
159
  return ".".join(
162
- [_quote_identifier(workspace), _quote_identifier(schema_name), _quote_identifier(table_name)]
160
+ [
161
+ _quote_identifier(workspace),
162
+ _quote_identifier(schema_name),
163
+ _quote_identifier(table_name),
164
+ ]
163
165
  )
164
166
 
165
167
 
@@ -214,12 +216,18 @@ def _fetch_distinct_values(
214
216
  column_name: str,
215
217
  ndv: int,
216
218
  ) -> Optional[List[str]]:
217
- workspace_part = _sanitize_identifier(workspace, workspace).upper() if workspace else ""
218
- schema_part = _sanitize_identifier(schema_name, schema_name).upper() if schema_name else ""
219
+ workspace_part = (
220
+ _sanitize_identifier(workspace, workspace).upper() if workspace else ""
221
+ )
222
+ schema_part = (
223
+ _sanitize_identifier(schema_name, schema_name).upper() if schema_name else ""
224
+ )
219
225
  table_part = _sanitize_identifier(table_name, table_name).upper()
220
226
  column_part = _sanitize_identifier(column_name, column_name).upper()
221
227
 
222
- qualified_parts = [part for part in (workspace_part, schema_part, table_part) if part]
228
+ qualified_parts = [
229
+ part for part in (workspace_part, schema_part, table_part) if part
230
+ ]
223
231
  qualified_table = ".".join(qualified_parts)
224
232
 
225
233
  query = f"SELECT DISTINCT {column_part} FROM {qualified_table} LIMIT {ndv}"
@@ -257,7 +265,6 @@ def _get_column_representation(
257
265
  else:
258
266
  column_datatype = str(column_datatype_raw)
259
267
  column_datatype = _normalize_column_type(column_datatype)
260
- normalized_type = column_datatype.split("(")[0].strip()
261
268
  column_values = (
262
269
  _fetch_distinct_values(
263
270
  session=session,
@@ -351,7 +358,14 @@ def _catalog_category(session: Session, workspace: str) -> str:
351
358
  return "UNKNOWN"
352
359
 
353
360
  df.columns = [str(col).upper() for col in df.columns]
354
- name_col = next((col for col in ("WORKSPACE_NAME", "NAME", "CATALOG_NAME") if col in df.columns), None)
361
+ name_col = next(
362
+ (
363
+ col
364
+ for col in ("WORKSPACE_NAME", "NAME", "CATALOG_NAME")
365
+ if col in df.columns
366
+ ),
367
+ None,
368
+ )
355
369
  category_col = next((col for col in ("CATEGORY",) if col in df.columns), None)
356
370
  if not name_col or not category_col:
357
371
  _CATALOG_CATEGORY_CACHE[workspace_upper] = "UNKNOWN"
@@ -408,7 +422,9 @@ ORDER BY kc.ordinal_position
408
422
  if result is not None:
409
423
  return result
410
424
  except Exception:
411
- logger.debug("Primary key lookup via sys.information_schema failed; falling back.")
425
+ logger.debug(
426
+ "Primary key lookup via sys.information_schema failed; falling back."
427
+ )
412
428
 
413
429
  fallback_query = f"""
414
430
  SELECT kc.column_name
@@ -423,7 +439,13 @@ ORDER BY kc.ordinal_position
423
439
  if result is not None:
424
440
  return result
425
441
  except Exception as exc:
426
- logger.warning("Primary key lookup failed for {}.{}.{}: {}", workspace, schema_name, table_name, exc)
442
+ logger.warning(
443
+ "Primary key lookup failed for {}.{}.{}: {}",
444
+ workspace,
445
+ schema_name,
446
+ table_name,
447
+ exc,
448
+ )
427
449
  return None
428
450
 
429
451
 
@@ -432,9 +454,7 @@ def _build_information_schema_query(
432
454
  table_schema: Optional[str],
433
455
  table_names: Optional[List[str]],
434
456
  ) -> str:
435
- where_conditions: List[str] = [
436
- "1=1"
437
- ]
457
+ where_conditions: List[str] = ["1=1"]
438
458
  if table_schema:
439
459
  where_conditions.append(f"upper(t.table_schema) = '{table_schema.upper()}'")
440
460
  if table_names:
@@ -442,7 +462,6 @@ def _build_information_schema_query(
442
462
  where_conditions.append(f"upper(t.table_name) IN ({formatted_names})")
443
463
 
444
464
  where_clause = " AND ".join(where_conditions)
445
- base = "information_schema"
446
465
  return f"""
447
466
  SELECT
448
467
  t.table_schema AS {_TABLE_SCHEMA_COL},
@@ -474,27 +493,48 @@ def _fetch_columns_via_show(
474
493
  schema = table_schema.upper() if table_schema else ""
475
494
 
476
495
  for table_name in table_names:
477
- qualified_parts = [part for part in (catalog, schema, table_name.upper()) if part]
496
+ qualified_parts = [
497
+ part for part in (catalog, schema, table_name.upper()) if part
498
+ ]
478
499
  qualified_table = ".".join(qualified_parts)
479
500
  query = f"SHOW COLUMNS IN {qualified_table}"
480
501
  try:
481
502
  df = session.sql(query).to_pandas()
482
503
  except Exception as exc:
483
- logger.debug("SHOW COLUMNS fallback failed for {}: {}", qualified_table, exc)
504
+ logger.debug(
505
+ "SHOW COLUMNS fallback failed for {}: {}", qualified_table, exc
506
+ )
484
507
  continue
485
508
  if df.empty:
486
509
  continue
487
510
  df.columns = [str(col).upper() for col in df.columns]
488
- schema_col = next((col for col in ("TABLE_SCHEMA", "SCHEMA_NAME") if col in df.columns), None)
489
- table_col = next((col for col in ("TABLE_NAME", "NAME") if col in df.columns), None)
490
- column_col = next((col for col in ("COLUMN_NAME", "NAME") if col in df.columns and col != table_col), None)
491
- datatype_col = next((col for col in ("DATA_TYPE", "TYPE") if col in df.columns), None)
492
- comment_col = next((col for col in ("COMMENT", "COLUMN_COMMENT") if col in df.columns), None)
511
+ schema_col = next(
512
+ (col for col in ("TABLE_SCHEMA", "SCHEMA_NAME") if col in df.columns), None
513
+ )
514
+ table_col = next(
515
+ (col for col in ("TABLE_NAME", "NAME") if col in df.columns), None
516
+ )
517
+ column_col = next(
518
+ (
519
+ col
520
+ for col in ("COLUMN_NAME", "NAME")
521
+ if col in df.columns and col != table_col
522
+ ),
523
+ None,
524
+ )
525
+ datatype_col = next(
526
+ (col for col in ("DATA_TYPE", "TYPE") if col in df.columns), None
527
+ )
528
+ comment_col = next(
529
+ (col for col in ("COMMENT", "COLUMN_COMMENT") if col in df.columns), None
530
+ )
493
531
 
494
532
  normalized = pd.DataFrame()
495
533
  normalized[_TABLE_SCHEMA_COL] = df[schema_col] if schema_col else table_schema
496
534
  normalized[_TABLE_NAME_COL] = df[table_col] if table_col else table_name
497
- normalized[_COLUMN_NAME_COL] = df[column_col] if column_col else df.index.astype(str)
535
+ normalized[_COLUMN_NAME_COL] = (
536
+ df[column_col] if column_col else df.index.astype(str)
537
+ )
498
538
  normalized[_DATATYPE_COL] = df[datatype_col] if datatype_col else ""
499
539
  normalized[_COLUMN_COMMENT_ALIAS] = df[comment_col] if comment_col else ""
500
540
  normalized[_TABLE_COMMENT_COL] = ""
@@ -552,6 +592,7 @@ def get_valid_schemas_tables_columns_df(
552
592
  if _TABLE_SCHEMA_COL in result.columns:
553
593
  result[_TABLE_SCHEMA_COL] = result[_TABLE_SCHEMA_COL].astype(str).str.upper()
554
594
  if _IS_PRIMARY_KEY_COL in result.columns:
595
+
555
596
  def _normalize_pk(value: Any) -> bool:
556
597
  if isinstance(value, bool):
557
598
  return value
@@ -617,10 +658,11 @@ def fetch_tables_views_in_schema(
617
658
  workspace_upper = workspace.upper()
618
659
  schema_upper = schema.upper()
619
660
 
620
- target = ""
621
661
  try:
622
662
  if workspace_upper and schema_upper:
623
- df = session.sql(f"SHOW TABLES IN {workspace_upper}.{schema_upper}").to_pandas()
663
+ df = session.sql(
664
+ f"SHOW TABLES IN {workspace_upper}.{schema_upper}"
665
+ ).to_pandas()
624
666
  else:
625
667
  df = session.sql("SHOW TABLES").to_pandas()
626
668
  except Exception as exc: # pragma: no cover
@@ -634,17 +676,27 @@ def fetch_tables_views_in_schema(
634
676
  df.columns = [str(col).upper() for col in df.columns]
635
677
  name_column = "TABLE_NAME" if "TABLE_NAME" in df.columns else df.columns[0]
636
678
  schema_column = next(
637
- (col for col in ("SCHEMA_NAME", "TABLE_SCHEMA", "NAMESPACE") if col in df.columns),
679
+ (
680
+ col
681
+ for col in ("SCHEMA_NAME", "TABLE_SCHEMA", "NAMESPACE")
682
+ if col in df.columns
683
+ ),
638
684
  None,
639
685
  )
640
686
  catalog_column = next(
641
- (col for col in ("CATALOG_NAME", "WORKSPACE_NAME", "TABLE_CATALOG") if col in df.columns),
687
+ (
688
+ col
689
+ for col in ("CATALOG_NAME", "WORKSPACE_NAME", "TABLE_CATALOG")
690
+ if col in df.columns
691
+ ),
642
692
  None,
643
693
  )
644
694
 
645
695
  results: List[str] = []
646
696
  for _, row in df.iterrows():
647
- if _value_is_true(row.get("IS_VIEW")) and not _value_is_true(row.get("IS_MATERIALIZED_VIEW")):
697
+ if _value_is_true(row.get("IS_VIEW")) and not _value_is_true(
698
+ row.get("IS_MATERIALIZED_VIEW")
699
+ ):
648
700
  continue
649
701
  if _value_is_true(row.get("IS_EXTERNAL")):
650
702
  continue
@@ -756,7 +808,11 @@ def fetch_yaml_names_in_stage(
756
808
  if stage.lower().startswith("volume:user://"):
757
809
  volume_body = stage[len("volume:") :]
758
810
  # Normalize relative directory
759
- relative = volume_body[len("user://") :] if volume_body.startswith("user://") else volume_body
811
+ relative = (
812
+ volume_body[len("user://") :]
813
+ if volume_body.startswith("user://")
814
+ else volume_body
815
+ )
760
816
  relative = relative.lstrip("~/")
761
817
  relative = relative.strip("/")
762
818
 
@@ -842,7 +898,9 @@ def create_table_in_schema(
842
898
  table_fqn: str,
843
899
  columns_schema: Dict[str, str],
844
900
  ) -> bool:
845
- fields = ", ".join(f"{_quote_identifier(name)} {dtype}" for name, dtype in columns_schema.items())
901
+ fields = ", ".join(
902
+ f"{_quote_identifier(name)} {dtype}" for name, dtype in columns_schema.items()
903
+ )
846
904
  query = f"CREATE TABLE IF NOT EXISTS {table_fqn} ({fields})"
847
905
  try:
848
906
  session.sql(query).collect()
@@ -20,7 +20,9 @@ _CONFIG_PATHS = [
20
20
  _ACTIVE_CONFIG_PATH: Optional[str] = None
21
21
 
22
22
 
23
- def _load_config_from_file() -> Tuple[Optional[Dict[str, str]], Dict[str, Dict[str, str]]]:
23
+ def _load_config_from_file() -> (
24
+ Tuple[Optional[Dict[str, str]], Dict[str, Dict[str, str]]]
25
+ ):
24
26
  global _ACTIVE_CONFIG_PATH
25
27
  _ACTIVE_CONFIG_PATH = None
26
28
  for path in _CONFIG_PATHS:
@@ -91,7 +93,10 @@ def _deep_lookup(mapping: Any, key: str) -> Optional[Any]:
91
93
  if isinstance(current, dict):
92
94
  for candidate_key, candidate_value in current.items():
93
95
  candidate_key_str = str(candidate_key).lower()
94
- if candidate_key_str == normalized_key and candidate_value not in (None, ""):
96
+ if candidate_key_str == normalized_key and candidate_value not in (
97
+ None,
98
+ "",
99
+ ):
95
100
  return candidate_value
96
101
  if isinstance(candidate_value, (dict, list)):
97
102
  queue.append(candidate_value)
@@ -8,10 +8,10 @@ import sqlglot.expressions
8
8
  from loguru import logger
9
9
  from sqlglot import Dialect
10
10
 
11
- from semantic_model_generator.protos import semantic_model_pb2
12
11
  from semantic_model_generator.clickzetta_utils.clickzetta_connector import (
13
12
  OBJECT_DATATYPES,
14
13
  )
14
+ from semantic_model_generator.protos import semantic_model_pb2
15
15
 
16
16
  _SQLGLOT_CLICKZETTA_KEY = "".join(["snow", "flake"])
17
17
  ClickzettaDialect = Dialect.get_or_raise(_SQLGLOT_CLICKZETTA_KEY)