@clickzetta/cz-cli-darwin-arm64 0.5.16 → 0.5.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243) hide show
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/lakehouse-doc-en/SKILL.md +6 -11
  3. package/bin/skills/lakehouse-doc-en/references/AIGateway.md +58 -13
  4. package/bin/skills/lakehouse-doc-en/references/Computation.md +1 -1
  5. package/bin/skills/lakehouse-doc-en/references/DataSource_Amazon_DocumentDB.md +3 -1
  6. package/bin/skills/lakehouse-doc-en/references/Foreach.md +14 -14
  7. package/bin/skills/lakehouse-doc-en/references/JDBC-Driver.md +0 -1
  8. package/bin/skills/lakehouse-doc-en/references/LakehouseAI-overview.md +21 -8
  9. package/bin/skills/lakehouse-doc-en/references/LakehouseDataGPT-tour.md +4 -9
  10. package/bin/skills/lakehouse-doc-en/references/LakehouseStudio-tour.md +14 -19
  11. package/bin/skills/lakehouse-doc-en/references/Lakehouse_Zilliz_MakeDataReadyforBIandAI.md +1 -1
  12. package/bin/skills/lakehouse-doc-en/references/Logstash.md +3 -3
  13. package/bin/skills/lakehouse-doc-en/references/Migrate_Spark_DataEngineeringBestPractices_Project_to_Lakehouse.md +1 -1
  14. package/bin/skills/lakehouse-doc-en/references/Notebook.md +17 -17
  15. package/bin/skills/lakehouse-doc-en/references/RemoteFunction-as-udf.md +14 -14
  16. package/bin/skills/lakehouse-doc-en/references/SQL_External_Catalog_Guide.md +1 -9
  17. package/bin/skills/lakehouse-doc-en/references/SUMMARY.md +59 -29
  18. package/bin/skills/lakehouse-doc-en/references/WINDOWFUNCTION.md +99 -57
  19. package/bin/skills/lakehouse-doc-en/references/Zettapark_Data_Engineering_Demo.md +1 -1
  20. package/bin/skills/lakehouse-doc-en/references/access-control-configuration.md +1 -8
  21. package/bin/skills/lakehouse-doc-en/references/aigw-2026-2-5-1.0.md +16 -0
  22. package/bin/skills/lakehouse-doc-en/references/aigw-2026-3-29-1.0.2.md +14 -0
  23. package/bin/skills/lakehouse-doc-en/references/aigw-2026-3-8-1.0.1.md +16 -0
  24. package/bin/skills/lakehouse-doc-en/references/aigw-2026-4-28-1.1.md +29 -0
  25. package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-12-1.1.1.md +18 -0
  26. package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-15-1.2.md +9 -0
  27. package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-21-1.3.md +9 -0
  28. package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-28-1.4.md +10 -0
  29. package/bin/skills/lakehouse-doc-en/references/aigw-2026-6-3-1.5.md +9 -0
  30. package/bin/skills/lakehouse-doc-en/references/alicloud-arn-externalid.md +0 -5
  31. package/bin/skills/lakehouse-doc-en/references/answer-accuracy-improve.md +120 -103
  32. package/bin/skills/lakehouse-doc-en/references/application-list.md +1 -3
  33. package/bin/skills/lakehouse-doc-en/references/approval-list.md +16 -17
  34. package/bin/skills/lakehouse-doc-en/references/batch-load-parquet-file-into-lakehouse.md +1 -1
  35. package/bin/skills/lakehouse-doc-en/references/batch_sync.md +9 -9
  36. package/bin/skills/lakehouse-doc-en/references/batch_sync_Sop.md +2 -2
  37. package/bin/skills/lakehouse-doc-en/references/batchloadparquetfileintoLakehouse.md +1 -1
  38. package/bin/skills/lakehouse-doc-en/references/bulkloadv1-python-sdk.md +3 -3
  39. package/bin/skills/lakehouse-doc-en/references/chart-auto-refresh-guide.md +12 -6
  40. package/bin/skills/lakehouse-doc-en/references/clickzetta-sample-data.md +3 -3
  41. package/bin/skills/lakehouse-doc-en/references/code_approval.md +1 -5
  42. package/bin/skills/lakehouse-doc-en/references/composite_task.md +31 -42
  43. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_environment_and_data_generate.md +6 -9
  44. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_javasdk_bulkload_realtime.md +4 -10
  45. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_kafka_realtime_sync.md +1 -10
  46. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_local_file_into_table_by_studio.md +0 -6
  47. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_batchload_public_network.md +0 -5
  48. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_python_node.md +2 -7
  49. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_realtime_cdc_public_network.md +13 -18
  50. package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_sql_insert.md +0 -1
  51. package/bin/skills/lakehouse-doc-en/references/concepts.md +1 -1
  52. package/bin/skills/lakehouse-doc-en/references/config-datasource.md +5 -7
  53. package/bin/skills/lakehouse-doc-en/references/connect-with-cli.md +116 -72
  54. package/bin/skills/lakehouse-doc-en/references/connect-with-cz-cli.md +151 -0
  55. package/bin/skills/lakehouse-doc-en/references/continue-job.md +9 -17
  56. package/bin/skills/lakehouse-doc-en/references/create-api-connection.md +315 -286
  57. package/bin/skills/lakehouse-doc-en/references/create-catalog-connection.md +1 -0
  58. package/bin/skills/lakehouse-doc-en/references/create-dynamic-table.md +4 -4
  59. package/bin/skills/lakehouse-doc-en/references/create-external-catalog.md +85 -22
  60. package/bin/skills/lakehouse-doc-en/references/create-table-ddl.md +45 -0
  61. package/bin/skills/lakehouse-doc-en/references/creating_alicloud_privatelinkendpoint.md +4 -6
  62. package/bin/skills/lakehouse-doc-en/references/creating_alicloud_privatelinkservice.md +4 -7
  63. package/bin/skills/lakehouse-doc-en/references/creating_tencentcloud_privatelinkendpoint.md +2 -7
  64. package/bin/skills/lakehouse-doc-en/references/creating_tencentcloud_privatelinkservice.md +1 -5
  65. package/bin/skills/lakehouse-doc-en/references/cz-cli-agent.md +15 -10
  66. package/bin/skills/lakehouse-doc-en/references/cz-cli-datasource.md +0 -8
  67. package/bin/skills/lakehouse-doc-en/references/cz-cli-sql.md +2 -45
  68. package/bin/skills/lakehouse-doc-en/references/cz-cli.md +53 -42
  69. package/bin/skills/lakehouse-doc-en/references/dashboard-version-management-guide.md +12 -4
  70. package/bin/skills/lakehouse-doc-en/references/data-integration-intro.md +1 -1
  71. package/bin/skills/lakehouse-doc-en/references/data-integration.md +29 -27
  72. package/bin/skills/lakehouse-doc-en/references/data-load-summary.md +3 -3
  73. package/bin/skills/lakehouse-doc-en/references/data-quality.md +25 -25
  74. package/bin/skills/lakehouse-doc-en/references/data-sharing.md +31 -54
  75. package/bin/skills/lakehouse-doc-en/references/data-sources.md +45 -45
  76. package/bin/skills/lakehouse-doc-en/references/data_catalog.md +23 -25
  77. package/bin/skills/lakehouse-doc-en/references/data_privacy.md +5 -2
  78. package/bin/skills/lakehouse-doc-en/references/data_sharing_between_accounts_guide.md +0 -4
  79. package/bin/skills/lakehouse-doc-en/references/data_visualization.md +4 -15
  80. package/bin/skills/lakehouse-doc-en/references/dataagent.md +39 -7
  81. package/bin/skills/lakehouse-doc-en/references/databricks-delta-to-lakehouse-migration.md +168 -0
  82. package/bin/skills/lakehouse-doc-en/references/databricks-dlt-to-lakehouse-migration.md +331 -0
  83. package/bin/skills/lakehouse-doc-en/references/databricks-external-catalog-practice.md +367 -0
  84. package/bin/skills/lakehouse-doc-en/references/databricks-jobs-to-studio-migration.md +199 -0
  85. package/bin/skills/lakehouse-doc-en/references/databricks-notebook-to-studio-migration.md +350 -0
  86. package/bin/skills/lakehouse-doc-en/references/databricks-uc-governance-to-lakehouse-migration.md +327 -0
  87. package/bin/skills/lakehouse-doc-en/references/datagpt-model-config.md +34 -0
  88. package/bin/skills/lakehouse-doc-en/references/datagpt_data_source.md +50 -37
  89. package/bin/skills/lakehouse-doc-en/references/datagpt_introduction.md +55 -79
  90. package/bin/skills/lakehouse-doc-en/references/datagpt_quickstart.md +50 -64
  91. package/bin/skills/lakehouse-doc-en/references/datalake-acceleration.md +75 -2
  92. package/bin/skills/lakehouse-doc-en/references/dbt-databricks-to-clickzetta-migration.md +242 -0
  93. package/bin/skills/lakehouse-doc-en/references/dynamic-mask.md +30 -30
  94. package/bin/skills/lakehouse-doc-en/references/dynamic-table-bestpractice.md +1 -1
  95. package/bin/skills/lakehouse-doc-en/references/dynamic-table-introduce.md +1 -1
  96. package/bin/skills/lakehouse-doc-en/references/dynamic_table_summary.md +1 -1
  97. package/bin/skills/lakehouse-doc-en/references/eco_integration/streamlit.md +1 -1
  98. package/bin/skills/lakehouse-doc-en/references/eco_integration/superset.md +1 -1
  99. package/bin/skills/lakehouse-doc-en/references/ecosystem-all.md +1 -3
  100. package/bin/skills/lakehouse-doc-en/references/ecosystem.md +145 -0
  101. package/bin/skills/lakehouse-doc-en/references/external-catalog-summary.md +33 -38
  102. package/bin/skills/lakehouse-doc-en/references/external-function-combo-practice.md +466 -0
  103. package/bin/skills/lakehouse-doc-en/references/f6fc6447ee.md +7 -9
  104. package/bin/skills/lakehouse-doc-en/references/federation-query.md +56 -6
  105. package/bin/skills/lakehouse-doc-en/references/finebi-mysql.md +2 -0
  106. package/bin/skills/lakehouse-doc-en/references/get-started-with-sample-data.md +10 -11
  107. package/bin/skills/lakehouse-doc-en/references/gitfolder.md +2 -3
  108. package/bin/skills/lakehouse-doc-en/references/grant-privileges.md +2 -0
  109. package/bin/skills/lakehouse-doc-en/references/iceberg-rest-catalog-databricks.md +166 -0
  110. package/bin/skills/lakehouse-doc-en/references/ide.md +1 -1
  111. package/bin/skills/lakehouse-doc-en/references/if_else_task.md +59 -57
  112. package/bin/skills/lakehouse-doc-en/references/input_output.md +10 -7
  113. package/bin/skills/lakehouse-doc-en/references/jobprofile-bestpractices.md +60 -64
  114. package/bin/skills/lakehouse-doc-en/references/kafka-connection.md +0 -1
  115. package/bin/skills/lakehouse-doc-en/references/key-concepts.md +146 -117
  116. package/bin/skills/lakehouse-doc-en/references/lakehouse-ai-gateway-cz-cli.md +317 -0
  117. package/bin/skills/lakehouse-doc-en/references/lakehouse-ai-sql-analysis.md +345 -0
  118. package/bin/skills/lakehouse-doc-en/references/lakehouse-dqc-guide.md +300 -0
  119. package/bin/skills/lakehouse-doc-en/references/lakehouse-medallion-sql-dt-guide.md +543 -0
  120. package/bin/skills/lakehouse-doc-en/references/lakehouse-multi-cloud-acceleration.md +274 -0
  121. package/bin/skills/lakehouse-doc-en/references/lakehouse-multimodal-ai-pipeline.md +198 -0
  122. package/bin/skills/lakehouse-doc-en/references/lakehouse-quick-experience_guide.md +49 -52
  123. package/bin/skills/lakehouse-doc-en/references/lakehouse-volume-pipe-acceleration-guide.md +380 -0
  124. package/bin/skills/lakehouse-doc-en/references/langchain-plug-installation.md +1 -1
  125. package/bin/skills/lakehouse-doc-en/references/management.md +4 -9
  126. package/bin/skills/lakehouse-doc-en/references/medallion-lakehouse-from-scratch.md +2 -1
  127. package/bin/skills/lakehouse-doc-en/references/metrics_answer_build.md +58 -21
  128. package/bin/skills/lakehouse-doc-en/references/migrate-spark-data-engineering-best-practices-to-lakehouse.md +1 -1
  129. package/bin/skills/lakehouse-doc-en/references/mindsdb.md +1 -1
  130. package/bin/skills/lakehouse-doc-en/references/monitoring_and_alerting.md +65 -60
  131. package/bin/skills/lakehouse-doc-en/references/monitoring_item_specification.md +33 -33
  132. package/bin/skills/lakehouse-doc-en/references/multitable_batch_sync.md +16 -16
  133. package/bin/skills/lakehouse-doc-en/references/multitable_realtime_sync.md +65 -72
  134. package/bin/skills/lakehouse-doc-en/references/multitable_realtime_sync_sop.md +54 -52
  135. package/bin/skills/lakehouse-doc-en/references/navicat-mysql.md +2 -0
  136. package/bin/skills/lakehouse-doc-en/references/om-dynamic-table.md +71 -66
  137. package/bin/skills/lakehouse-doc-en/references/om-vcluster.md +2 -0
  138. package/bin/skills/lakehouse-doc-en/references/open-api-create-session.md +79 -0
  139. package/bin/skills/lakehouse-doc-en/references/open-api-generate-auth-token.md +63 -0
  140. package/bin/skills/lakehouse-doc-en/references/open-api-overview.md +96 -0
  141. package/bin/skills/lakehouse-doc-en/references/open-api-quick-start.md +286 -0
  142. package/bin/skills/lakehouse-doc-en/references/open-api-response-guide.md +264 -0
  143. package/bin/skills/lakehouse-doc-en/references/open-api-safe-question-poll.md +201 -0
  144. package/bin/skills/lakehouse-doc-en/references/open-api-text2insight-query.md +99 -0
  145. package/bin/skills/lakehouse-doc-en/references/open-api-text2insight-stop.md +74 -0
  146. package/bin/skills/lakehouse-doc-en/references/overview.md +6 -7
  147. package/bin/skills/lakehouse-doc-en/references/permission-application.md +5 -5
  148. package/bin/skills/lakehouse-doc-en/references/pipe-introduction.md +1 -0
  149. package/bin/skills/lakehouse-doc-en/references/pipe-kafka-table-stream.md +72 -70
  150. package/bin/skills/lakehouse-doc-en/references/pipe-kafka.md +105 -110
  151. package/bin/skills/lakehouse-doc-en/references/pipe-overview.md +40 -40
  152. package/bin/skills/lakehouse-doc-en/references/pipe-storage-object.md +43 -48
  153. package/bin/skills/lakehouse-doc-en/references/pipe-summary.md +14 -4
  154. package/bin/skills/lakehouse-doc-en/references/pipe-syntax.md +58 -151
  155. package/bin/skills/lakehouse-doc-en/references/practice_python_task.md +4 -4
  156. package/bin/skills/lakehouse-doc-en/references/pricing-ai-gateway.md +181 -0
  157. package/bin/skills/lakehouse-doc-en/references/pricing-lakehouse.md +316 -0
  158. package/bin/skills/lakehouse-doc-en/references/pricing.md +44 -288
  159. package/bin/skills/lakehouse-doc-en/references/private-link-general.md +0 -2
  160. package/bin/skills/lakehouse-doc-en/references/pyspark-to-zettapark-migration-f1.md +1 -1
  161. package/bin/skills/lakehouse-doc-en/references/python-igs.md +7 -3
  162. package/bin/skills/lakehouse-doc-en/references/python-sample-put-github-rt-events.md +1 -1
  163. package/bin/skills/lakehouse-doc-en/references/python-task.md +1 -1
  164. package/bin/skills/lakehouse-doc-en/references/python_reference/connector.md +3 -3
  165. package/bin/skills/lakehouse-doc-en/references/python_reference/connector_advanced.md +2 -2
  166. package/bin/skills/lakehouse-doc-en/references/python_reference/connector_examples.md +2 -2
  167. package/bin/skills/lakehouse-doc-en/references/python_sdk_guide.md +1 -1
  168. package/bin/skills/lakehouse-doc-en/references/python_shell_datasource.md +11 -9
  169. package/bin/skills/lakehouse-doc-en/references/quick_start_batch_sync_data.md +9 -18
  170. package/bin/skills/lakehouse-doc-en/references/quick_start_bi_analysis.md +8 -25
  171. package/bin/skills/lakehouse-doc-en/references/quick_start_create_workspace.md +4 -6
  172. package/bin/skills/lakehouse-doc-en/references/quick_start_data_quality.md +8 -8
  173. package/bin/skills/lakehouse-doc-en/references/quick_start_etl.md +16 -20
  174. package/bin/skills/lakehouse-doc-en/references/quick_start_monitoring_and_alerting.md +10 -18
  175. package/bin/skills/lakehouse-doc-en/references/quick_start_sql_query.md +7 -10
  176. package/bin/skills/lakehouse-doc-en/references/quick_start_upload_data.md +5 -7
  177. package/bin/skills/lakehouse-doc-en/references/quick_start_user_management.md +8 -8
  178. package/bin/skills/lakehouse-doc-en/references/quick_start_workspace.md +0 -5
  179. package/bin/skills/lakehouse-doc-en/references/quick_start_workspace_user.md +8 -8
  180. package/bin/skills/lakehouse-doc-en/references/quickstart.md +69 -56
  181. package/bin/skills/lakehouse-doc-en/references/quickstart_datashare_between_companies.md +0 -5
  182. package/bin/skills/lakehouse-doc-en/references/quickstart_envirment_for_team.md +0 -24
  183. package/bin/skills/lakehouse-doc-en/references/realtime-pipeline-selection-guide.md +1 -2
  184. package/bin/skills/lakehouse-doc-en/references/realtime-sales-dashboard-with-dynamic-table.md +3 -3
  185. package/bin/skills/lakehouse-doc-en/references/realtime_sync.md +0 -1
  186. package/bin/skills/lakehouse-doc-en/references/release-note-2026-05-19.md +5 -3
  187. package/bin/skills/lakehouse-doc-en/references/revoke-privileges.md +3 -1
  188. package/bin/skills/lakehouse-doc-en/references/roles.md +2 -3
  189. package/bin/skills/lakehouse-doc-en/references/row-filter.md +165 -0
  190. package/bin/skills/lakehouse-doc-en/references/row_level_permission.md +30 -19
  191. package/bin/skills/lakehouse-doc-en/references/scheduled_task.md +28 -21
  192. package/bin/skills/lakehouse-doc-en/references/security_overview.md +99 -21
  193. package/bin/skills/lakehouse-doc-en/references/set-command.md +1 -1
  194. package/bin/skills/lakehouse-doc-en/references/setup.md +13 -15
  195. package/bin/skills/lakehouse-doc-en/references/show-grants.md +1 -1
  196. package/bin/skills/lakehouse-doc-en/references/snowflake-dynamic-tables-to-lakehouse.md +2 -2
  197. package/bin/skills/lakehouse-doc-en/references/spark-connector-summary.md +1 -1
  198. package/bin/skills/lakehouse-doc-en/references/sql_functions/context_functions/current_vcluster.md +1 -1
  199. package/bin/skills/lakehouse-doc-en/references/sso-configuration.md +2 -2
  200. package/bin/skills/lakehouse-doc-en/references/streaming_pipeline_with_dynamic_table.md +0 -1
  201. package/bin/skills/lakehouse-doc-en/references/studio-incremental-sync-practice.md +27 -23
  202. package/bin/skills/lakehouse-doc-en/references/studio-shell-task.md +1 -1
  203. package/bin/skills/lakehouse-doc-en/references/supported-cloud-platforms.md +32 -0
  204. package/bin/skills/lakehouse-doc-en/references/table_rendering.md +18 -12
  205. package/bin/skills/lakehouse-doc-en/references/task-develop.md +89 -91
  206. package/bin/skills/lakehouse-doc-en/references/task_development.md +19 -17
  207. package/bin/skills/lakehouse-doc-en/references/task_group.md +16 -14
  208. package/bin/skills/lakehouse-doc-en/references/task_instance.md +21 -21
  209. package/bin/skills/lakehouse-doc-en/references/task_param.md +38 -35
  210. package/bin/skills/lakehouse-doc-en/references/task_param_reference.md +81 -79
  211. package/bin/skills/lakehouse-doc-en/references/task_scheduling_dependency.md +20 -21
  212. package/bin/skills/lakehouse-doc-en/references/tencentcloud_arn_and_externalid.md +1 -5
  213. package/bin/skills/lakehouse-doc-en/references/trial-account-quotas-and-limits.md +1 -3
  214. package/bin/skills/lakehouse-doc-en/references/tutorial_connect_to_lakehouse.md +69 -0
  215. package/bin/skills/lakehouse-doc-en/references/tutorials.md +4 -1
  216. package/bin/skills/lakehouse-doc-en/references/unique-key.md +167 -0
  217. package/bin/skills/lakehouse-doc-en/references/usageandbillingview.md +138 -0
  218. package/bin/skills/lakehouse-doc-en/references/use-dbt-dev.md +3 -3
  219. package/bin/skills/lakehouse-doc-en/references/use-java-sdk-realtime-uploaddata.md +1 -1
  220. package/bin/skills/lakehouse-doc-en/references/use-java-sdk-upload-data-local.md +3 -3
  221. package/bin/skills/lakehouse-doc-en/references/use-models.md +128 -0
  222. package/bin/skills/lakehouse-doc-en/references/use-mysql-client.md +81 -81
  223. package/bin/skills/lakehouse-doc-en/references/use-python-sdk-upload-data.md +10 -12
  224. package/bin/skills/lakehouse-doc-en/references/user-identification.md +2 -3
  225. package/bin/skills/lakehouse-doc-en/references/user_permission_grand_guide.md +1 -1
  226. package/bin/skills/lakehouse-doc-en/references/using-udf-in-dynamic-table.md +1 -1
  227. package/bin/skills/lakehouse-doc-en/references/vc_cache.md +18 -22
  228. package/bin/skills/lakehouse-doc-en/references/vcluster_size_description.md +33 -31
  229. package/bin/skills/lakehouse-doc-en/references/virtual-cluster.md +43 -45
  230. package/bin/skills/lakehouse-doc-en/references/web-job-history.md +94 -108
  231. package/bin/skills/lakehouse-doc-en/references/web_search.md +16 -7
  232. package/bin/skills/lakehouse-doc-en/references/zettapark-data-engineering-demo.md +1 -1
  233. package/bin/skills/lakehouse-doc-en/references/zettapark-dataframe-guide.md +144 -70
  234. package/bin/skills/lakehouse-doc-en/references/zettapark-dynamic-table-guide.md +2 -2
  235. package/bin/skills/lakehouse-doc-en/references/zettapark-etl-guide.md +73 -33
  236. package/bin/skills/lakehouse-doc-en/references/zettapark-feature-engineering.md +2 -2
  237. package/bin/skills/lakehouse-doc-en/references/zettapark-functions-guide.md +75 -46
  238. package/bin/skills/lakehouse-doc-en/references/zettapark-quick-start.md +2 -2
  239. package/bin/skills/lakehouse-doc-en/references/zettapark-stream-guide.md +4 -4
  240. package/bin/skills/lakehouse-doc-en/references/zettapark-volume-guide.md +93 -29
  241. package/package.json +1 -1
  242. package/bin/skills/lakehouse-doc-en/references/CLAUDE.md +0 -606
  243. package/bin/skills/lakehouse-doc-en/references/modelprice.md +0 -155
@@ -0,0 +1,543 @@
1
+ # Singdata Lakehouse Medallion Architecture in Practice: Pure SQL Dynamic Table Approach
2
+
3
+ The Medallion architecture (Bronze → Silver → Gold) is a data lake organization pattern popularized by Databricks. On Singdata Lakehouse, besides implementing it with the ZettaPark Python API, there is a cleaner alternative: **building all three layers declaratively using SQL Dynamic Tables**—no Python code required, no scheduling platform configuration needed, and all three layers automatically refresh incrementally based on dependency chains.
4
+
5
+ This article uses the NHL (National Hockey League) real-world dataset (10 tables, ~14 million rows) to fully demonstrate this approach.
6
+
7
+ > 💡 If you are familiar with Databricks Medallion but prefer not to write Python/ZettaPark, or want to manage data pipelines with pure SQL, this article is your reference. It complements the [ZettaPark migration approach](medallion-lakehouse-from-scratch.md), with the two covering different technical preferences.
8
+
9
+ ### Data Lake Acceleration Overview: Where This Article Fits
10
+
11
+ A typical data lake acceleration pipeline looks like this: **Object storage files → Volume (mount) → Pipe (continuous ingestion) → Target table → Dynamic Table (incremental aggregation)**. The Volume and Pipe stages handle "automatic data loading" into the target table, while this article focuses on the final stage—cleansing, modeling, and aggregation after data is loaded, using Dynamic Tables to declaratively build the Bronze → Silver → Gold three-layer pipeline.
12
+
13
+ If you have not set up data ingestion yet, start with [Volume + Pipe End-to-End Practice](lakehouse-volume-pipe-acceleration-guide.md) to get file auto-loading working first. If your data is already in Lakehouse tables (like the NHL dataset in this article), start directly here.
14
+
15
+ ---
16
+
17
+ ## Why Use Dynamic Tables to Build Medallion
18
+
19
+ Traditional Medallion architecture typically relies on scheduling platforms (Airflow/Databricks Workflows) to execute Python Notebooks or SQL scripts sequentially. Dynamic Tables offer a different paradigm:
20
+
21
+ | Dimension | Traditional ETL Scheduling | Dynamic Table Approach |
22
+ |---|---|---|
23
+ | Coding style | Python/ZettaPark or SQL scripts | Pure SQL (`CREATE DYNAMIC TABLE ... AS SELECT`) |
24
+ | Scheduling config | Requires DAG and Cron configuration | Declarative `REFRESH INTERVAL`, system auto-schedules |
25
+ | Incremental computation | Manual incremental logic required | System CBO automatically detects incremental changes |
26
+ | Dependency management | Manual orchestration of upstream/downstream order | DT automatically determines refresh order by reference |
27
+ | Data lineage | Requires additional tools to track | `SHOW DYNAMIC TABLE REFRESH HISTORY` built-in |
28
+ | Code as assets | Notebooks/scripts scattered and hard to manage | Centralized in Studio, searchable, comparable, reusable |
29
+
30
+ The core difference: **you do not need to worry about "when to run" or "what to run"—you only need to declare "what result you want"**. The system handles computation orchestration, incremental detection, and parallel scheduling.
31
+
32
+ ---
33
+
34
+ ## Dataset Overview
35
+
36
+ NHL hockey data from the `nhl_game_data` schema (Bronze layer, already loaded):
37
+
38
+ | Table | Rows | Description |
39
+ |---|---|---|
40
+ | `game` | 26,305 | Main game table (matchups, scores, venues, seasons) |
41
+ | `player_info` | 3,925 | Player profiles (name, nationality, position, height/weight) |
42
+ | `team_info` | 33 | Team information (name, abbreviation) |
43
+ | `game_skater_stats` | 945,830 | Skater stats (goals, assists, shots, hits, +/-, etc.) |
44
+ | `game_goalie_stats` | 56,656 | Goalie stats (saves, goals against, save percentage) |
45
+ | `game_goals` | 148,992 | Goal details |
46
+ | `game_plays` | 5,050,529 | Game events (play-by-play) |
47
+ | `game_plays_players` | 7,586,604 | Player participation details per event |
48
+ | `game_penalties` | 247,828 | Penalty records |
49
+ | `game_teams_stats` | 52,610 | Team game-level statistics |
50
+
51
+ Data relationships: `game` is the core fact table, linked to other tables via `game_id`, `player_id`, and `team_id`. Covers 10 seasons from 2010 to 2020.
52
+
53
+ ---
54
+
55
+ ## Architecture Design
56
+
57
+ ```
58
+ Bronze (nhl_game_data.*) Silver (silver.*) DT Gold (gold.*) DT
59
+ ═══════════════════════ ══════════════════ ══════════════════
60
+ Raw data, zero transformation Cleansed + dimension joins Business metrics
61
+
62
+ game ─────────┐ ┌─ dim_team (33) ┌─ scoring_leaders
63
+ team_info ────┤ LEFT JOIN ──→├─ dim_player (3,925) ├─ player_career_stats
64
+ player_info ──┘ ├─ fact_skater_stats ├─ team_season_summary
65
+ skater_stats ── LEFT JOIN ──→ └─ fact_goalie_stats ├─ goalie_season_rankings
66
+ goalie_stats ── LEFT JOIN ──→ └─ team_home_away_split
67
+ ```
68
+
69
+ Three-layer responsibilities:
70
+
71
+ | Layer | Schema | Table Type | Responsibility |
72
+ |---|---|---|---|
73
+ | **Bronze** | `nhl_game_data` | Regular table | Raw data, no transformation |
74
+ | **Silver** | `silver` | Dynamic Table | JOIN dimension tables for names, cleanse field types (STRING→INT), standardize |
75
+ | **Gold** | `gold` | Dynamic Table | Aggregated metrics: top scorers, team records, goalie rankings, career stats |
76
+
77
+ > ⚠️ Silver and Gold both use Dynamic Tables; **materialized views are not recommended**. DT supports incremental refresh and Time Travel; materialized views do not.
78
+
79
+ ---
80
+
81
+ ## Implementation Steps
82
+
83
+ ### Prerequisites
84
+
85
+ - Virtual Cluster available (use `DEFAULT`, GP type, Serverless on-demand wake-up)
86
+ - Bronze data loaded (`nhl_game_data.*` 10 tables)
87
+ - Permissions for CREATE SCHEMA / CREATE DYNAMIC TABLE
88
+
89
+ ### Step 1: Create Schemas
90
+
91
+ Use separate schemas to physically isolate each layer:
92
+
93
+ ```sql
94
+ CREATE SCHEMA IF NOT EXISTS silver COMMENT 'Medallion Silver cleansed layer';
95
+ CREATE SCHEMA IF NOT EXISTS gold COMMENT 'Medallion Gold aggregated metrics layer';
96
+ ```
97
+
98
+ ### Step 2: Silver Layer — Dimension Tables
99
+
100
+ The simplest DT: directly filter/transform columns from Bronze tables. These two tables are small (33 rows and 3,925 rows), so even FULL refreshes are effortless.
101
+
102
+ ```sql
103
+ -- Team dimension
104
+ CREATE OR REPLACE DYNAMIC TABLE silver.dim_team
105
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
106
+ COMMENT 'Silver team dimension table'
107
+ AS
108
+ SELECT
109
+ team_id,
110
+ franchiseid,
111
+ shortname,
112
+ teamname,
113
+ abbreviation,
114
+ link
115
+ FROM nhl_game_data.team_info;
116
+
117
+ -- Player dimension (standardized + full name column added)
118
+ CREATE OR REPLACE DYNAMIC TABLE silver.dim_player
119
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
120
+ COMMENT 'Silver player dimension table — standardized fields + full name'
121
+ AS
122
+ SELECT
123
+ player_id,
124
+ firstname,
125
+ lastname,
126
+ CONCAT(firstname, ' ', lastname) AS full_name,
127
+ nationality,
128
+ birthcity,
129
+ primaryposition AS position,
130
+ birthdate,
131
+ height,
132
+ height_cm,
133
+ CAST(NULLIF(REGEXP_REPLACE(weight, ',', ''), '') AS INT) AS weight_kg,
134
+ shootscatches
135
+ FROM nhl_game_data.player_info;
136
+ ```
137
+
138
+ > **Why use `REGEXP_REPLACE(weight, ',', '')`?** In NHL raw data, numeric fields (such as hits, weight) may contain thousands separators (e.g., "1,234"). Direct CAST would throw an error. Removing the comma before casting to INT is a necessary cleansing step.
139
+
140
+ ### Step 3: Silver Layer — Fact Tables
141
+
142
+ The core work of fact tables: **JOIN dimension tables to resolve names + type standardization**. Using skater stats as an example:
143
+
144
+ ```sql
145
+ CREATE OR REPLACE DYNAMIC TABLE silver.fact_skater_stats
146
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
147
+ COMMENT 'Silver skater stats fact table — joined with player name + team name + season'
148
+ AS
149
+ SELECT
150
+ s.game_id,
151
+ s.player_id,
152
+ p.full_name AS player_name,
153
+ p.position,
154
+ s.team_id,
155
+ t.teamname AS team_name,
156
+ t.abbreviation AS team_abbr,
157
+ g.season,
158
+ g.date_time_gmt AS game_date,
159
+ s.timeonice,
160
+ s.goals,
161
+ s.assists,
162
+ s.goals + s.assists AS points, -- computed field: points
163
+ s.shots,
164
+ CAST(NULLIF(REGEXP_REPLACE(s.hits, ',', ''), '') AS INT) AS hits,
165
+ s.powerplaygoals,
166
+ s.penaltyminutes,
167
+ s.plusminus,
168
+ s.eventimeonice,
169
+ s.powerplaytimeonice
170
+ FROM nhl_game_data.game_skater_stats s
171
+ LEFT JOIN nhl_game_data.game g
172
+ ON s.game_id = g.game_id
173
+ LEFT JOIN silver.dim_player p
174
+ ON s.player_id = p.player_id
175
+ LEFT JOIN silver.dim_team t
176
+ ON s.team_id = t.team_id;
177
+ ```
178
+
179
+ > ⚠️ **The Silver fact table references Silver dimension tables** (`silver.dim_player`, `silver.dim_team`). This means the system refreshes dimension tables first, then fact tables—DT handles the dependency chain automatically.
180
+
181
+ Goalie stats fact table follows the same pattern, with additional save percentage calculation:
182
+
183
+ ```sql
184
+ CREATE OR REPLACE DYNAMIC TABLE silver.fact_goalie_stats
185
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
186
+ COMMENT 'Silver goalie stats fact table — includes save percentage calculation'
187
+ AS
188
+ SELECT
189
+ gs.game_id,
190
+ gs.player_id,
191
+ p.full_name AS player_name,
192
+ t.teamname AS team_name,
193
+ t.abbreviation AS team_abbr,
194
+ g.season,
195
+ g.date_time_gmt AS game_date,
196
+ gs.timeonice,
197
+ gs.shots AS shots_faced,
198
+ gs.saves,
199
+ CASE WHEN gs.shots > 0
200
+ THEN ROUND(gs.saves * 1.0 / gs.shots, 3)
201
+ ELSE NULL
202
+ END AS save_pct, -- computed field: save percentage
203
+ gs.decision
204
+ FROM nhl_game_data.game_goalie_stats gs
205
+ LEFT JOIN nhl_game_data.game g
206
+ ON gs.game_id = g.game_id
207
+ LEFT JOIN silver.dim_player p
208
+ ON gs.player_id = p.player_id
209
+ LEFT JOIN silver.dim_team t
210
+ ON gs.team_id = t.team_id;
211
+ ```
212
+
213
+ ### Step 4: Initial Refresh of Silver Layer
214
+
215
+ After DT creation, only the computation logic is defined—there is no data yet. You need to manually trigger the first refresh:
216
+
217
+ ```sql
218
+ REFRESH DYNAMIC TABLE silver.dim_team;
219
+ REFRESH DYNAMIC TABLE silver.dim_player;
220
+ REFRESH DYNAMIC TABLE silver.fact_skater_stats;
221
+ REFRESH DYNAMIC TABLE silver.fact_goalie_stats;
222
+ ```
223
+
224
+ > 💡 Fact tables reference dimension tables, so refresh the dimension tables first and the fact tables second. Manual refreshes still succeed in any order (the system waits for dependencies to be ready), but following the dependency order is recommended because the initial load completes faster.
225
+
226
+ ### Step 5: Gold Layer — Aggregated Metrics
227
+
228
+ The Gold layer reads data from the Silver layer and uses aggregate functions to generate business metrics. All tables use a `1 DAY` refresh interval (T+1 scenario).
229
+
230
+ #### Top Scorers: TOP 20 Scorers Per Season
231
+
232
+ Use the `RANK()` window function to rank by season:
233
+
234
+ ```sql
235
+ CREATE OR REPLACE DYNAMIC TABLE gold.scoring_leaders
236
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
237
+ COMMENT 'Gold top 20 scorers per season — ranked by points (goals + assists)'
238
+ AS
239
+ SELECT season, rank, player_id, player_name, position, team_abbr,
240
+ games_played, goals, assists, points,
241
+ ROUND(points * 1.0 / games_played, 2) AS pts_per_game
242
+ FROM (
243
+ SELECT
244
+ season, player_id, player_name, position, team_abbr,
245
+ COUNT(*) AS games_played,
246
+ SUM(goals) AS goals,
247
+ SUM(assists) AS assists,
248
+ SUM(points) AS points,
249
+ RANK() OVER (PARTITION BY season ORDER BY SUM(points) DESC) AS rank
250
+ FROM silver.fact_skater_stats
251
+ GROUP BY season, player_id, player_name, position, team_abbr
252
+ ) t
253
+ WHERE rank <= 20;
254
+ ```
255
+
256
+ **Validation results** (2019-20 season):
257
+
258
+ | rank | player | team | goals | assists | points |
259
+ |---|---|---|---|---|---|
260
+ | 1 | Nikita Kucherov | TBL | 160 | 316 | 476 |
261
+ | 2 | Nathan MacKinnon | COL | 176 | 296 | 472 |
262
+ | 3 | Leon Draisaitl | EDM | 181 | 274 | 455 |
263
+ | 4 | David Pastrnak | BOS | 204 | 216 | 420 |
264
+ | 5 | Connor McDavid | EDM | 153 | 262 | 415 |
265
+
266
+ > ✅ Rankings match NHL official records, data accuracy validation passed.
267
+
268
+ #### Team Season Records
269
+
270
+ The Bronze `game` table stores one row per game, with the home team and away team in separate columns. Each game therefore needs to be expanded into two rows (one for the home team, one for the away team) and then aggregated by team and season. This is implemented with `UNION ALL` + `CASE WHEN`:
271
+
272
+ ```sql
273
+ CREATE OR REPLACE DYNAMIC TABLE gold.team_season_summary
274
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
275
+ COMMENT 'Gold team season records — wins/losses/goals/goals-against/points'
276
+ AS
277
+ SELECT
278
+ g.season, g.team_id,
279
+ t.teamname AS team_name,
280
+ t.abbreviation AS team_abbr,
281
+ COUNT(*) AS games_played,
282
+ SUM(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 1
283
+ WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 1
284
+ ELSE 0 END) AS wins,
285
+ SUM(CASE WHEN g.side = 'home' AND g.outcome LIKE 'away win%' THEN 1
286
+ WHEN g.side = 'away' AND g.outcome LIKE 'home win%' THEN 1
287
+ ELSE 0 END) AS losses,
288
+ SUM(CASE WHEN g.side = 'home' THEN g.home_goals
289
+ ELSE g.away_goals END) AS goals_for,
290
+ SUM(CASE WHEN g.side = 'home' THEN g.away_goals
291
+ ELSE g.home_goals END) AS goals_against,
292
+ SUM(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 2
293
+ WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 2
294
+ ELSE 0 END) AS points
295
+ FROM (
296
+ SELECT season, home_team_id AS team_id, outcome,
297
+ home_goals, away_goals, 'home' AS side
298
+ FROM nhl_game_data.game
299
+ UNION ALL
300
+ SELECT season, away_team_id AS team_id, outcome,
301
+ home_goals, away_goals, 'away' AS side
302
+ FROM nhl_game_data.game
303
+ ) g
304
+ LEFT JOIN silver.dim_team t ON g.team_id = t.team_id
305
+ GROUP BY g.season, g.team_id, t.teamname, t.abbreviation;
306
+ ```
307
+
308
+ > ⚠️ **Note**: An early version used `outcome LIKE '%win%'` to match wins, but this caused the away team row to also be counted as a win when the home team won. You must cross-match `side` and `outcome`: home team rows only match `'home win%'`, and away team rows only match `'away win%'`.
309
+
310
+ **Validation results** (2019-20 season TOP 5):
311
+
312
+ | team | games | wins | losses | points |
313
+ |---|---|---|---|---|
314
+ | Lightning (TBL) | 190 | 122 | 68 | 244 |
315
+ | Stars (DAL) | 192 | 104 | 88 | 208 |
316
+ | Golden Knights (VGK) | 182 | 102 | 80 | 204 |
317
+ | Avalanche (COL) | 170 | 102 | 68 | 204 |
318
+ | Flyers (PHI) | 170 | 102 | 68 | 204 |
319
+
320
+ #### Goalie Season Rankings + Player Career Stats + Home/Away Split
321
+
322
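+ Full DDL is in the appendix. Goalie Season Rankings follows the same pattern as Top Scorers: aggregate from Silver layer → `RANK() OVER (PARTITION BY season ...)` → filter TOP N. Player Career Stats is a plain `GROUP BY` aggregation over the Silver layer, and Home/Away Split reuses the `UNION ALL` home/away row expansion from Team Season Records.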
323
+
324
+ ### Step 6: Validate the Full Pipeline
325
+
326
+ ```sql
327
+ -- Row count comparison across layers
328
+ SELECT 'Bronze game' AS layer, COUNT(*) FROM nhl_game_data.game
329
+ UNION ALL SELECT 'Silver dim_team', COUNT(*) FROM silver.dim_team
330
+ UNION ALL SELECT 'Silver fact_skater', COUNT(*) FROM silver.fact_skater_stats
331
+ UNION ALL SELECT 'Gold scoring_leaders', COUNT(*) FROM gold.scoring_leaders
332
+ UNION ALL SELECT 'Gold team_season', COUNT(*) FROM gold.team_season_summary;
333
+
334
+ -- View DT refresh history
335
+ SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'scoring_leaders';
336
+ ```
337
+
338
+ **Complete validation results:**
339
+
340
+ | Layer | Table | Rows | Refresh Mode | Status |
341
+ |---|---|---|---|---|
342
+ | Silver | dim_team | 33 | FULL | ✅ Matches Bronze |
343
+ | Silver | dim_player | 3,925 | FULL | ✅ Matches Bronze |
344
+ | Silver | fact_skater_stats | 1,130,682 | FULL | ✅ Includes player_name/team_name/points |
345
+ | Silver | fact_goalie_stats | 67,642 | FULL | ✅ Includes computed save_pct |
346
+ | Gold | scoring_leaders | 399 | FULL | ✅ TOP 20 per season |
347
+ | Gold | player_career_stats | 3,353 | FULL | ✅ Career summary |
348
+ | Gold | team_season_summary | 580 | FULL | ✅ One row per team per season (up to 33 teams × 18 seasons) |
349
+ | Gold | goalie_season_rankings | 294 | FULL | ✅ TOP 15 per season |
350
+ | Gold | team_home_away_split | 580 | FULL | ✅ Home/away split |
351
+
352
+ > 💡 **Why all FULL?** On the first refresh there is no incremental baseline, so DT must perform a full scan of the source tables to establish the initial state. After Bronze layer receives new data, DT will automatically switch to INCREMENTAL mode and process only the changed parts. Source tables need `change_tracking` enabled to support incremental refresh (`ALTER TABLE table_name SET PROPERTIES ('change_tracking' = 'true')`).
353
+
354
+ ---
355
+
356
+ ## Design Principles
357
+
358
+ ### 1. Cross-Layer Reference Rules
359
+
360
+ | Reference Direction | Allowed | Example |
361
+ |---|---|---|
362
+ | Silver → Bronze | ✅ | `FROM nhl_game_data.game` |
363
+ | Gold → Silver | ✅ | `FROM silver.fact_skater_stats` |
364
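+ | Gold → Bronze | ⚠️ Not recommended | Should access indirectly through Silver layer (exception in this article: `team_season_summary` and `team_home_away_split` read `nhl_game_data.game` directly) |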
365
+ | Gold → Gold | ⚠️ Use with caution | Only for multi-level aggregation |
366
+ | Bronze → Silver | ❌ Forbidden | Lower layers should not depend on upper layers |
367
+
368
+ ### 2. LEFT JOIN: Filter Conditions on the Right-Side Table Must Go in the ON Clause
369
+
370
+ ```sql
371
+ -- ❌ Wrong: WHERE filter degrades LEFT JOIN to INNER JOIN
372
+ SELECT * FROM skater_stats s
373
+ LEFT JOIN team_info t ON s.team_id = t.team_id
374
+ WHERE t.abbreviation = 'TBL';
375
+
376
+ -- ✅ Correct: filter condition in ON clause
377
+ SELECT * FROM skater_stats s
378
+ LEFT JOIN team_info t
379
+ ON s.team_id = t.team_id AND t.abbreviation = 'TBL';
380
+ ```
381
+
382
+ ### 3. First Refresh Baseline Time
383
+
384
+ `REFRESH INTERVAL 1 DAY` calculates the next trigger based on creation time and does not align to clock hours. It is recommended to immediately execute `REFRESH DYNAMIC TABLE` after creation to manually trigger the first refresh and reset the baseline time:
385
+
386
+ ```sql
387
+ CREATE DYNAMIC TABLE gold.scoring_leaders ...;
388
+ REFRESH DYNAMIC TABLE gold.scoring_leaders;
389
+ ```
390
+
391
+ ### 4. String Cleansing
392
+
393
+ When raw data comes from external systems, numeric fields may contain non-standard characters:
394
+
395
+ ```sql
396
+ CAST(NULLIF(REGEXP_REPLACE(hits, ',', ''), '') AS INT)
397
+ ```
398
+
399
+ Three-step cleansing: remove commas → NULLIF empty string → CAST to target type. `NULLIF` prevents CAST failures caused by empty strings.
400
+
401
+ ---
402
+
403
+ ## Cost Analysis
404
+
405
+ | Layer | DT Count | Refresh Frequency | Estimated CRU |
406
+ |---|---|---|---|
407
+ | Silver | 4 | 1 DAY | Low (full refresh, but small data volume) |
408
+ | Gold | 5 | 1 DAY | Medium (involves aggregation, ~14M row scan) |
409
+
410
+ All DTs run on a GP-type Virtual Cluster (`DEFAULT`) with serverless on-demand billing. In T+1 scenarios with only one refresh per day, this costs less than traditional hourly ETL.
411
+
412
+ > 💡 To reduce Gold layer costs, infrequently used metrics (such as `goalie_season_rankings`, `team_home_away_split`) can be set to `7 DAY` refresh frequency.
413
+
414
+ ---
415
+
416
+ ## Comparison with ZettaPark Approach
417
+
418
+ | | ZettaPark Approach | Pure SQL DT Approach (this article) |
419
+ |---|---|---|
420
+ | Target audience | Python developers, Data Scientists | SQL developers, Data Analysts |
421
+ | Code volume | Python scripts + Spark API | Pure SQL (DDL) |
422
+ | Scheduling | Requires external scheduling (Studio/Notebook) | DT auto-refresh, no scheduling needed |
423
+ | Incremental computation | Manual CDC management required | System handles automatically |
424
+ | Flexibility | High (Python can call any library) | Medium (within SQL expression capabilities) |
425
+ | Learning curve | Pandas/PySpark/ZettaPark | Pure SQL |
426
+ | Use cases | Complex transformations, ML feature engineering, external API calls | Standard ETL, aggregation, JOINs, window functions |
427
+
428
+ **Both approaches coexist without conflict**: use ZettaPark for complex cleansing, use DT for aggregated metrics, leveraging the strengths of each within the same Medallion architecture.
429
+
430
+ ---
431
+
432
+ ## Notes
433
+
434
+ | Note | Description |
435
+ |---|---|
436
+ | Bronze data changes trigger DT automatically | All 9 DTs in the pipeline refresh in dependency order, no manual trigger needed |
437
+ | DT does not support ALTER to modify SQL | Use `CREATE OR REPLACE` to rebuild |
438
+ | Virtual Cluster must be GP type | AP type does not support small file merging, queries slow down over time |
439
+ | Silver fact tables reference Silver dimension tables | System automatically ensures dimension tables refresh first |
440
+ | String numeric fields need cleansing | Remove commas → NULLIF → CAST, three steps |
441
+ | UNION ALL row expansion requires careful business logic | When splitting home/away teams, win/loss determination must cross-match side and outcome |
442
+ | Manual REFRESH required after initial creation | `REFRESH INTERVAL` does not immediately trigger the first computation |
443
+
444
+ ---
445
+
446
+ ## Appendix: Complete Gold Layer DDL
447
+
448
+ ### Player Career Stats
449
+
450
+ ```sql
451
+ CREATE OR REPLACE DYNAMIC TABLE gold.player_career_stats
452
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
453
+ COMMENT 'Gold player career overview — all-season totals + per-game efficiency'
454
+ AS
455
+ SELECT
456
+ player_id, player_name, position,
457
+ COUNT(*) AS games_played,
458
+ SUM(goals) AS total_goals,
459
+ SUM(assists) AS total_assists,
460
+ SUM(points) AS total_points,
461
+ ROUND(SUM(points) * 1.0 / COUNT(*), 2) AS pts_per_game,
462
+ ROUND(SUM(goals) * 1.0 / NULLIF(SUM(shots), 0), 3) AS shooting_pct,
463
+ AVG(timeonice) AS avg_timeonice_sec,
464
+ SUM(penaltyminutes) AS total_pim,
465
+ AVG(plusminus) AS avg_plusminus
466
+ FROM silver.fact_skater_stats
467
+ GROUP BY player_id, player_name, position;
468
+ ```
469
+
470
+ ### Goalie Season Rankings
471
+
472
+ ```sql
473
+ CREATE OR REPLACE DYNAMIC TABLE gold.goalie_season_rankings
474
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
475
+ COMMENT 'Gold goalie season rankings TOP 15 — ranked by wins'
476
+ AS
477
+ SELECT season, rank, player_id, player_name, team_abbr,
478
+ games_played, wins, saves, shots_faced,
479
+ ROUND(save_pct, 3) AS save_pct
480
+ FROM (
481
+ SELECT
482
+ season, player_id, player_name, team_abbr,
483
+ COUNT(*) AS games_played,
484
+ SUM(CASE WHEN decision = 'W' THEN 1 ELSE 0 END) AS wins,
485
+ SUM(saves) AS saves,
486
+ SUM(shots_faced) AS shots_faced,
487
+ CASE WHEN SUM(shots_faced) > 0
488
+ THEN SUM(saves) * 1.0 / SUM(shots_faced)
489
+ ELSE NULL END AS save_pct,
490
+ RANK() OVER (PARTITION BY season ORDER BY
491
+ SUM(CASE WHEN decision = 'W' THEN 1 ELSE 0 END) DESC) AS rank
492
+ FROM silver.fact_goalie_stats
493
+ GROUP BY season, player_id, player_name, team_abbr
494
+ ) t
495
+ WHERE rank <= 15;
496
+ ```
497
+
498
+ ### Home/Away Split
499
+
500
+ ```sql
501
+ CREATE OR REPLACE DYNAMIC TABLE gold.team_home_away_split
502
+ REFRESH INTERVAL 1 DAY vcluster DEFAULT
503
+ COMMENT 'Gold team home vs. away performance — home win% vs away win%'
504
+ AS
505
+ SELECT
506
+ g.season, g.team_id,
507
+ t.teamname AS team_name,
508
+ t.abbreviation AS team_abbr,
509
+ COUNT(CASE WHEN g.side = 'home' THEN 1 END) AS home_games,
510
+ COUNT(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 1 END) AS home_wins,
511
+ COUNT(CASE WHEN g.side = 'away' THEN 1 END) AS away_games,
512
+ COUNT(CASE WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 1 END) AS away_wins,
513
+ ROUND(
514
+ COUNT(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 1 END) * 1.0 /
515
+ NULLIF(COUNT(CASE WHEN g.side = 'home' THEN 1 END), 0), 3
516
+ ) AS home_win_pct,
517
+ ROUND(
518
+ COUNT(CASE WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 1 END) * 1.0 /
519
+ NULLIF(COUNT(CASE WHEN g.side = 'away' THEN 1 END), 0), 3
520
+ ) AS away_win_pct
521
+ FROM (
522
+ SELECT season, home_team_id AS team_id, outcome, 'home' AS side
523
+ FROM nhl_game_data.game
524
+ UNION ALL
525
+ SELECT season, away_team_id AS team_id, outcome, 'away' AS side
526
+ FROM nhl_game_data.game
527
+ ) g
528
+ LEFT JOIN silver.dim_team t ON g.team_id = t.team_id
529
+ GROUP BY g.season, g.team_id, t.teamname, t.abbreviation;
530
+ ```
531
+
532
+ ---
533
+
534
+ ## Related Documents
535
+
536
+ Complete data lake acceleration pipeline: Volume mount → Pipe ingestion → Dynamic Table modeling. The following documents cover each stage:
537
+
538
+ - [Volume + Pipe Data Lake Acceleration](lakehouse-volume-pipe-acceleration-guide.md) — File auto-ingestion, the upstream step for this article
539
+ - [Multi-Cloud Unified Data Lake Acceleration](lakehouse-multi-cloud-acceleration.md) — Same SQL runs on Alibaba Cloud/Tencent Cloud/AWS
540
+ - [Dynamic Table Introduction](dynamic-table-introduce.md) — Incremental computation mechanism and scheduling principles
541
+ - [CREATE DYNAMIC TABLE](create-dynamic-table.md) — Complete DDL syntax
542
+ - [Incremental Computing Overview](incremental-computing.md) — DT incremental refresh support matrix
543
+ - [Medallion from Scratch (ZettaPark Approach)](medallion-lakehouse-from-scratch.md) — Python API version covering the same topic