@clickzetta/cz-cli-darwin-arm64 0.3.78 → 0.3.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. package/bin/cz-cli +0 -0
  2. package/package.json +1 -1
  3. package/bin/skills/clickzetta-access-control/LICENSE +0 -16
  4. package/bin/skills/clickzetta-access-control/SKILL.md +0 -243
  5. package/bin/skills/clickzetta-access-control/eval_cases.jsonl +0 -3
  6. package/bin/skills/clickzetta-access-control/references/dynamic-masking.md +0 -86
  7. package/bin/skills/clickzetta-access-control/references/grant-revoke.md +0 -103
  8. package/bin/skills/clickzetta-access-control/references/role-management.md +0 -66
  9. package/bin/skills/clickzetta-access-control/references/user-management.md +0 -61
  10. package/bin/skills/clickzetta-app-python-sdk/LICENSE +0 -16
  11. package/bin/skills/clickzetta-app-python-sdk/SKILL.md +0 -153
  12. package/bin/skills/clickzetta-app-python-sdk/eval_cases.jsonl +0 -12
  13. package/bin/skills/clickzetta-app-python-sdk/references/bulkload.md +0 -196
  14. package/bin/skills/clickzetta-app-python-sdk/references/connector.md +0 -143
  15. package/bin/skills/clickzetta-app-python-sdk/references/realtime.md +0 -122
  16. package/bin/skills/clickzetta-batch-sync-pipeline/LICENSE +0 -16
  17. package/bin/skills/clickzetta-batch-sync-pipeline/SKILL.md +0 -227
  18. package/bin/skills/clickzetta-batch-sync-pipeline/eval_cases.jsonl +0 -5
  19. package/bin/skills/clickzetta-bi-connect/LICENSE +0 -16
  20. package/bin/skills/clickzetta-bi-connect/SKILL.md +0 -176
  21. package/bin/skills/clickzetta-bi-connect/eval_cases.jsonl +0 -5
  22. package/bin/skills/clickzetta-bi-connect/references/bi-tools.md +0 -170
  23. package/bin/skills/clickzetta-cdc-sync-pipeline/LICENSE +0 -16
  24. package/bin/skills/clickzetta-cdc-sync-pipeline/SKILL.md +0 -633
  25. package/bin/skills/clickzetta-cdc-sync-pipeline/eval_cases.jsonl +0 -5
  26. package/bin/skills/clickzetta-data-ingest-pipeline/LICENSE +0 -16
  27. package/bin/skills/clickzetta-data-ingest-pipeline/SKILL.md +0 -237
  28. package/bin/skills/clickzetta-data-ingest-pipeline/eval_cases.jsonl +0 -5
  29. package/bin/skills/clickzetta-data-retention/LICENSE +0 -16
  30. package/bin/skills/clickzetta-data-retention/SKILL.md +0 -160
  31. package/bin/skills/clickzetta-data-retention/eval_cases.jsonl +0 -5
  32. package/bin/skills/clickzetta-data-retention/references/lifecycle-reference.md +0 -175
  33. package/bin/skills/clickzetta-data-science/LICENSE +0 -16
  34. package/bin/skills/clickzetta-data-science/SKILL.md +0 -125
  35. package/bin/skills/clickzetta-data-science/eval_cases.jsonl +0 -12
  36. package/bin/skills/clickzetta-data-science/references/bitmap-profile.md +0 -146
  37. package/bin/skills/clickzetta-data-science/references/data-patterns.md +0 -110
  38. package/bin/skills/clickzetta-data-science/references/setup.md +0 -160
  39. package/bin/skills/clickzetta-data-science/references/stats-functions.md +0 -195
  40. package/bin/skills/clickzetta-data-science/references/write-and-infer.md +0 -122
  41. package/bin/skills/clickzetta-data-science/references/zettapark-api.md +0 -156
  42. package/bin/skills/clickzetta-data-sharing/LICENSE +0 -16
  43. package/bin/skills/clickzetta-data-sharing/SKILL.md +0 -160
  44. package/bin/skills/clickzetta-data-sharing/eval_cases.jsonl +0 -3
  45. package/bin/skills/clickzetta-data-sharing/references/share-ddl.md +0 -134
  46. package/bin/skills/clickzetta-dba-guide/LICENSE +0 -16
  47. package/bin/skills/clickzetta-dba-guide/SKILL.md +0 -542
  48. package/bin/skills/clickzetta-dba-guide/eval_cases.jsonl +0 -3
  49. package/bin/skills/clickzetta-dw-modeling/LICENSE +0 -16
  50. package/bin/skills/clickzetta-dw-modeling/SKILL.md +0 -351
  51. package/bin/skills/clickzetta-dw-modeling/eval_cases.jsonl +0 -4
  52. package/bin/skills/clickzetta-dw-modeling/references/modeling-patterns.md +0 -100
  53. package/bin/skills/clickzetta-dynamic-table/LICENSE +0 -16
  54. package/bin/skills/clickzetta-dynamic-table/SKILL.md +0 -230
  55. package/bin/skills/clickzetta-dynamic-table/best-practices/dimension-table-join-guide.md +0 -253
  56. package/bin/skills/clickzetta-dynamic-table/best-practices/medallion-and-stream-patterns.md +0 -124
  57. package/bin/skills/clickzetta-dynamic-table/best-practices/non-partitioned-merge-into-warning.md +0 -96
  58. package/bin/skills/clickzetta-dynamic-table/best-practices/performance-optimization.md +0 -109
  59. package/bin/skills/clickzetta-dynamic-table/best-practices/scheduling-guide.md +0 -135
  60. package/bin/skills/clickzetta-dynamic-table/dt-creator/SKILL.md +0 -15
  61. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/dt-declaration-strategy.md +0 -185
  62. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/incremental-config-reference.md +0 -427
  63. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/refresh-history-guide.md +0 -260
  64. package/bin/skills/clickzetta-dynamic-table/dt-creator/references/sql-limitations.md +0 -80
  65. package/bin/skills/clickzetta-dynamic-table/dynamic-table-alter/SKILL.md +0 -190
  66. package/bin/skills/clickzetta-dynamic-table/eval_cases.jsonl +0 -5
  67. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/SKILL.md +0 -27
  68. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-column-validation-rules.md +0 -118
  69. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-conversion-rules.md +0 -225
  70. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-placeholder-rules.md +0 -182
  71. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-refresh-rules.md +0 -98
  72. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-self-reference-rules.md +0 -76
  73. package/bin/skills/clickzetta-dynamic-table/sql-to-dt/references/sql2dt-workflow.md +0 -109
  74. package/bin/skills/clickzetta-external-catalog/LICENSE +0 -16
  75. package/bin/skills/clickzetta-external-catalog/SKILL.md +0 -123
  76. package/bin/skills/clickzetta-external-catalog/eval_cases.jsonl +0 -5
  77. package/bin/skills/clickzetta-external-catalog/references/external-catalog-ddl.md +0 -130
  78. package/bin/skills/clickzetta-external-function/LICENSE +0 -16
  79. package/bin/skills/clickzetta-external-function/SKILL.md +0 -203
  80. package/bin/skills/clickzetta-external-function/eval_cases.jsonl +0 -4
  81. package/bin/skills/clickzetta-external-function/references/external-function-ddl.md +0 -171
  82. package/bin/skills/clickzetta-file-import-pipeline/LICENSE +0 -16
  83. package/bin/skills/clickzetta-file-import-pipeline/SKILL.md +0 -190
  84. package/bin/skills/clickzetta-file-import-pipeline/eval_cases.jsonl +0 -5
  85. package/bin/skills/clickzetta-index-manager/LICENSE +0 -16
  86. package/bin/skills/clickzetta-index-manager/SKILL.md +0 -140
  87. package/bin/skills/clickzetta-index-manager/eval_cases.jsonl +0 -5
  88. package/bin/skills/clickzetta-index-manager/references/bloomfilter-index.md +0 -67
  89. package/bin/skills/clickzetta-index-manager/references/index-management.md +0 -73
  90. package/bin/skills/clickzetta-index-manager/references/inverted-index.md +0 -80
  91. package/bin/skills/clickzetta-index-manager/references/vector-index.md +0 -81
  92. package/bin/skills/clickzetta-java-sdk/LICENSE +0 -16
  93. package/bin/skills/clickzetta-java-sdk/SKILL.md +0 -186
  94. package/bin/skills/clickzetta-java-sdk/eval_cases.jsonl +0 -12
  95. package/bin/skills/clickzetta-java-sdk/references/bulkload.md +0 -163
  96. package/bin/skills/clickzetta-java-sdk/references/realtime.md +0 -212
  97. package/bin/skills/clickzetta-kafka-ingest-pipeline/LICENSE +0 -16
  98. package/bin/skills/clickzetta-kafka-ingest-pipeline/SKILL.md +0 -769
  99. package/bin/skills/clickzetta-kafka-ingest-pipeline/eval_cases.jsonl +0 -5
  100. package/bin/skills/clickzetta-kafka-ingest-pipeline/references/kafka-pipe-syntax.md +0 -324
  101. package/bin/skills/clickzetta-lakehouse-connect/LICENSE +0 -16
  102. package/bin/skills/clickzetta-lakehouse-connect/SKILL.md +0 -218
  103. package/bin/skills/clickzetta-lakehouse-connect/eval_cases.jsonl +0 -3
  104. package/bin/skills/clickzetta-lakehouse-connect/evals/evals.json +0 -35
  105. package/bin/skills/clickzetta-lakehouse-connect/references/config-file.md +0 -435
  106. package/bin/skills/clickzetta-lakehouse-connect/references/jdbc.md +0 -478
  107. package/bin/skills/clickzetta-lakehouse-connect/references/python-sdk.md +0 -225
  108. package/bin/skills/clickzetta-lakehouse-connect/references/sqlalchemy.md +0 -468
  109. package/bin/skills/clickzetta-lakehouse-connect/references/zettapark-session.md +0 -445
  110. package/bin/skills/clickzetta-manage-comments/LICENSE +0 -16
  111. package/bin/skills/clickzetta-manage-comments/SKILL.md +0 -219
  112. package/bin/skills/clickzetta-manage-comments/eval_cases.jsonl +0 -3
  113. package/bin/skills/clickzetta-metadata/LICENSE +0 -16
  114. package/bin/skills/clickzetta-metadata/SKILL.md +0 -502
  115. package/bin/skills/clickzetta-metadata/eval_cases.jsonl +0 -5
  116. package/bin/skills/clickzetta-metadata/references/instance-views-reference.md +0 -276
  117. package/bin/skills/clickzetta-metadata/references/metering-views-reference.md +0 -137
  118. package/bin/skills/clickzetta-metadata/references/show-desc-reference.md +0 -326
  119. package/bin/skills/clickzetta-metadata/references/views-reference.md +0 -271
  120. package/bin/skills/clickzetta-monitoring/LICENSE +0 -16
  121. package/bin/skills/clickzetta-monitoring/SKILL.md +0 -215
  122. package/bin/skills/clickzetta-monitoring/eval_cases.jsonl +0 -5
  123. package/bin/skills/clickzetta-monitoring/references/job-history-analysis.md +0 -97
  124. package/bin/skills/clickzetta-monitoring/references/show-jobs.md +0 -48
  125. package/bin/skills/clickzetta-oss-ingest-pipeline/LICENSE +0 -16
  126. package/bin/skills/clickzetta-oss-ingest-pipeline/SKILL.md +0 -562
  127. package/bin/skills/clickzetta-oss-ingest-pipeline/eval_cases.jsonl +0 -5
  128. package/bin/skills/clickzetta-overview/LICENSE +0 -16
  129. package/bin/skills/clickzetta-overview/SKILL.md +0 -102
  130. package/bin/skills/clickzetta-overview/eval_cases.jsonl +0 -5
  131. package/bin/skills/clickzetta-overview/references/brands-and-endpoints.md +0 -79
  132. package/bin/skills/clickzetta-overview/references/object-model.md +0 -311
  133. package/bin/skills/clickzetta-overview/references/studio-modules.md +0 -173
  134. package/bin/skills/clickzetta-pipeline-review/LICENSE +0 -16
  135. package/bin/skills/clickzetta-pipeline-review/SKILL.md +0 -377
  136. package/bin/skills/clickzetta-query-optimizer/LICENSE +0 -16
  137. package/bin/skills/clickzetta-query-optimizer/SKILL.md +0 -156
  138. package/bin/skills/clickzetta-query-optimizer/eval_cases.jsonl +0 -5
  139. package/bin/skills/clickzetta-query-optimizer/references/explain.md +0 -56
  140. package/bin/skills/clickzetta-query-optimizer/references/hints-and-sortkey.md +0 -78
  141. package/bin/skills/clickzetta-query-optimizer/references/optimize.md +0 -65
  142. package/bin/skills/clickzetta-query-optimizer/references/result-cache.md +0 -49
  143. package/bin/skills/clickzetta-query-optimizer/references/show-jobs.md +0 -42
  144. package/bin/skills/clickzetta-realtime-sync-pipeline/LICENSE +0 -16
  145. package/bin/skills/clickzetta-realtime-sync-pipeline/SKILL.md +0 -323
  146. package/bin/skills/clickzetta-realtime-sync-pipeline/eval_cases.jsonl +0 -5
  147. package/bin/skills/clickzetta-semantic-view/LICENSE +0 -16
  148. package/bin/skills/clickzetta-semantic-view/SKILL.md +0 -207
  149. package/bin/skills/clickzetta-semantic-view/eval_cases.jsonl +0 -12
  150. package/bin/skills/clickzetta-semantic-view/references/semantic-view-reference.md +0 -167
  151. package/bin/skills/clickzetta-spark-flink-connector/LICENSE +0 -16
  152. package/bin/skills/clickzetta-spark-flink-connector/SKILL.md +0 -92
  153. package/bin/skills/clickzetta-spark-flink-connector/eval_cases.jsonl +0 -5
  154. package/bin/skills/clickzetta-spark-flink-connector/references/flink.md +0 -147
  155. package/bin/skills/clickzetta-spark-flink-connector/references/spark.md +0 -132
  156. package/bin/skills/clickzetta-sql-pipeline-manager/LICENSE +0 -16
  157. package/bin/skills/clickzetta-sql-pipeline-manager/SKILL.md +0 -485
  158. package/bin/skills/clickzetta-sql-pipeline-manager/eval_cases.jsonl +0 -12
  159. package/bin/skills/clickzetta-sql-pipeline-manager/evals/evals.json +0 -166
  160. package/bin/skills/clickzetta-sql-pipeline-manager/references/dynamic-table.md +0 -185
  161. package/bin/skills/clickzetta-sql-pipeline-manager/references/materialized-view.md +0 -129
  162. package/bin/skills/clickzetta-sql-pipeline-manager/references/pipe.md +0 -222
  163. package/bin/skills/clickzetta-sql-pipeline-manager/references/table-stream.md +0 -125
  164. package/bin/skills/clickzetta-sql-syntax-guide/LICENSE +0 -16
  165. package/bin/skills/clickzetta-sql-syntax-guide/SKILL.md +0 -249
  166. package/bin/skills/clickzetta-sql-syntax-guide/eval_cases.jsonl +0 -3
  167. package/bin/skills/clickzetta-sql-syntax-guide/references/ddl-reference.md +0 -350
  168. package/bin/skills/clickzetta-sql-syntax-guide/references/dml-reference.md +0 -279
  169. package/bin/skills/clickzetta-sql-syntax-guide/references/dql-reference.md +0 -504
  170. package/bin/skills/clickzetta-sql-syntax-guide/references/functions-reference.md +0 -372
  171. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-databricks.md +0 -260
  172. package/bin/skills/clickzetta-sql-syntax-guide/references/migration-snowflake.md +0 -382
  173. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-snowflake.md +0 -346
  174. package/bin/skills/clickzetta-sql-syntax-guide/references/vs-spark.md +0 -229
  175. package/bin/skills/clickzetta-studio-task-manager/LICENSE +0 -16
  176. package/bin/skills/clickzetta-studio-task-manager/SKILL.md +0 -652
  177. package/bin/skills/clickzetta-table-lineage/LICENSE +0 -16
  178. package/bin/skills/clickzetta-table-lineage/SKILL.md +0 -90
  179. package/bin/skills/clickzetta-table-lineage/eval_cases.jsonl +0 -1
  180. package/bin/skills/clickzetta-table-lineage/references/normalize_func.sql +0 -14
  181. package/bin/skills/clickzetta-table-lineage/references/table_cost.sql +0 -38
  182. package/bin/skills/clickzetta-table-lineage/references/table_lineage_standalone.html +0 -562
  183. package/bin/skills/clickzetta-table-lineage/references/table_relation.sql +0 -25
  184. package/bin/skills/clickzetta-table-stream-pipeline/LICENSE +0 -16
  185. package/bin/skills/clickzetta-table-stream-pipeline/SKILL.md +0 -206
  186. package/bin/skills/clickzetta-table-stream-pipeline/eval_cases.jsonl +0 -5
  187. package/bin/skills/clickzetta-vcluster-manager/LICENSE +0 -16
  188. package/bin/skills/clickzetta-vcluster-manager/SKILL.md +0 -212
  189. package/bin/skills/clickzetta-vcluster-manager/eval_cases.jsonl +0 -5
  190. package/bin/skills/clickzetta-vcluster-manager/references/vc-cache.md +0 -54
  191. package/bin/skills/clickzetta-vcluster-manager/references/vcluster-ddl.md +0 -150
  192. package/bin/skills/clickzetta-volume-manager/LICENSE +0 -16
  193. package/bin/skills/clickzetta-volume-manager/SKILL.md +0 -292
  194. package/bin/skills/clickzetta-volume-manager/eval_cases.jsonl +0 -5
  195. package/bin/skills/clickzetta-volume-manager/references/volume-ddl.md +0 -199
  196. package/bin/skills/clickzetta-zettapark/LICENSE +0 -16
  197. package/bin/skills/clickzetta-zettapark/SKILL.md +0 -248
  198. package/bin/skills/clickzetta-zettapark/eval_cases.jsonl +0 -12
  199. package/bin/skills/clickzetta-zettapark/references/zettapark-api.md +0 -283
  200. package/bin/skills/cz-cli/SKILL.md +0 -311
  201. package/bin/skills/cz-cli/references/profile-setup.md +0 -120
@@ -1,132 +0,0 @@
1
- # Spark Connector 详细参考
2
-
3
- ## Maven 依赖
4
-
5
- ```xml
6
- <dependencies>
7
- <dependency>
8
- <groupId>org.apache.spark</groupId>
9
- <artifactId>spark-sql_2.12</artifactId>
10
- <version>3.4.0</version>
11
- <scope>provided</scope>
12
- </dependency>
13
- <dependency>
14
- <groupId>com.clickzetta</groupId>
15
- <artifactId>spark-clickzetta</artifactId>
16
- <version>1.0.0</version>
17
- </dependency>
18
- </dependencies>
19
- ```
20
-
21
- > ⚠️ `spark-clickzetta` JAR 需从 ClickZetta 官方下载,不在 Maven Central。联系 ClickZetta 支持获取。
22
-
23
- ## 连接参数
24
-
25
- | 参数 | 必填 | 说明 |
26
- |---|---|---|
27
- | `endpoint` | ✅ | 如 `your_instance.cn-shanghai-alicloud.api.clickzetta.com` |
28
- | `username` | ✅ | 用户名 |
29
- | `password` | ✅ | 密码 |
30
- | `workspace` | ✅ | 工作空间 |
31
- | `virtualCluster` | ✅ | 虚拟集群,默认 `default_ap` |
32
- | `schema` | ✅ | Schema 名称 |
33
- | `table` | ✅ | 目标表名 |
34
-
35
- ## 完整 Scala 示例
36
-
37
- ```scala
38
- import org.apache.spark.sql.SparkSession
39
-
40
- object SparkToLakehouse {
41
- def main(args: Array[String]): Unit = {
42
- val spark = SparkSession.builder()
43
- .appName("SparkToLakehouse")
44
- .getOrCreate()
45
-
46
- val endpoint = sys.env("CZ_ENDPOINT")
47
- val username = sys.env("CZ_USERNAME")
48
- val password = sys.env("CZ_PASSWORD")
49
- val workspace = sys.env("CZ_WORKSPACE")
50
-
51
- // 读取
52
- val df = spark.read.format("clickzetta")
53
- .option("endpoint", endpoint)
54
- .option("username", username)
55
- .option("password", password)
56
- .option("workspace", workspace)
57
- .option("virtualCluster", "default_ap")
58
- .option("schema", "silver")
59
- .option("table", "orders_cleaned")
60
- .load()
61
-
62
- // 转换
63
- import org.apache.spark.sql.functions._
64
- val result = df
65
- .filter(col("amount") > 0)
66
- .groupBy("region")
67
- .agg(sum("amount").as("total_revenue"), count("*").as("order_count"))
68
-
69
- // 写入(必须写全部字段,不支持主键表)
70
- result.write.format("clickzetta")
71
- .option("endpoint", endpoint)
72
- .option("username", username)
73
- .option("password", password)
74
- .option("workspace", workspace)
75
- .option("virtualCluster", "default_ap")
76
- .option("schema", "gold")
77
- .option("table", "region_summary")
78
- .mode("append")
79
- .save()
80
-
81
- spark.stop()
82
- }
83
- }
84
- ```
85
-
86
- ## Python(PySpark)示例
87
-
88
- ```python
89
- from pyspark.sql import SparkSession
90
- import os
91
-
92
- spark = SparkSession.builder.appName("PySparkToLakehouse").getOrCreate()
93
-
94
- options = {
95
- "endpoint": os.environ["CZ_ENDPOINT"],
96
- "username": os.environ["CZ_USERNAME"],
97
- "password": os.environ["CZ_PASSWORD"],
98
- "workspace": os.environ["CZ_WORKSPACE"],
99
- "virtualCluster": "default_ap",
100
- "schema": "public",
101
- "table": "orders",
102
- }
103
-
104
- # 读取
105
- df = spark.read.format("clickzetta").options(**options).load()
106
- df.show(5)
107
-
108
- # 写入
109
- df.write.format("clickzetta").options(**options).mode("append").save()
110
- ```
111
-
112
- ## 类型映射
113
-
114
- | Spark 类型 | Lakehouse 类型 |
115
- |---|---|
116
- | BooleanType | BOOLEAN |
117
- | IntegerType | INT32 |
118
- | LongType | INT64 |
119
- | FloatType | FLOAT32 |
120
- | DoubleType | FLOAT64 |
121
- | StringType | STRING |
122
- | TimestampType | TIMESTAMP |
123
- | DateType | DATE |
124
- | ArrayType | ARRAY |
125
- | MapType | MAP |
126
- | StructType | STRUCT |
127
-
128
- ## 限制
129
-
130
- - **不支持主键表写入**:目标表不能有主键,否则报错
131
- - **必须写全部字段**:DataFrame schema 必须与目标表完全匹配,不支持部分字段写入
132
- - **仅支持 append 模式**:不支持 overwrite(会报错)
@@ -1,16 +0,0 @@
1
- ClickZetta Skills License
2
- © 2026 Yunqi Inc. All rights reserved.
3
- LICENSE: Use of these materials (including all code, prompts, assets, files, and other components of these skills (collectively, "Skills")) is governed by your agreement with ClickZetta for the Service. If no separate agreement exists, use is governed by ClickZetta's Terms of Service (available at: https://yunqi.tech/documents/user-aggrement).
4
- Your applicable agreement is referred to as the "Agreement." "Service" is as defined in the Agreement.
5
- ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the contrary, you may not:
6
-
7
- Extract from the Service or retain copies of the Skills outside use with the Service;
8
- Reproduce or copy the Skills, except for temporary copies created automatically during authorized use of the Service;
9
- Create derivative works based on the Skills;
10
- Distribute, sublicense, or transfer the Skills to any third party;
11
- Make, offer to sell, sell, or import any inventions embodied in the Skills; nor,
12
- Reverse engineer, decompile, or disassemble the Skills.
13
-
14
- The receipt, viewing, or possession of the Skills does not convey or imply any license or right beyond those expressly granted above.
15
- Yunqi retains all rights, title, and interest in the Skills, including all copyrights, trademarks, patents, and all other applicable intellectual property rights.
16
- THE SKILLS ARE PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SKILLS OR THE USE OR OTHER DEALINGS IN THE SKILLS.
@@ -1,485 +0,0 @@
1
- ---
2
- name: clickzetta-sql-pipeline-manager
3
- description: >
4
- 管理 ClickZetta Lakehouse 的 SQL 数据管道对象,包括动态表(Dynamic Table)、
5
- 物化视图(Materialized View)、表流(Table Stream)和 Pipe。
6
- 覆盖创建、修改、暂停/恢复、删除、查看状态等完整生命周期操作。
7
- 仅涉及 SQL 命令操作,不涉及 Lakehouse Studio 图形化界面。
8
-
9
- 当用户说"创建动态表"、"创建物化视图"、"创建 Pipe"、"创建表流"、
10
- "暂停/恢复动态表"、"查看刷新历史"、"修改刷新频率"、"接入 Kafka"、
11
- "从对象存储持续导入"、"CDC 变更捕获"、"增量计算"、"实时 ETL"、
12
- "数据管道"、"pipeline"、"流式处理"、"动态表刷新失败"、
13
- "帮我设计 ETL"、"构建数据管道"、"数据接入方案"、
14
- "Medallion Architecture"、"Bronze Silver Gold"、"奖章架构"、
15
- "湖仓分层"、"Bronze 层"、"Silver 层"、"Gold 层"时触发。
16
- Keywords: SQL pipeline, dynamic table, materialized view, table stream, Pipe, data pipeline
17
- ---
18
-
19
- # ClickZetta SQL 数据管道管理
20
-
21
- ## ⚠️ ClickZetta 与标准 SQL / Snowflake 的关键语法差异
22
-
23
- 这些是最容易写错的地方,必须使用 ClickZetta 特有语法:
24
-
25
- | 功能 | ❌ 错误写法(Snowflake/标准SQL) | ✅ ClickZetta 正确写法 |
26
- |---|---|---|
27
- | 动态表计算集群 | `WAREHOUSE = compute_wh` | `vcluster default`(直接跟名称,不带等号) |
28
- | 动态表刷新调度 | `TARGET_LAG = '1 minutes'` | `REFRESH INTERVAL 1 MINUTE vcluster default` |
29
- | Kafka 读取函数 | `TABLE(READ_KAFKA(KAFKA_BROKER => ...))` | `read_kafka('broker', 'topic', '', 'group', '', '', '', '', 'raw', 'raw', 0, MAP(...))` — 位置参数 |
30
- | 物化视图定时刷新 | `REFRESH EVERY 1 HOUR` | `REFRESH INTERVAL 60 MINUTE vcluster default`(与动态表语法相同) |
31
- | 物化视图手动刷新 | `REFRESH MATERIALIZED VIEW` 放在 CREATE 里 | 单独执行 `REFRESH MATERIALIZED VIEW <name>;` |
32
- | 修改动态表 SQL | `ALTER DYNAMIC TABLE ... AS ...` | `CREATE OR REPLACE DYNAMIC TABLE ...`(ALTER 不支持修改 AS 子句) |
33
- | JSON 字段访问 | `$1:field::TYPE` 或 `data:key` | `parse_json(value::string)['field']::TYPE` 或 `data['key']` |
34
- | COPY INTO 导入格式 | `FILE_FORMAT = (TYPE = CSV)` | `USING CSV OPTIONS(...)` |
35
- | COPY INTO 导出格式 | `USING CSV` | `FILE_FORMAT = (TYPE = CSV)` |
36
-
37
- ---
38
-
39
- ## 向导:明确操作意图
40
-
41
- 收到请求后,先判断用户意图,选择对应工作流:
42
-
43
- > 你想做什么?
44
- >
45
- > **A. 设计并创建新的数据管道**(从数据源到各层 DT 的完整 SQL)→ 进入 Pipeline Wizard
46
- > **B. 管理已有管道对象**(修改 DT 刷新间隔、暂停/恢复、查看刷新历史)→ 直接执行对应操作
47
- > **C. 排查管道问题**(DT 刷新失败、Pipe 停止摄入、Stream 积压)→ 进入故障排查流程
48
-
49
- **如果用户已经明确说了要做什么(如"帮我创建一个 Kafka 到 DWD 的管道"、"暂停这个动态表"),直接执行,不再询问。**
50
-
51
- ---
52
-
53
- ## Pipeline Wizard(管道设计向导)
54
-
55
- 当用户想设计或构建一个完整的数据管道时,这是最高优先级的模式。触发词包括:
56
- "帮我设计/构建 ETL"、"完整的数据管道"、"从 Kafka/OSS 接入数据"、"ODS→DWD→DWS"、"端到端 pipeline"、
57
- "Medallion Architecture"、"Bronze/Silver/Gold"、"奖章架构"、"湖仓分层"。
58
-
59
- ### 层次命名约定
60
-
61
- 用户可能使用不同的分层命名,含义相同,按用户偏好保留原始命名:
62
-
63
- | 用户说的 | 含义 | Schema 命名建议 |
64
- |---|---|---|
65
- | Bronze / Silver / Gold | Medallion Architecture | `bronze` / `silver` / `gold` |
66
- | ODS / DWD / DWS | 国内数仓分层惯例 | `ods` / `dwd` / `dws` |
67
- | Raw / Cleansed / Aggregated | 通用英文描述 | `raw` / `cleansed` / `agg` |
68
-
69
- **不要把 Bronze 映射成 ODS、Silver 映射成 DWD 等——保留用户选择的命名,在 SQL 中直接使用对应的 schema 和表名前缀。**
70
-
71
- **Schema 命名必须加业务/项目前缀,避免与其他项目冲突。** 如果用户未提供前缀,询问项目名称或业务域名称,然后生成带前缀的 Schema 名:
72
-
73
- ```sql
74
- -- ❌ 容易重名,不要这样生成
75
- CREATE SCHEMA IF NOT EXISTS bronze;
76
-
77
- -- ✅ 加项目前缀
78
- CREATE SCHEMA IF NOT EXISTS ecommerce_bronze;
79
- CREATE SCHEMA IF NOT EXISTS ecommerce_silver;
80
- CREATE SCHEMA IF NOT EXISTS ecommerce_gold;
81
- ```
82
-
83
- ### 需求收集
84
-
85
- **如果用户已经提供了足够信息(数据来源、字段、层次需求、项目前缀),直接生成完整 SQL,不要再问。**
86
-
87
- 如果信息不完整,优先使用交互式问答工具(如 `question`)弹出选项菜单来收集以下信息;若无此类工具,则用文字一次性列出所有问题:
88
-
89
- ```
90
- question({
91
- questions: [
92
- {
93
- question: "数据来源?",
94
- options: [
95
- { label: "Kafka", description: "提供 broker 地址和 topic 名称" },
96
- { label: "对象存储(OSS/S3/COS)", description: "提供 Volume 路径和文件格式" },
97
- { label: "已有 Lakehouse 表(仅 INSERT)", description: "Dynamic Table 直接读源表" },
98
- { label: "已有 Lakehouse 表(含 UPDATE/DELETE)", description: "需要 Table Stream + Dynamic Table" }
99
- ]
100
- },
101
- {
102
- question: "刷新频率?",
103
- options: [
104
- { label: "实时(秒级)", description: "REFRESH INTERVAL 10~60 SECOND" },
105
- { label: "近实时(分钟级)", description: "REFRESH INTERVAL 1~10 MINUTE" },
106
- { label: "低频(小时/天)", description: "REFRESH INTERVAL 1 HOUR 或 1 DAY" }
107
- ]
108
- }
109
- ]
110
- })
111
- ```
112
-
113
- 还需确认:项目/业务前缀(Schema 命名用)、层次需求(几层、每层做什么)、目标表字段结构。这些可在用户回答后追问,或从上下文推断。
114
-
115
- ### 生成完整 SQL
116
-
117
- 收到回答后,生成完整的端到端 SQL,包含以下所有部分:
118
-
119
- ```
120
- 1. Schema 创建(CREATE SCHEMA IF NOT EXISTS,使用用户指定的层次名称)
121
- 2. 入口层建表(如果是外部摄入)
122
- 3. 数据入口(Pipe 或 Table Stream,根据来源选择)
123
- 4. 中间层动态表(清洗/过滤,REFRESH INTERVAL N MINUTE VCLUSTER name)
124
- 5. 服务层动态表(聚合/维度,REFRESH INTERVAL N MINUTE VCLUSTER name)
125
- 6. 各动态表创建后立即执行 REFRESH DYNAMIC TABLE(重置刷新基准)
126
- 7. 验证命令(SHOW + REFRESH HISTORY)
127
- 8. 运维操作(SUSPEND/RESUME)
128
- ```
129
-
130
- **SQL 生成后,将各段代码保存为 Studio 任务(代码资产化):**
131
-
132
- 数据管道开发场景下,所有 SQL 都应保存为 Studio 任务,作为可管理的代码资产:
133
-
134
- ```bash
135
- # 建表 DDL → 保存为 DRAFT 任务(不配 Cron)
136
- cz-cli task save-content <ddl_task_name> --content "<ddl_sql>"
137
-
138
- # ETL/转换 SQL → 保存为调度任务(配 Cron + 依赖)
139
- cz-cli task save-content <etl_task_name> --content "<etl_sql>"
140
- cz-cli task save-cron <etl_task_name> --cron '0 30 2 * * ? *'
141
- cz-cli task deploy <etl_task_name>
142
- ```
143
-
144
- > Dynamic Table DDL 也应保存为 DRAFT 任务(例如命名为 `03_ddl_dws_ads`),方便后续查阅和多环境迁移。
145
-
146
- **⚠️ DDL 任务 vs 数据流转任务的调度规则(硬性约束,不得违反):**
147
-
148
- | 任务类型 | 判断标准 | 调度配置 | Studio 状态 |
149
- |---|---|---|---|
150
- | DDL 任务 | 包含 `CREATE / DROP / ALTER TABLE/SCHEMA` | **禁止配置 Cron,禁止配置依赖** | DRAFT |
151
- | 数据流转任务 | 数据同步、ETL 转换、数据质量检查 | 配置 Cron + 上下游依赖 | PUBLISHED |
152
- | Dynamic Table | DWS/ADS 聚合层 | **不建调度任务**(不配 Cron、不配依赖),系统自动刷新;其 DDL 可按 DDL 任务规则保存为 DRAFT 留档 | — |
153
-
154
- > AI 生成 SQL 管道时,如果涉及 Studio 任务编排,必须遵守以上规则。不得为 DDL 语句生成 Cron 调度配置。
155
-
156
- **来源 → 入口对象的选择规则:**
157
- - Kafka → `CREATE PIPE ... AS COPY INTO ... FROM (SELECT ... FROM read_kafka('broker', 'topic', '', 'group', '', '', '', '', 'raw', 'raw', 0, MAP(...)))`
158
- - 对象存储(OSS/S3/COS)→ `CREATE PIPE ... VIRTUAL_CLUSTER = 'name' INGEST_MODE = 'LIST_PURGE' AS COPY INTO ... FROM VOLUME <volume_name> USING <format> PURGE=true`
159
- - 已有表 + 有 UPDATE/DELETE → `CREATE TABLE STREAM ... WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD')`,中间层过滤 `__change_type IN ('INSERT', 'UPDATE_AFTER', 'DELETE')`
160
- - 已有表 + 仅 INSERT → Dynamic Table 直接 `FROM` 源表
161
-
162
- **刷新频率规则:**
163
- - 第一个转换层(Bronze→Silver 或 ODS→DWD)设置用户指定的刷新频率(如 `REFRESH INTERVAL 1 MINUTE vcluster default`)
164
- - 下游层根据业务需求设置各自的刷新频率(如 `REFRESH INTERVAL 5 MINUTE vcluster default`)
165
-
166
- ---
167
-
168
- ## 对象类型速查
169
-
170
- | 对象 | 适用场景 | 核心特点 |
171
- |---|---|---|
172
- | **Dynamic Table** | 实时/近实时增量 ETL | SQL 定义,自动增量刷新,秒/分钟级延迟 |
173
- | **Materialized View** | 固定聚合加速查询 | 预计算存储,手动或定时全量刷新 |
174
- | **Table Stream** | CDC 变更数据捕获 | 捕获 INSERT/UPDATE/DELETE,配合 Dynamic Table 消费 |
175
- | **Pipe** | 持续数据摄入 | 从 Kafka 或对象存储自动持续导入,无需调度 |
176
-
177
- ## 决策树
178
-
179
- ```
180
- 用户需求
181
- ├── 持续从外部摄入数据(Kafka / OSS / S3)
182
- │ └── → Pipe
183
- ├── 对已有表做实时/增量转换
184
- │ ├── 需要感知 UPDATE/DELETE → Table Stream + Dynamic Table
185
- │ └── 只需 INSERT 追加 → Dynamic Table(直接查源表)
186
- ├── 固定聚合,不要求实时
187
- │ └── → Materialized View
188
- └── 多层 ETL(ODS→DWD→DWS 或 Bronze→Silver→Gold)
189
- └── → 多个 Dynamic Table 级联(各层设置独立 REFRESH INTERVAL)
190
- ```
191
-
192
- ## 步骤 0:确认连接
193
-
194
- 操作前先确认已连接到 ClickZetta Lakehouse。参考 `clickzetta-lakehouse-connect` skill 获取连接参数。
195
-
196
- ## 步骤 1:选择对象类型
197
-
198
- 根据决策树选择对象类型,阅读对应参考文件:
199
-
200
- | 对象 | 参考文件 |
201
- |---|---|
202
- | Dynamic Table | [references/dynamic-table.md](references/dynamic-table.md) |
203
- | Materialized View | [references/materialized-view.md](references/materialized-view.md) |
204
- | Table Stream | [references/table-stream.md](references/table-stream.md) |
205
- | Pipe | [references/pipe.md](references/pipe.md) |
206
-
207
- ## 步骤 2:生成并执行 SQL
208
-
209
- 阅读对应参考文件后,根据用户提供的参数生成完整可运行 SQL。
210
-
211
- **必填参数检查:**
212
- - Dynamic Table:`REFRESH INTERVAL N MINUTE vcluster name`、AS 查询
213
- - Table Stream:源表名、MODE(STANDARD 或 APPEND_ONLY)
214
- - Pipe(Kafka):bootstrap_servers、topic、group_id、目标表(位置参数语法)
215
- - Pipe(对象存储):Volume 路径、文件格式、目标表、`PURGE=true`(LIST_PURGE 模式)
216
-
217
- 若用户未提供 VCLUSTER,默认使用 `default`(GP 型集群)。
218
-
219
- ## 步骤 3:验证
220
-
221
- ```sql
222
- -- 验证动态表
223
- SHOW TABLES WHERE is_dynamic = true;
224
- SHOW DYNAMIC TABLE REFRESH HISTORY <name> LIMIT 5;
225
-
226
- -- 验证物化视图
227
- SHOW TABLES WHERE is_materialized_view = true;
228
-
229
- -- 验证 Table Stream
230
- SHOW TABLE STREAMS;
231
- SELECT COUNT(*) FROM <stream_name>; -- 查看待消费变更数
232
-
233
- -- 验证 Pipe
234
- SHOW PIPES;
235
- ```
236
-
237
- ---
238
-
239
- ## 典型场景示例
240
-
241
- ### 场景 A:Kafka → 动态表(实时 ETL)
242
-
243
- ```sql
244
- -- Step 1: 创建 Pipe 持续摄入 Kafka 数据到 ODS 层
245
- -- ⚠️ 注意:ClickZetta 不支持 CREATE OR REPLACE PIPE,需用 CREATE PIPE 或先 DROP 再 CREATE
246
- CREATE PIPE kafka_orders_pipe
247
- VIRTUAL_CLUSTER = 'default'
248
- BATCH_INTERVAL_IN_SECONDS = '60'
249
- AS
250
- COPY INTO ods.orders FROM (
251
- SELECT
252
- j['order_id']::STRING,
253
- j['user_id']::STRING,
254
- j['amount']::DECIMAL(10,2),
255
- j['status']::STRING,
256
- j['created_at']::TIMESTAMP
257
- FROM (
258
- SELECT parse_json(value::string) AS j
259
- FROM read_kafka(
260
- 'kafka.example.com:9092', -- bootstrap_servers
261
- 'orders', -- topic
262
- '', -- reserved
263
- 'lakehouse_ingest', -- group_id
264
- '', '', '', '', -- 位置参数留空,由 Pipe 管理
265
- 'raw', 'raw', 0,
266
- MAP('kafka.security.protocol', 'PLAINTEXT')
267
- )
268
- )
269
- );
270
-
271
- -- Step 2: 动态表做 DWD 层清洗(每分钟增量刷新)
272
- CREATE OR REPLACE DYNAMIC TABLE dwd.orders_clean
273
- REFRESH INTERVAL 1 MINUTE vcluster default
274
- AS
275
- SELECT
276
- order_id,
277
- user_id,
278
- amount,
279
- UPPER(status) AS status,
280
- created_at,
281
- DATE(created_at) AS dt
282
- FROM ods.orders
283
- WHERE amount > 0;
284
-
285
- -- Step 3: 动态表做 DWS 层聚合(每 5 分钟刷新)
286
- CREATE OR REPLACE DYNAMIC TABLE dws.order_hourly
287
- REFRESH INTERVAL 5 MINUTE vcluster default
288
- AS
289
- SELECT
290
- DATE_TRUNC('hour', created_at) AS hour,
291
- status,
292
- COUNT(*) AS order_cnt,
293
- SUM(amount) AS total_amount
294
- FROM dwd.orders_clean
295
- GROUP BY 1, 2;
296
- ```
297
-
298
- ### 场景 B:Table Stream + Dynamic Table(CDC UPSERT)
299
-
300
- ```sql
301
- -- Step 1: 在源表上创建 Stream 捕获变更
302
- CREATE TABLE STREAM ods.orders_stream
303
- ON TABLE ods.orders
304
- WITH PROPERTIES ('TABLE_STREAM_MODE' = 'STANDARD');
305
-
306
- -- Step 2: 动态表消费 Stream,过滤出最新状态
307
- CREATE OR REPLACE DYNAMIC TABLE dwd.orders_latest
308
- REFRESH INTERVAL 2 MINUTE vcluster default
309
- AS
310
- SELECT order_id, user_id, amount, status, created_at
311
- FROM ods.orders_stream
312
- WHERE __change_type IN ('INSERT', 'UPDATE_AFTER');
313
- ```
314
-
315
- ### 场景 C:物化视图加速 BI 查询
316
-
317
- ```sql
318
- -- 创建每小时刷新的物化视图
319
- -- ⚠️ 注意:ClickZetta 默认不支持 CREATE OR REPLACE MATERIALIZED VIEW(仅在 BUILD DEFERRED + DISABLE QUERY REWRITE 模式下可用)
320
- -- 方法 1: 先 DROP 再 CREATE(推荐)
321
- DROP MATERIALIZED VIEW IF EXISTS dws.mv_daily_revenue;
322
- CREATE MATERIALIZED VIEW dws.mv_daily_revenue
323
- COMMENT '每日收入汇总,供 BI 工具查询'
324
- REFRESH INTERVAL 60 MINUTE vcluster default
325
- AS
326
- SELECT
327
- DATE(created_at) AS day,
328
- region,
329
- SUM(amount) AS revenue,
330
- COUNT(DISTINCT user_id) AS uv
331
- FROM dwd.orders_clean
332
- GROUP BY 1, 2;
333
-
334
- -- 方法 2: 使用 BUILD DEFERRED + DISABLE QUERY REWRITE(复杂,不推荐)
335
- -- CREATE OR REPLACE MATERIALIZED VIEW ... BUILD DEFERRED DISABLE QUERY REWRITE AS ...
336
-
337
- -- 手动触发刷新
338
- REFRESH MATERIALIZED VIEW dws.mv_daily_revenue;
339
-
340
- -- 删除物化视图(⚠️ 注意:必须用 DROP MATERIALIZED VIEW,不能用 DROP TABLE)
341
- DROP MATERIALIZED VIEW dws.mv_daily_revenue;
342
- ```
343
-
344
- ### 场景 D:运维操作
345
-
346
- ```sql
347
- -- 暂停动态表(如集群维护)
348
- ALTER DYNAMIC TABLE dwd.orders_clean SUSPEND;
349
-
350
- -- 恢复
351
- ALTER DYNAMIC TABLE dwd.orders_clean RESUME;
352
-
353
- -- 查看刷新历史排查失败
354
- SHOW DYNAMIC TABLE REFRESH HISTORY dwd.orders_clean LIMIT 10;
355
-
356
- -- 暂停 Pipe
357
- ALTER PIPE kafka_orders_pipe SET PIPE_EXECUTION_PAUSED = true;
358
-
359
- -- 恢复 Pipe
360
- ALTER PIPE kafka_orders_pipe SET PIPE_EXECUTION_PAUSED = false;
361
- ```
362
-
363
- ### 场景 E:参数化动态表(按分区刷新)
364
-
365
- 通过 `SESSION_CONFIGS()` 函数定义参数化查询,在刷新时传入分区值控制全量或增量刷新范围:
366
-
367
- ```sql
368
- -- 创建参数化动态表(使用 SESSION_CONFIGS 定义参数)
369
- CREATE OR REPLACE DYNAMIC TABLE dwd.orders_partitioned
370
- REFRESH INTERVAL 30 MINUTE vcluster default
371
- AS
372
- SELECT order_id, user_id, amount, status, created_at, DATE(created_at) AS dt
373
- FROM ods.orders
374
- WHERE dt = SESSION_CONFIGS('target_date', CAST(CURRENT_DATE() AS STRING));
375
-
376
- -- 手动触发刷新并传入参数
377
- REFRESH DYNAMIC TABLE dwd.orders_partitioned
378
- WITH PROPERTIES ('target_date' = '2024-06-15');
379
- ```
380
-
381
- > **适用场景**:传统按天/按小时全量 ETL 任务改造为增量任务时,用 SESSION_CONFIGS 替换调度变量(如 `${bizdate}`),实现参数化分区刷新。
382
-
383
- ### 场景 F:动态表 DML 操作(手动修正数据)
384
-
385
- ⚠️ **重要**:ClickZetta 动态表**不支持 DML 操作**(INSERT/UPDATE/DELETE)。如需修正数据,有以下方案:
386
-
387
- **方案 1:在源表修正数据,等待动态表自动刷新(推荐)**
388
- ```sql
389
- -- 1. 在源表中修正数据
390
- -- 2. 等待动态表自动刷新(下一次 REFRESH INTERVAL 会全量刷新)
391
- ```
392
-
393
- **方案 2:使用普通表替代动态表**
394
- ```sql
395
- -- 对于需要频繁手动修正的场景,建议使用普通表 + 定时调度任务
396
- -- 而不是动态表
397
- CREATE TABLE dwd.orders_manual (
398
- order_id STRING,
399
- user_id STRING,
400
- amount DECIMAL(10,2),
401
- status STRING,
402
- created_at TIMESTAMP,
403
- dt DATE
404
- );
405
- ```
406
-
407
- > ⚠️ **动态表限制**:
408
- > - 动态表是只读的,不支持 INSERT/UPDATE/DELETE
409
- > - 数据修正应在源表进行,动态表会自动刷新
410
- > - 如需手动控制数据,使用普通表 + Studio 调度任务
411
-
412
- ---
413
-
414
- ## 常见错误
415
-
416
- | 错误 | 原因 | 解决方案 |
417
- |---|---|---|
418
- | `VCluster not available` | 计算集群未启动或名称错误 | 确认 VCLUSTER 名称,检查集群状态 |
419
- | 动态表刷新失败 | SQL 查询报错或源表结构变更 | `SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'xxx'` 查看错误详情 |
420
- | Stream 数据为空 | 已被消费或超出保留周期 | 检查源表 `data_retention_days`,确认是否已消费 |
421
- | Pipe 停止摄入 | Kafka offset 问题或连接断开 | `DESC PIPE EXTENDED` 查看状态,检查 Kafka 连接 |
422
- | `Cannot ALTER AS clause` | 尝试用 ALTER 修改动态表 SQL | 改用 `CREATE OR REPLACE DYNAMIC TABLE` |
423
- | `CREATE OR REPLACE PIPE` 语法报错 | ClickZetta 不支持该语法 | 用 `CREATE PIPE` 或先 `DROP PIPE` 再 `CREATE` |
424
- | `CREATE OR REPLACE MATERIALIZED VIEW` 语法报错 | 仅在 `BUILD DEFERRED` + `DISABLE QUERY REWRITE` 模式下支持 | 推荐先 `DROP MATERIALIZED VIEW` 再 `CREATE MATERIALIZED VIEW` |
425
- | `DROP TABLE` 删除物化视图报错 | 对象类型不匹配 | 用 `DROP MATERIALIZED VIEW`(不是 `DROP TABLE`) |
426
- | 动态表 DML 报错 `not allowed` | 动态表不支持 DML | 在源表修正数据,或使用普通表 + 调度任务 |
427
- | `SET cz.sql.dt.allow.dml` 报错 | 不支持 session statement | 动态表不支持 DML 操作,改用其他方案 |
428
-
429
- ---
430
-
431
- ## 交付验收 Checklist
432
-
433
- 管道创建完成后,**必须逐项验证**,不得跳过:
434
-
435
- ```sql
436
- -- 1. 行数比对:各层行数与预期一致
437
- SELECT COUNT(*) FROM ods.<table>; -- ODS 行数 ≈ 源端
438
- SELECT COUNT(*) FROM dwd.<table>; -- DWD 行数 ≤ ODS(清洗后)
439
- SELECT COUNT(*) FROM dws.<table>; -- DWS 行数符合聚合逻辑
440
-
441
- -- 2. Dynamic Table 刷新状态
442
- SHOW DYNAMIC TABLE REFRESH HISTORY <schema>.<table> LIMIT 5;
443
- -- 确认最近一次 status = SUCCESS,refresh_mode = INCREMENTAL 或 FULL
444
-
445
- -- 3. 关键字段非空率
446
- SELECT
447
- COUNT(*) AS total,
448
- COUNT(key_field) AS non_null,
449
- ROUND(COUNT(key_field) * 100.0 / COUNT(*), 2) AS non_null_pct
450
- FROM <schema>.<table>;
451
- -- 核心业务字段非空率应 > 99%
452
-
453
- -- 4. 主键唯一性(DWD 层事实表)
454
- SELECT key_col, COUNT(*) AS cnt
455
- FROM dwd.<table>
456
- GROUP BY key_col
457
- HAVING cnt > 1
458
- LIMIT 10;
459
- -- 结果为空 = 无重复,符合预期
460
-
461
- -- 5. Pipe 摄入状态(如有)
462
- SHOW PIPES;
463
- -- status = RUNNING,last_ingested_timestamp 持续更新
464
- ```
465
-
466
- **验收标准:**
467
- - [ ] 各层行数与预期一致
468
- - [ ] Dynamic Table 最近刷新状态为 SUCCESS
469
- - [ ] 关键字段非空率 > 99%
470
- - [ ] DWD 层主键无重复
471
- - [ ] Pipe 状态 RUNNING(如有)
472
- - [ ] 所有 DDL 任务为 DRAFT 状态(如涉及 Studio 任务)
473
- - [ ] DWS/ADS 层无冗余 Studio 调度任务
474
-
475
- ---
476
-
477
- ## 参考文档
478
-
479
- - [增量计算概述](https://www.yunqi.tech/documents/streaming_data_pipeline_overview)
480
- - [Dynamic Table](https://www.yunqi.tech/documents/dynamic-table)
481
- - [Table Stream 变化数据捕获](https://www.yunqi.tech/documents/table_stream)
482
- - [物化视图](https://www.yunqi.tech/documents/materialized_ddl)
483
- - [Pipe 简介](https://www.yunqi.tech/documents/pipe-summary)
484
- - [使用 Dynamic Table 开展实时 ETL](https://www.yunqi.tech/documents/tutorials-streaming-data-pipeline-with_dynamic-table)
485
- - [LLM 全量文档索引](https://yunqi.tech/llms-full.txt)
@@ -1,12 +0,0 @@
1
- {"case_id":"001","type":"should_call","user_input":"帮我创建一个动态表,每 5 分钟从 raw_events 聚合数据","expected_skill":"clickzetta-sql-pipeline-manager","expected_output_contains":["CREATE DYNAMIC TABLE","REFRESH INTERVAL"]}
2
- {"case_id":"002","type":"should_call","user_input":"怎么创建物化视图?","expected_skill":"clickzetta-sql-pipeline-manager","expected_output_contains":["CREATE MATERIALIZED VIEW"]}
3
- {"case_id":"003","type":"should_call","user_input":"创建一个 Table Stream 捕获 orders 表的变更","expected_skill":"clickzetta-sql-pipeline-manager","expected_output_contains":["CREATE TABLE STREAM"]}
4
- {"case_id":"004","type":"should_call","user_input":"怎么暂停动态表的刷新","expected_skill":"clickzetta-sql-pipeline-manager","expected_output_contains":["ALTER","SUSPEND"]}
5
- {"case_id":"005","type":"should_call","user_input":"怎么查看动态表的刷新历史","expected_skill":"clickzetta-sql-pipeline-manager","expected_output_contains":["SHOW DYNAMIC TABLE REFRESH HISTORY"]}
6
- {"case_id":"006","type":"should_call","user_input":"帮我设计一个 Medallion 架构的数据管道","expected_skill":"clickzetta-sql-pipeline-manager","expected_output_contains":["Bronze","Silver","Gold"]}
7
- {"case_id":"007","type":"should_call","user_input":"从 Kafka 持续导入数据到 Lakehouse 用什么方式","expected_skill":"clickzetta-sql-pipeline-manager","expected_output_contains":["Pipe","read_kafka"]}
8
- {"case_id":"008","type":"should_not_call","user_input":"帮我写一个 Node.js 后端","forbidden_skill":"clickzetta-sql-pipeline-manager"}
9
- {"case_id":"009","type":"should_not_call","user_input":"怎么创建用户和授权","forbidden_skill":"clickzetta-sql-pipeline-manager"}
10
- {"case_id":"010","type":"should_not_call","user_input":"Kubernetes 怎么部署","forbidden_skill":"clickzetta-sql-pipeline-manager"}
11
- {"case_id":"011","type":"should_not_call","user_input":"怎么连接 Superset","forbidden_skill":"clickzetta-sql-pipeline-manager"}
12
- {"case_id":"012","type":"should_not_call","user_input":"帮我优化一个慢查询","forbidden_skill":"clickzetta-sql-pipeline-manager"}